From 678c54d705e5c29b2fdb580158a4455703b46a0d Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Sun, 28 Feb 2021 16:19:03 -0800 Subject: [PATCH] data sink: add incremental mode --- weed/command/scaffold.go | 8 ++++++ weed/replication/replicator.go | 2 +- weed/replication/sink/azuresink/azure_sink.go | 14 +++++++--- weed/replication/sink/b2sink/b2_sink.go | 6 ++++ weed/replication/sink/filersink/filer_sink.go | 6 ++++ weed/replication/sink/gcssink/gcs_sink.go | 6 ++++ weed/replication/sink/localsink/local_sink.go | 4 +++ weed/replication/sink/replication_sink.go | 1 + weed/replication/sink/s3sink/s3_sink.go | 28 ++++++++++++------- 9 files changed, 60 insertions(+), 15 deletions(-) diff --git a/weed/command/scaffold.go b/weed/command/scaffold.go index 993391a42..c2d53e4bd 100644 --- a/weed/command/scaffold.go +++ b/weed/command/scaffold.go @@ -356,6 +356,9 @@ directory = "/buckets" [sink.local] enabled = false directory = "/data" +# all replicated files are under modified time as yyyy-mm-dd directories +# so each date directory contains all new and updated files. +is_incremental = false [sink.local_incremental] # all replicated files are under modified time as yyyy-mm-dd directories @@ -373,6 +376,7 @@ directory = "/backup" replication = "" collection = "" ttlSec = 0 +is_incremental = false [sink.s3] # read credentials doc at https://docs.aws.amazon.com/sdk-for-go/v1/developer-guide/sessions.html @@ -384,6 +388,7 @@ region = "us-east-2" bucket = "your_bucket_name" # an existing bucket directory = "/" # destination directory endpoint = "" +is_incremental = false [sink.google_cloud_storage] # read credentials doc at https://cloud.google.com/docs/authentication/getting-started @@ -391,6 +396,7 @@ enabled = false google_application_credentials = "/path/to/x.json" # path to json credential file bucket = "your_bucket_seaweedfs" # an existing bucket directory = "/" # destination directory +is_incremental = false [sink.azure] # experimental, let me know if it works @@ -399,6 +405,7 @@ account_name = "" account_key = "" container = "mycontainer" # an existing container directory = "/" # destination directory +is_incremental = false [sink.backblaze] enabled = false @@ -406,6 +413,7 @@ b2_account_id = "" b2_master_application_key = "" bucket = "mybucket" # an existing bucket directory = "/" # destination directory +is_incremental = false ` diff --git a/weed/replication/replicator.go b/weed/replication/replicator.go index 7688029e6..d7e609c68 100644 --- a/weed/replication/replicator.go +++ b/weed/replication/replicator.go @@ -42,7 +42,7 @@ func (r *Replicator) Replicate(ctx context.Context, key string, message *filer_p return nil } var dateKey string - if r.sink.GetName() == "local_incremental" { + if r.sink.IsIncremental() { var mTime int64 if message.NewEntry != nil { mTime = message.NewEntry.Attributes.Mtime diff --git a/weed/replication/sink/azuresink/azure_sink.go b/weed/replication/sink/azuresink/azure_sink.go index df70be64b..865f1b25c 100644 --- a/weed/replication/sink/azuresink/azure_sink.go +++ b/weed/replication/sink/azuresink/azure_sink.go @@ -18,10 +18,11 @@ import ( ) type AzureSink struct { - containerURL azblob.ContainerURL - container string - dir string - filerSource *source.FilerSource + containerURL azblob.ContainerURL + container string + dir string + filerSource *source.FilerSource + isIncremental bool } func init() { @@ -36,7 +37,12 @@ func (g *AzureSink) GetSinkToDirectory() string { return g.dir } +func (g *AzureSink) IsIncremental() bool { + return g.isIncremental +} + func (g *AzureSink) Initialize(configuration util.Configuration, prefix string) error { + g.isIncremental = configuration.GetBool(prefix+"is_incremental") return g.initialize( configuration.GetString(prefix+"account_name"), configuration.GetString(prefix+"account_key"), diff --git a/weed/replication/sink/b2sink/b2_sink.go b/weed/replication/sink/b2sink/b2_sink.go index 24f0ecbbc..8738231d5 100644 --- a/weed/replication/sink/b2sink/b2_sink.go +++ b/weed/replication/sink/b2sink/b2_sink.go @@ -18,6 +18,7 @@ type B2Sink struct { bucket string dir string filerSource *source.FilerSource + isIncremental bool } func init() { @@ -32,7 +33,12 @@ func (g *B2Sink) GetSinkToDirectory() string { return g.dir } +func (g *B2Sink) IsIncremental() bool { + return g.isIncremental +} + func (g *B2Sink) Initialize(configuration util.Configuration, prefix string) error { + g.isIncremental = configuration.GetBool(prefix+"is_incremental") return g.initialize( configuration.GetString(prefix+"b2_account_id"), configuration.GetString(prefix+"b2_master_application_key"), diff --git a/weed/replication/sink/filersink/filer_sink.go b/weed/replication/sink/filersink/filer_sink.go index 509f75116..4165e87be 100644 --- a/weed/replication/sink/filersink/filer_sink.go +++ b/weed/replication/sink/filersink/filer_sink.go @@ -30,6 +30,7 @@ type FilerSink struct { grpcDialOption grpc.DialOption address string writeChunkByFiler bool + isIncremental bool } func init() { @@ -44,7 +45,12 @@ func (fs *FilerSink) GetSinkToDirectory() string { return fs.dir } +func (fs *FilerSink) IsIncremental() bool { + return fs.isIncremental +} + func (fs *FilerSink) Initialize(configuration util.Configuration, prefix string) error { + fs.isIncremental = configuration.GetBool(prefix+"is_incremental") return fs.DoInitialize( "", configuration.GetString(prefix+"grpcAddress"), diff --git a/weed/replication/sink/gcssink/gcs_sink.go b/weed/replication/sink/gcssink/gcs_sink.go index badabc32c..02f482862 100644 --- a/weed/replication/sink/gcssink/gcs_sink.go +++ b/weed/replication/sink/gcssink/gcs_sink.go @@ -22,6 +22,7 @@ type GcsSink struct { bucket string dir string filerSource *source.FilerSource + isIncremental bool } func init() { @@ -36,7 +37,12 @@ func (g *GcsSink) GetSinkToDirectory() string { return g.dir } +func (g *GcsSink) IsIncremental() bool { + return g.isIncremental +} + func (g *GcsSink) Initialize(configuration util.Configuration, prefix string) error { + g.isIncremental = configuration.GetBool(prefix+"is_incremental") return g.initialize( configuration.GetString(prefix+"google_application_credentials"), configuration.GetString(prefix+"bucket"), diff --git a/weed/replication/sink/localsink/local_sink.go b/weed/replication/sink/localsink/local_sink.go index 21c625c3f..c76647fcc 100644 --- a/weed/replication/sink/localsink/local_sink.go +++ b/weed/replication/sink/localsink/local_sink.go @@ -50,6 +50,10 @@ func (localsink *LocalSink) GetSinkToDirectory() string { return localsink.Dir } +func (localsink *LocalSink) IsIncremental() bool { + return true +} + func (localsink *LocalSink) DeleteEntry(key string, isDirectory, deleteIncludeChunks bool, signatures []int32) error { if localsink.isMultiPartEntry(key) { return nil diff --git a/weed/replication/sink/replication_sink.go b/weed/replication/sink/replication_sink.go index cfc6e0a4d..4ffd09462 100644 --- a/weed/replication/sink/replication_sink.go +++ b/weed/replication/sink/replication_sink.go @@ -14,6 +14,7 @@ type ReplicationSink interface { UpdateEntry(key string, oldEntry *filer_pb.Entry, newParentPath string, newEntry *filer_pb.Entry, deleteIncludeChunks bool, signatures []int32) (foundExistingEntry bool, err error) GetSinkToDirectory() string SetSourceFiler(s *source.FilerSource) + IsIncremental() bool } var ( diff --git a/weed/replication/sink/s3sink/s3_sink.go b/weed/replication/sink/s3sink/s3_sink.go index 58432ee6b..ea219ce74 100644 --- a/weed/replication/sink/s3sink/s3_sink.go +++ b/weed/replication/sink/s3sink/s3_sink.go @@ -21,12 +21,13 @@ import ( ) type S3Sink struct { - conn s3iface.S3API - region string - bucket string - dir string - endpoint string - filerSource *source.FilerSource + conn s3iface.S3API + region string + bucket string + dir string + endpoint string + filerSource *source.FilerSource + isIncremental bool } func init() { @@ -41,11 +42,17 @@ func (s3sink *S3Sink) GetSinkToDirectory() string { return s3sink.dir } +func (s3sink *S3Sink) IsIncremental() bool { + return s3sink.isIncremental +} + func (s3sink *S3Sink) Initialize(configuration util.Configuration, prefix string) error { glog.V(0).Infof("sink.s3.region: %v", configuration.GetString(prefix+"region")) glog.V(0).Infof("sink.s3.bucket: %v", configuration.GetString(prefix+"bucket")) glog.V(0).Infof("sink.s3.directory: %v", configuration.GetString(prefix+"directory")) glog.V(0).Infof("sink.s3.endpoint: %v", configuration.GetString(prefix+"endpoint")) + glog.V(0).Infof("sink.s3.is_incremental: %v", configuration.GetString(prefix+"is_incremental")) + s3sink.isIncremental = configuration.GetBool(prefix + "is_incremental") return s3sink.initialize( configuration.GetString(prefix+"aws_access_key_id"), configuration.GetString(prefix+"aws_secret_access_key"), @@ -67,8 +74,8 @@ func (s3sink *S3Sink) initialize(awsAccessKeyId, awsSecretAccessKey, region, buc s3sink.endpoint = endpoint config := &aws.Config{ - Region: aws.String(s3sink.region), - Endpoint: aws.String(s3sink.endpoint), + Region: aws.String(s3sink.region), + Endpoint: aws.String(s3sink.endpoint), } if awsAccessKeyId != "" && awsSecretAccessKey != "" { config.Credentials = credentials.NewStaticCredentials(awsAccessKeyId, awsSecretAccessKey, "") @@ -104,7 +111,7 @@ func (s3sink *S3Sink) CreateEntry(key string, entry *filer_pb.Entry, signatures uploadId, err := s3sink.createMultipartUpload(key, entry) if err != nil { - return err + return fmt.Errorf("createMultipartUpload: %v", err) } totalSize := filer.FileSize(entry) @@ -120,6 +127,7 @@ func (s3sink *S3Sink) CreateEntry(key string, entry *filer_pb.Entry, signatures defer wg.Done() if part, uploadErr := s3sink.uploadPart(key, uploadId, partId, chunk); uploadErr != nil { err = uploadErr + glog.Errorf("uploadPart: %v", uploadErr) } else { parts[index] = part } @@ -129,7 +137,7 @@ func (s3sink *S3Sink) CreateEntry(key string, entry *filer_pb.Entry, signatures if err != nil { s3sink.abortMultipartUpload(key, uploadId) - return err + return fmt.Errorf("uploadPart: %v", err) } return s3sink.completeMultipartUpload(context.Background(), key, uploadId, parts)