data sink: add incremental mode

This commit is contained in:
Chris Lu 2021-02-28 16:19:03 -08:00
parent 4ff2c5c4c9
commit 678c54d705
9 changed files with 60 additions and 15 deletions

View file

@ -356,6 +356,9 @@ directory = "/buckets"
[sink.local] [sink.local]
enabled = false enabled = false
directory = "/data" directory = "/data"
# all replicated files are under modified time as yyyy-mm-dd directories
# so each date directory contains all new and updated files.
is_incremental = false
[sink.local_incremental] [sink.local_incremental]
# all replicated files are under modified time as yyyy-mm-dd directories # all replicated files are under modified time as yyyy-mm-dd directories
@ -373,6 +376,7 @@ directory = "/backup"
replication = "" replication = ""
collection = "" collection = ""
ttlSec = 0 ttlSec = 0
is_incremental = false
[sink.s3] [sink.s3]
# read credentials doc at https://docs.aws.amazon.com/sdk-for-go/v1/developer-guide/sessions.html # read credentials doc at https://docs.aws.amazon.com/sdk-for-go/v1/developer-guide/sessions.html
@ -384,6 +388,7 @@ region = "us-east-2"
bucket = "your_bucket_name" # an existing bucket bucket = "your_bucket_name" # an existing bucket
directory = "/" # destination directory directory = "/" # destination directory
endpoint = "" endpoint = ""
is_incremental = false
[sink.google_cloud_storage] [sink.google_cloud_storage]
# read credentials doc at https://cloud.google.com/docs/authentication/getting-started # read credentials doc at https://cloud.google.com/docs/authentication/getting-started
@ -391,6 +396,7 @@ enabled = false
google_application_credentials = "/path/to/x.json" # path to json credential file google_application_credentials = "/path/to/x.json" # path to json credential file
bucket = "your_bucket_seaweedfs" # an existing bucket bucket = "your_bucket_seaweedfs" # an existing bucket
directory = "/" # destination directory directory = "/" # destination directory
is_incremental = false
[sink.azure] [sink.azure]
# experimental, let me know if it works # experimental, let me know if it works
@ -399,6 +405,7 @@ account_name = ""
account_key = "" account_key = ""
container = "mycontainer" # an existing container container = "mycontainer" # an existing container
directory = "/" # destination directory directory = "/" # destination directory
is_incremental = false
[sink.backblaze] [sink.backblaze]
enabled = false enabled = false
@ -406,6 +413,7 @@ b2_account_id = ""
b2_master_application_key = "" b2_master_application_key = ""
bucket = "mybucket" # an existing bucket bucket = "mybucket" # an existing bucket
directory = "/" # destination directory directory = "/" # destination directory
is_incremental = false
` `

View file

@ -42,7 +42,7 @@ func (r *Replicator) Replicate(ctx context.Context, key string, message *filer_p
return nil return nil
} }
var dateKey string var dateKey string
if r.sink.GetName() == "local_incremental" { if r.sink.IsIncremental() {
var mTime int64 var mTime int64
if message.NewEntry != nil { if message.NewEntry != nil {
mTime = message.NewEntry.Attributes.Mtime mTime = message.NewEntry.Attributes.Mtime

View file

@ -22,6 +22,7 @@ type AzureSink struct {
container string container string
dir string dir string
filerSource *source.FilerSource filerSource *source.FilerSource
isIncremental bool
} }
func init() { func init() {
@ -36,7 +37,12 @@ func (g *AzureSink) GetSinkToDirectory() string {
return g.dir return g.dir
} }
func (g *AzureSink) IsIncremental() bool {
return g.isIncremental
}
func (g *AzureSink) Initialize(configuration util.Configuration, prefix string) error { func (g *AzureSink) Initialize(configuration util.Configuration, prefix string) error {
g.isIncremental = configuration.GetBool(prefix+"is_incremental")
return g.initialize( return g.initialize(
configuration.GetString(prefix+"account_name"), configuration.GetString(prefix+"account_name"),
configuration.GetString(prefix+"account_key"), configuration.GetString(prefix+"account_key"),

View file

@ -18,6 +18,7 @@ type B2Sink struct {
bucket string bucket string
dir string dir string
filerSource *source.FilerSource filerSource *source.FilerSource
isIncremental bool
} }
func init() { func init() {
@ -32,7 +33,12 @@ func (g *B2Sink) GetSinkToDirectory() string {
return g.dir return g.dir
} }
func (g *B2Sink) IsIncremental() bool {
return g.isIncremental
}
func (g *B2Sink) Initialize(configuration util.Configuration, prefix string) error { func (g *B2Sink) Initialize(configuration util.Configuration, prefix string) error {
g.isIncremental = configuration.GetBool(prefix+"is_incremental")
return g.initialize( return g.initialize(
configuration.GetString(prefix+"b2_account_id"), configuration.GetString(prefix+"b2_account_id"),
configuration.GetString(prefix+"b2_master_application_key"), configuration.GetString(prefix+"b2_master_application_key"),

View file

@ -30,6 +30,7 @@ type FilerSink struct {
grpcDialOption grpc.DialOption grpcDialOption grpc.DialOption
address string address string
writeChunkByFiler bool writeChunkByFiler bool
isIncremental bool
} }
func init() { func init() {
@ -44,7 +45,12 @@ func (fs *FilerSink) GetSinkToDirectory() string {
return fs.dir return fs.dir
} }
func (fs *FilerSink) IsIncremental() bool {
return fs.isIncremental
}
func (fs *FilerSink) Initialize(configuration util.Configuration, prefix string) error { func (fs *FilerSink) Initialize(configuration util.Configuration, prefix string) error {
fs.isIncremental = configuration.GetBool(prefix+"is_incremental")
return fs.DoInitialize( return fs.DoInitialize(
"", "",
configuration.GetString(prefix+"grpcAddress"), configuration.GetString(prefix+"grpcAddress"),

View file

@ -22,6 +22,7 @@ type GcsSink struct {
bucket string bucket string
dir string dir string
filerSource *source.FilerSource filerSource *source.FilerSource
isIncremental bool
} }
func init() { func init() {
@ -36,7 +37,12 @@ func (g *GcsSink) GetSinkToDirectory() string {
return g.dir return g.dir
} }
func (g *GcsSink) IsIncremental() bool {
return g.isIncremental
}
func (g *GcsSink) Initialize(configuration util.Configuration, prefix string) error { func (g *GcsSink) Initialize(configuration util.Configuration, prefix string) error {
g.isIncremental = configuration.GetBool(prefix+"is_incremental")
return g.initialize( return g.initialize(
configuration.GetString(prefix+"google_application_credentials"), configuration.GetString(prefix+"google_application_credentials"),
configuration.GetString(prefix+"bucket"), configuration.GetString(prefix+"bucket"),

View file

@ -50,6 +50,10 @@ func (localsink *LocalSink) GetSinkToDirectory() string {
return localsink.Dir return localsink.Dir
} }
func (localsink *LocalSink) IsIncremental() bool {
return true
}
func (localsink *LocalSink) DeleteEntry(key string, isDirectory, deleteIncludeChunks bool, signatures []int32) error { func (localsink *LocalSink) DeleteEntry(key string, isDirectory, deleteIncludeChunks bool, signatures []int32) error {
if localsink.isMultiPartEntry(key) { if localsink.isMultiPartEntry(key) {
return nil return nil

View file

@ -14,6 +14,7 @@ type ReplicationSink interface {
UpdateEntry(key string, oldEntry *filer_pb.Entry, newParentPath string, newEntry *filer_pb.Entry, deleteIncludeChunks bool, signatures []int32) (foundExistingEntry bool, err error) UpdateEntry(key string, oldEntry *filer_pb.Entry, newParentPath string, newEntry *filer_pb.Entry, deleteIncludeChunks bool, signatures []int32) (foundExistingEntry bool, err error)
GetSinkToDirectory() string GetSinkToDirectory() string
SetSourceFiler(s *source.FilerSource) SetSourceFiler(s *source.FilerSource)
IsIncremental() bool
} }
var ( var (

View file

@ -27,6 +27,7 @@ type S3Sink struct {
dir string dir string
endpoint string endpoint string
filerSource *source.FilerSource filerSource *source.FilerSource
isIncremental bool
} }
func init() { func init() {
@ -41,11 +42,17 @@ func (s3sink *S3Sink) GetSinkToDirectory() string {
return s3sink.dir return s3sink.dir
} }
func (s3sink *S3Sink) IsIncremental() bool {
return s3sink.isIncremental
}
func (s3sink *S3Sink) Initialize(configuration util.Configuration, prefix string) error { func (s3sink *S3Sink) Initialize(configuration util.Configuration, prefix string) error {
glog.V(0).Infof("sink.s3.region: %v", configuration.GetString(prefix+"region")) glog.V(0).Infof("sink.s3.region: %v", configuration.GetString(prefix+"region"))
glog.V(0).Infof("sink.s3.bucket: %v", configuration.GetString(prefix+"bucket")) glog.V(0).Infof("sink.s3.bucket: %v", configuration.GetString(prefix+"bucket"))
glog.V(0).Infof("sink.s3.directory: %v", configuration.GetString(prefix+"directory")) glog.V(0).Infof("sink.s3.directory: %v", configuration.GetString(prefix+"directory"))
glog.V(0).Infof("sink.s3.endpoint: %v", configuration.GetString(prefix+"endpoint")) glog.V(0).Infof("sink.s3.endpoint: %v", configuration.GetString(prefix+"endpoint"))
glog.V(0).Infof("sink.s3.is_incremental: %v", configuration.GetString(prefix+"is_incremental"))
s3sink.isIncremental = configuration.GetBool(prefix + "is_incremental")
return s3sink.initialize( return s3sink.initialize(
configuration.GetString(prefix+"aws_access_key_id"), configuration.GetString(prefix+"aws_access_key_id"),
configuration.GetString(prefix+"aws_secret_access_key"), configuration.GetString(prefix+"aws_secret_access_key"),
@ -104,7 +111,7 @@ func (s3sink *S3Sink) CreateEntry(key string, entry *filer_pb.Entry, signatures
uploadId, err := s3sink.createMultipartUpload(key, entry) uploadId, err := s3sink.createMultipartUpload(key, entry)
if err != nil { if err != nil {
return err return fmt.Errorf("createMultipartUpload: %v", err)
} }
totalSize := filer.FileSize(entry) totalSize := filer.FileSize(entry)
@ -120,6 +127,7 @@ func (s3sink *S3Sink) CreateEntry(key string, entry *filer_pb.Entry, signatures
defer wg.Done() defer wg.Done()
if part, uploadErr := s3sink.uploadPart(key, uploadId, partId, chunk); uploadErr != nil { if part, uploadErr := s3sink.uploadPart(key, uploadId, partId, chunk); uploadErr != nil {
err = uploadErr err = uploadErr
glog.Errorf("uploadPart: %v", uploadErr)
} else { } else {
parts[index] = part parts[index] = part
} }
@ -129,7 +137,7 @@ func (s3sink *S3Sink) CreateEntry(key string, entry *filer_pb.Entry, signatures
if err != nil { if err != nil {
s3sink.abortMultipartUpload(key, uploadId) s3sink.abortMultipartUpload(key, uploadId)
return err return fmt.Errorf("uploadPart: %v", err)
} }
return s3sink.completeMultipartUpload(context.Background(), key, uploadId, parts) return s3sink.completeMultipartUpload(context.Background(), key, uploadId, parts)