2021-09-04 11:35:46 +00:00
|
|
|
package command
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
|
|
|
"fmt"
|
2022-09-10 05:57:38 +00:00
|
|
|
"github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants"
|
2021-11-28 06:09:23 +00:00
|
|
|
"os"
|
|
|
|
"strings"
|
|
|
|
"time"
|
|
|
|
|
2022-07-29 07:17:28 +00:00
|
|
|
"github.com/seaweedfs/seaweedfs/weed/filer"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/glog"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/pb"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/pb/remote_pb"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/remote_storage"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/replication/source"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/util"
|
2021-09-16 05:47:17 +00:00
|
|
|
"google.golang.org/grpc"
|
2022-08-17 19:05:07 +00:00
|
|
|
"google.golang.org/protobuf/proto"
|
2021-09-04 11:35:46 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
func followUpdatesAndUploadToRemote(option *RemoteSyncOptions, filerSource *source.FilerSource, mountedDir string) error {
|
|
|
|
|
|
|
|
// read filer remote storage mount mappings
|
2021-09-13 05:47:52 +00:00
|
|
|
_, _, remoteStorageMountLocation, remoteStorage, detectErr := filer.DetectMountInfo(option.grpcDialOption, pb.ServerAddress(*option.filerAddress), mountedDir)
|
2021-09-04 11:35:46 +00:00
|
|
|
if detectErr != nil {
|
|
|
|
return fmt.Errorf("read mount info: %v", detectErr)
|
|
|
|
}
|
|
|
|
|
2023-12-13 20:02:57 +00:00
|
|
|
eachEntryFunc, err := option.makeEventProcessor(remoteStorage, mountedDir, remoteStorageMountLocation, filerSource)
|
2021-09-04 11:35:46 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2024-01-12 18:57:18 +00:00
|
|
|
lastOffsetTs := collectLastSyncOffset(option, option.grpcDialOption, pb.ServerAddress(*option.filerAddress), mountedDir, *option.timeAgo)
|
|
|
|
processor := NewMetadataProcessor(eachEntryFunc, 128, lastOffsetTs.UnixNano())
|
2022-08-22 07:01:48 +00:00
|
|
|
|
|
|
|
var lastLogTsNs = time.Now().UnixNano()
|
|
|
|
processEventFnWithOffset := pb.AddOffsetFunc(func(resp *filer_pb.SubscribeMetadataResponse) error {
|
2022-10-01 09:33:47 +00:00
|
|
|
if resp.EventNotification.NewEntry != nil {
|
|
|
|
if *option.storageClass == "" {
|
|
|
|
if _, ok := resp.EventNotification.NewEntry.Extended[s3_constants.AmzStorageClass]; ok {
|
|
|
|
delete(resp.EventNotification.NewEntry.Extended, s3_constants.AmzStorageClass)
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
resp.EventNotification.NewEntry.Extended[s3_constants.AmzStorageClass] = []byte(*option.storageClass)
|
|
|
|
}
|
2022-09-10 21:15:42 +00:00
|
|
|
}
|
2022-10-01 09:33:47 +00:00
|
|
|
|
2022-08-22 07:01:48 +00:00
|
|
|
processor.AddSyncJob(resp)
|
|
|
|
return nil
|
|
|
|
}, 3*time.Second, func(counter int64, lastTsNs int64) error {
|
2024-01-12 18:57:18 +00:00
|
|
|
offsetTsNs := processor.processedTsWatermark.Load()
|
|
|
|
if offsetTsNs == 0 {
|
2022-08-22 07:01:48 +00:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
// use processor.processedTsWatermark instead of the lastTsNs from the most recent job
|
|
|
|
now := time.Now().UnixNano()
|
2024-01-12 18:57:18 +00:00
|
|
|
glog.V(0).Infof("remote sync %s progressed to %v %0.2f/sec", *option.filerAddress, time.Unix(0, offsetTsNs), float64(counter)/(float64(now-lastLogTsNs)/1e9))
|
2022-08-22 07:01:48 +00:00
|
|
|
lastLogTsNs = now
|
2024-01-12 18:57:18 +00:00
|
|
|
return remote_storage.SetSyncOffset(option.grpcDialOption, pb.ServerAddress(*option.filerAddress), mountedDir, offsetTsNs)
|
2021-09-04 11:35:46 +00:00
|
|
|
})
|
|
|
|
|
2022-07-23 17:50:28 +00:00
|
|
|
option.clientEpoch++
|
2023-03-22 06:01:49 +00:00
|
|
|
|
|
|
|
metadataFollowOption := &pb.MetadataFollowOption{
|
|
|
|
ClientName: "filer.remote.sync",
|
|
|
|
ClientId: option.clientId,
|
|
|
|
ClientEpoch: option.clientEpoch,
|
|
|
|
SelfSignature: 0,
|
|
|
|
PathPrefix: mountedDir,
|
|
|
|
AdditionalPathPrefixes: []string{filer.DirectoryEtcRemote},
|
|
|
|
DirectoriesToWatch: nil,
|
|
|
|
StartTsNs: lastOffsetTs.UnixNano(),
|
|
|
|
StopTsNs: 0,
|
|
|
|
EventErrorType: pb.TrivialOnError,
|
|
|
|
}
|
|
|
|
|
|
|
|
return pb.FollowMetadata(pb.ServerAddress(*option.filerAddress), option.grpcDialOption, metadataFollowOption, processEventFnWithOffset)
|
2021-09-04 11:35:46 +00:00
|
|
|
}
|
|
|
|
|
2023-12-22 19:33:50 +00:00
|
|
|
func (option *RemoteSyncOptions) makeEventProcessor(remoteStorage *remote_pb.RemoteConf, mountedDir string, remoteStorageMountLocation *remote_pb.RemoteStorageLocation, filerSource *source.FilerSource) (pb.ProcessMetadataFunc, error) {
|
2021-09-04 11:35:46 +00:00
|
|
|
client, err := remote_storage.GetRemoteStorage(remoteStorage)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
handleEtcRemoteChanges := func(resp *filer_pb.SubscribeMetadataResponse) error {
|
|
|
|
message := resp.EventNotification
|
|
|
|
if message.NewEntry == nil {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
if message.NewEntry.Name == filer.REMOTE_STORAGE_MOUNT_FILE {
|
|
|
|
mappings, readErr := filer.UnmarshalRemoteStorageMappings(message.NewEntry.Content)
|
|
|
|
if readErr != nil {
|
|
|
|
return fmt.Errorf("unmarshal mappings: %v", readErr)
|
|
|
|
}
|
|
|
|
if remoteLoc, found := mappings.Mappings[mountedDir]; found {
|
|
|
|
if remoteStorageMountLocation.Bucket != remoteLoc.Bucket || remoteStorageMountLocation.Path != remoteLoc.Path {
|
|
|
|
glog.Fatalf("Unexpected mount changes %+v => %+v", remoteStorageMountLocation, remoteLoc)
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
glog.V(0).Infof("unmounted %s exiting ...", mountedDir)
|
|
|
|
os.Exit(0)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if message.NewEntry.Name == remoteStorage.Name+filer.REMOTE_STORAGE_CONF_SUFFIX {
|
|
|
|
conf := &remote_pb.RemoteConf{}
|
|
|
|
if err := proto.Unmarshal(message.NewEntry.Content, conf); err != nil {
|
|
|
|
return fmt.Errorf("unmarshal %s/%s: %v", filer.DirectoryEtcRemote, message.NewEntry.Name, err)
|
|
|
|
}
|
|
|
|
remoteStorage = conf
|
|
|
|
if newClient, err := remote_storage.GetRemoteStorage(remoteStorage); err == nil {
|
|
|
|
client = newClient
|
|
|
|
} else {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
eachEntryFunc := func(resp *filer_pb.SubscribeMetadataResponse) error {
|
|
|
|
message := resp.EventNotification
|
|
|
|
if strings.HasPrefix(resp.Directory, filer.DirectoryEtcRemote) {
|
|
|
|
return handleEtcRemoteChanges(resp)
|
|
|
|
}
|
|
|
|
|
2022-02-25 09:17:26 +00:00
|
|
|
if filer_pb.IsEmpty(resp) {
|
2021-09-04 11:35:46 +00:00
|
|
|
return nil
|
|
|
|
}
|
2022-02-25 09:17:26 +00:00
|
|
|
if filer_pb.IsCreate(resp) {
|
2022-09-12 04:53:15 +00:00
|
|
|
if isMultipartUploadFile(message.NewParentPath, message.NewEntry.Name) {
|
2022-09-10 05:57:38 +00:00
|
|
|
return nil
|
|
|
|
}
|
2021-09-04 11:35:46 +00:00
|
|
|
if !filer.HasData(message.NewEntry) {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
glog.V(2).Infof("create: %+v", resp)
|
|
|
|
if !shouldSendToRemote(message.NewEntry) {
|
|
|
|
glog.V(2).Infof("skipping creating: %+v", resp)
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
dest := toRemoteStorageLocation(util.FullPath(mountedDir), util.NewFullPath(message.NewParentPath, message.NewEntry.Name), remoteStorageMountLocation)
|
|
|
|
if message.NewEntry.IsDirectory {
|
|
|
|
glog.V(0).Infof("mkdir %s", remote_storage.FormatLocation(dest))
|
|
|
|
return client.WriteDirectory(dest, message.NewEntry)
|
|
|
|
}
|
|
|
|
glog.V(0).Infof("create %s", remote_storage.FormatLocation(dest))
|
2021-11-23 05:48:00 +00:00
|
|
|
remoteEntry, writeErr := retriedWriteFile(client, filerSource, message.NewEntry, dest)
|
2021-09-04 11:35:46 +00:00
|
|
|
if writeErr != nil {
|
|
|
|
return writeErr
|
|
|
|
}
|
2023-12-13 20:02:57 +00:00
|
|
|
return updateLocalEntry(option, message.NewParentPath, message.NewEntry, remoteEntry)
|
2021-09-04 11:35:46 +00:00
|
|
|
}
|
2022-02-25 09:17:26 +00:00
|
|
|
if filer_pb.IsDelete(resp) {
|
2021-09-04 11:35:46 +00:00
|
|
|
glog.V(2).Infof("delete: %+v", resp)
|
|
|
|
dest := toRemoteStorageLocation(util.FullPath(mountedDir), util.NewFullPath(resp.Directory, message.OldEntry.Name), remoteStorageMountLocation)
|
|
|
|
if message.OldEntry.IsDirectory {
|
|
|
|
glog.V(0).Infof("rmdir %s", remote_storage.FormatLocation(dest))
|
|
|
|
return client.RemoveDirectory(dest)
|
|
|
|
}
|
|
|
|
glog.V(0).Infof("delete %s", remote_storage.FormatLocation(dest))
|
|
|
|
return client.DeleteFile(dest)
|
|
|
|
}
|
|
|
|
if message.OldEntry != nil && message.NewEntry != nil {
|
|
|
|
oldDest := toRemoteStorageLocation(util.FullPath(mountedDir), util.NewFullPath(resp.Directory, message.OldEntry.Name), remoteStorageMountLocation)
|
|
|
|
dest := toRemoteStorageLocation(util.FullPath(mountedDir), util.NewFullPath(message.NewParentPath, message.NewEntry.Name), remoteStorageMountLocation)
|
|
|
|
if !shouldSendToRemote(message.NewEntry) {
|
|
|
|
glog.V(2).Infof("skipping updating: %+v", resp)
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
if message.NewEntry.IsDirectory {
|
|
|
|
return client.WriteDirectory(dest, message.NewEntry)
|
|
|
|
}
|
|
|
|
if resp.Directory == message.NewParentPath && message.OldEntry.Name == message.NewEntry.Name {
|
|
|
|
if filer.IsSameData(message.OldEntry, message.NewEntry) {
|
|
|
|
glog.V(2).Infof("update meta: %+v", resp)
|
|
|
|
return client.UpdateFileMetadata(dest, message.OldEntry, message.NewEntry)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
glog.V(2).Infof("update: %+v", resp)
|
|
|
|
glog.V(0).Infof("delete %s", remote_storage.FormatLocation(oldDest))
|
|
|
|
if err := client.DeleteFile(oldDest); err != nil {
|
2022-09-12 04:53:15 +00:00
|
|
|
if isMultipartUploadFile(resp.Directory, message.OldEntry.Name) {
|
|
|
|
return nil
|
2022-09-10 05:57:38 +00:00
|
|
|
}
|
2021-09-04 11:35:46 +00:00
|
|
|
}
|
2021-11-23 05:48:00 +00:00
|
|
|
remoteEntry, writeErr := retriedWriteFile(client, filerSource, message.NewEntry, dest)
|
2021-09-04 11:35:46 +00:00
|
|
|
if writeErr != nil {
|
|
|
|
return writeErr
|
|
|
|
}
|
2023-12-13 20:02:57 +00:00
|
|
|
return updateLocalEntry(option, message.NewParentPath, message.NewEntry, remoteEntry)
|
2021-09-04 11:35:46 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
return eachEntryFunc, nil
|
|
|
|
}
|
|
|
|
|
2021-11-23 05:48:00 +00:00
|
|
|
func retriedWriteFile(client remote_storage.RemoteStorageClient, filerSource *source.FilerSource, newEntry *filer_pb.Entry, dest *remote_pb.RemoteStorageLocation) (remoteEntry *filer_pb.RemoteEntry, err error) {
|
|
|
|
var writeErr error
|
|
|
|
err = util.Retry("writeFile", func() error {
|
|
|
|
reader := filer.NewFileReader(filerSource, newEntry)
|
|
|
|
glog.V(0).Infof("create %s", remote_storage.FormatLocation(dest))
|
|
|
|
remoteEntry, writeErr = client.WriteFile(dest, newEntry, reader)
|
|
|
|
if writeErr != nil {
|
|
|
|
return writeErr
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
})
|
2021-11-29 06:06:17 +00:00
|
|
|
if err != nil {
|
|
|
|
glog.Errorf("write to %s: %v", dest, err)
|
|
|
|
}
|
2021-11-23 05:48:00 +00:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2021-09-16 05:47:17 +00:00
|
|
|
func collectLastSyncOffset(filerClient filer_pb.FilerClient, grpcDialOption grpc.DialOption, filerAddress pb.ServerAddress, mountedDir string, timeAgo time.Duration) time.Time {
|
2021-09-04 11:35:46 +00:00
|
|
|
// 1. specified by timeAgo
|
|
|
|
// 2. last offset timestamp for this directory
|
|
|
|
// 3. directory creation time
|
|
|
|
var lastOffsetTs time.Time
|
2021-09-16 05:47:17 +00:00
|
|
|
if timeAgo == 0 {
|
|
|
|
mountedDirEntry, err := filer_pb.GetEntry(filerClient, util.FullPath(mountedDir))
|
2021-09-04 11:35:46 +00:00
|
|
|
if err != nil {
|
|
|
|
glog.V(0).Infof("get mounted directory %s: %v", mountedDir, err)
|
|
|
|
return time.Now()
|
|
|
|
}
|
|
|
|
|
2021-09-16 05:47:17 +00:00
|
|
|
lastOffsetTsNs, err := remote_storage.GetSyncOffset(grpcDialOption, filerAddress, mountedDir)
|
2021-09-04 11:35:46 +00:00
|
|
|
if mountedDirEntry != nil {
|
|
|
|
if err == nil && mountedDirEntry.Attributes.Crtime < lastOffsetTsNs/1000000 {
|
|
|
|
lastOffsetTs = time.Unix(0, lastOffsetTsNs)
|
|
|
|
glog.V(0).Infof("resume from %v", lastOffsetTs)
|
|
|
|
} else {
|
|
|
|
lastOffsetTs = time.Unix(mountedDirEntry.Attributes.Crtime, 0)
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
lastOffsetTs = time.Now()
|
|
|
|
}
|
|
|
|
} else {
|
2021-09-16 05:47:17 +00:00
|
|
|
lastOffsetTs = time.Now().Add(-timeAgo)
|
2021-09-04 11:35:46 +00:00
|
|
|
}
|
|
|
|
return lastOffsetTs
|
|
|
|
}
|
|
|
|
|
|
|
|
func toRemoteStorageLocation(mountDir, sourcePath util.FullPath, remoteMountLocation *remote_pb.RemoteStorageLocation) *remote_pb.RemoteStorageLocation {
|
|
|
|
source := string(sourcePath[len(mountDir):])
|
|
|
|
dest := util.FullPath(remoteMountLocation.Path).Child(source)
|
|
|
|
return &remote_pb.RemoteStorageLocation{
|
|
|
|
Name: remoteMountLocation.Name,
|
|
|
|
Bucket: remoteMountLocation.Bucket,
|
|
|
|
Path: string(dest),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func shouldSendToRemote(entry *filer_pb.Entry) bool {
|
|
|
|
if entry.RemoteEntry == nil {
|
|
|
|
return true
|
|
|
|
}
|
2021-09-05 01:46:28 +00:00
|
|
|
if entry.RemoteEntry.RemoteMtime < entry.Attributes.Mtime {
|
2021-09-04 11:35:46 +00:00
|
|
|
return true
|
|
|
|
}
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
func updateLocalEntry(filerClient filer_pb.FilerClient, dir string, entry *filer_pb.Entry, remoteEntry *filer_pb.RemoteEntry) error {
|
2021-09-04 20:46:22 +00:00
|
|
|
remoteEntry.LastLocalSyncTsNs = time.Now().UnixNano()
|
2021-09-04 11:35:46 +00:00
|
|
|
entry.RemoteEntry = remoteEntry
|
2021-12-26 08:15:03 +00:00
|
|
|
return filerClient.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error {
|
2021-09-04 11:35:46 +00:00
|
|
|
_, err := client.UpdateEntry(context.Background(), &filer_pb.UpdateEntryRequest{
|
|
|
|
Directory: dir,
|
|
|
|
Entry: entry,
|
|
|
|
})
|
|
|
|
return err
|
|
|
|
})
|
|
|
|
}
|
2022-09-12 04:53:15 +00:00
|
|
|
|
|
|
|
func isMultipartUploadFile(dir string, name string) bool {
|
|
|
|
return strings.HasPrefix(dir, "/buckets/") &&
|
|
|
|
strings.Contains(dir, "/"+s3_constants.MultipartUploadsFolder+"/") &&
|
|
|
|
strings.HasSuffix(name, ".part")
|
|
|
|
}
|