From 708debca147c7bbe65ad000cd90e1c91c52f0b9d Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Sat, 14 Aug 2021 15:11:52 -0700 Subject: [PATCH] remote.cache and uncache: more flexible options to select files to cache or uncache --- weed/shell/command_remote_cache.go | 17 ++++-- weed/shell/command_remote_uncache.go | 80 ++++++++++++++++++++++++++-- 2 files changed, 89 insertions(+), 8 deletions(-) diff --git a/weed/shell/command_remote_cache.go b/weed/shell/command_remote_cache.go index a964e994c..c74166611 100644 --- a/weed/shell/command_remote_cache.go +++ b/weed/shell/command_remote_cache.go @@ -32,6 +32,12 @@ func (c *commandRemoteCache) Help() string { # after mount, run one of these command to cache the content of the files remote.cache -dir=xxx remote.cache -dir=xxx/some/sub/dir + remote.cache -dir=xxx/some/sub/dir -include=*.pdf + + This is designed to run regularly. So you can add it to some cronjob. + If a file is already synchronized with the remote copy, the file will be skipped to avoid unnecessary copy. + + The actual data copying goes through volume servers. 
` } @@ -41,6 +47,7 @@ func (c *commandRemoteCache) Do(args []string, commandEnv *CommandEnv, writer io remoteMountCommand := flag.NewFlagSet(c.Name(), flag.ContinueOnError) dir := remoteMountCommand.String("dir", "", "a directory in filer") + fileFiler := newFileFilter(remoteMountCommand) if err = remoteMountCommand.Parse(args); err != nil { return nil @@ -76,7 +83,7 @@ func (c *commandRemoteCache) Do(args []string, commandEnv *CommandEnv, writer io } // pull content from remote - if err = c.cacheContentData(commandEnv, writer, util.FullPath(localMountedDir), remoteStorageMountedLocation, util.FullPath(*dir), remoteStorageConf); err != nil { + if err = c.cacheContentData(commandEnv, writer, util.FullPath(localMountedDir), remoteStorageMountedLocation, util.FullPath(*dir), fileFiler, remoteStorageConf); err != nil { return fmt.Errorf("cache content data: %v", err) } @@ -122,7 +129,7 @@ func mayHaveCachedToLocal(entry *filer_pb.Entry) bool { return false } if entry.RemoteEntry == nil { - return false + return false // should not uncache an entry that is not in remote } if entry.RemoteEntry.LocalMtime > 0 && len(entry.Chunks) > 0 { return true @@ -130,13 +137,17 @@ func mayHaveCachedToLocal(entry *filer_pb.Entry) bool { return false } -func (c *commandRemoteCache) cacheContentData(commandEnv *CommandEnv, writer io.Writer, localMountedDir util.FullPath, remoteMountedLocation *filer_pb.RemoteStorageLocation, dirToCache util.FullPath, remoteConf *filer_pb.RemoteConf) error { +func (c *commandRemoteCache) cacheContentData(commandEnv *CommandEnv, writer io.Writer, localMountedDir util.FullPath, remoteMountedLocation *filer_pb.RemoteStorageLocation, dirToCache util.FullPath, fileFilter *FileFilter, remoteConf *filer_pb.RemoteConf) error { return recursivelyTraverseDirectory(commandEnv, dirToCache, func(dir util.FullPath, entry *filer_pb.Entry) bool { if !shouldCacheToLocal(entry) { return true // true means recursive traversal should continue } + if 
fileFilter.matches(entry) { + return true + } + println(dir, entry.Name) remoteLocation := filer.MapFullPathToRemoteStorageLocation(localMountedDir, remoteMountedLocation, dir.Child(entry.Name)) diff --git a/weed/shell/command_remote_uncache.go b/weed/shell/command_remote_uncache.go index 64cc1472c..f94fe8bec 100644 --- a/weed/shell/command_remote_uncache.go +++ b/weed/shell/command_remote_uncache.go @@ -8,6 +8,7 @@ import ( "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" "github.com/chrislusf/seaweedfs/weed/util" "io" + "path/filepath" "strings" ) @@ -25,19 +26,25 @@ func (c *commandRemoteUncache) Name() string { func (c *commandRemoteUncache) Help() string { return `keep the metadata but remote cache the file content for mounted directories or files + This is designed to run regularly. So you can add it to some cronjob. + If a file is not synchronized with the remote copy, the file will be skipped to avoid loss of data. + remote.uncache -dir=xxx remote.uncache -dir=xxx/some/sub/dir + remote.uncache -dir=xxx/some/sub/dir -include=*.pdf + remote.uncache -dir=xxx/some/sub/dir -exclude=*.txt ` } func (c *commandRemoteUncache) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) { - remoteMountCommand := flag.NewFlagSet(c.Name(), flag.ContinueOnError) + remoteUnmountCommand := flag.NewFlagSet(c.Name(), flag.ContinueOnError) - dir := remoteMountCommand.String("dir", "", "a directory in filer") + dir := remoteUnmountCommand.String("dir", "", "a directory in filer") + fileFiler := newFileFilter(remoteUnmountCommand) - if err = remoteMountCommand.Parse(args); err != nil { + if err = remoteUnmountCommand.Parse(args); err != nil { return nil } @@ -64,19 +71,28 @@ func (c *commandRemoteUncache) Do(args []string, commandEnv *CommandEnv, writer } // pull content from remote - if err = c.uncacheContentData(commandEnv, writer, util.FullPath(*dir)); err != nil { + if err = c.uncacheContentData(commandEnv, writer, util.FullPath(*dir), fileFiler); err != nil { 
return fmt.Errorf("cache content data: %v", err) } return nil } -func (c *commandRemoteUncache) uncacheContentData(commandEnv *CommandEnv, writer io.Writer, dirToCache util.FullPath) error { +func (c *commandRemoteUncache) uncacheContentData(commandEnv *CommandEnv, writer io.Writer, dirToCache util.FullPath, fileFilter *FileFilter) error { return recursivelyTraverseDirectory(commandEnv, dirToCache, func(dir util.FullPath, entry *filer_pb.Entry) bool { if !mayHaveCachedToLocal(entry) { return true // true means recursive traversal should continue } + + if fileFilter.matches(entry) { + return true + } + + if entry.RemoteEntry.LocalMtime < entry.Attributes.Mtime { + return true // should not uncache an entry that is not synchronized with remote + } + entry.RemoteEntry.LocalMtime = 0 entry.Chunks = nil @@ -97,3 +113,57 @@ func (c *commandRemoteUncache) uncacheContentData(commandEnv *CommandEnv, writer return true }) } + +type FileFilter struct { + include *string + exclude *string + minSize *int64 + maxSize *int64 + minAge *int64 + maxAge *int64 +} + +func newFileFilter(remoteMountCommand *flag.FlagSet) (ff *FileFilter) { + ff = &FileFilter{} + ff.include = remoteMountCommand.String("include", "", "patterns of file names, e.g., *.pdf, *.html, ab?d.txt") + ff.exclude = remoteMountCommand.String("exclude", "", "patterns of file names, e.g., *.pdf, *.html, ab?d.txt") + ff.minSize = remoteMountCommand.Int64("minSize", -1, "minimum file size in bytes") + ff.maxSize = remoteMountCommand.Int64("maxSize", -1, "maximum file size in bytes") + ff.minAge = remoteMountCommand.Int64("minAge", -1, "minimum file age in seconds") + ff.maxAge = remoteMountCommand.Int64("maxAge", -1, "maximum file age in seconds") + return +} + +func (ff *FileFilter) matches(entry *filer_pb.Entry) bool { + if *ff.include != "" { + if ok, _ := filepath.Match(*ff.include, entry.Name); !ok { + return true + } + } + if *ff.exclude != "" { + if ok, _ := filepath.Match(*ff.exclude, entry.Name); ok { + return 
true + } + } + if *ff.minSize != -1 { + if int64(entry.Attributes.FileSize) < *ff.minSize { + return false + } + } + if *ff.maxSize != -1 { + if int64(entry.Attributes.FileSize) > *ff.maxSize { + return false + } + } + if *ff.minAge != -1 { + if entry.Attributes.Crtime < *ff.minAge { + return false + } + } + if *ff.maxAge != -1 { + if entry.Attributes.Crtime > *ff.maxAge { + return false + } + } + return false +}