2021-08-09 21:35:18 +00:00
|
|
|
package shell
|
|
|
|
|
|
|
|
import (
|
|
|
|
"flag"
|
|
|
|
"fmt"
|
|
|
|
"github.com/chrislusf/seaweedfs/weed/filer"
|
|
|
|
"github.com/chrislusf/seaweedfs/weed/pb/filer_pb"
|
2021-08-26 22:18:34 +00:00
|
|
|
"github.com/chrislusf/seaweedfs/weed/pb/remote_pb"
|
2021-08-09 21:35:18 +00:00
|
|
|
"github.com/chrislusf/seaweedfs/weed/util"
|
|
|
|
"io"
|
2021-08-26 23:16:26 +00:00
|
|
|
"sync"
|
2021-08-09 21:35:18 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
func init() {
|
|
|
|
Commands = append(Commands, &commandRemoteCache{})
|
|
|
|
}
|
|
|
|
|
|
|
|
// commandRemoteCache is the receiver type for the "remote.cache" shell
// command. It has no fields; all inputs arrive through the method arguments.
type commandRemoteCache struct{}
|
|
|
|
|
|
|
|
func (c *commandRemoteCache) Name() string {
|
|
|
|
return "remote.cache"
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c *commandRemoteCache) Help() string {
|
|
|
|
return `cache the file content for mounted directories or files
|
|
|
|
|
2021-08-10 09:48:41 +00:00
|
|
|
# assume a remote storage is configured to name "cloud1"
|
|
|
|
remote.configure -name=cloud1 -type=s3 -access_key=xxx -secret_key=yyy
|
2021-08-09 21:35:18 +00:00
|
|
|
# mount and pull one bucket
|
2021-08-14 22:55:53 +00:00
|
|
|
remote.mount -dir=/xxx -remote=cloud1/bucket
|
2021-08-09 21:35:18 +00:00
|
|
|
|
|
|
|
# after mount, run one of these command to cache the content of the files
|
2021-08-14 22:55:53 +00:00
|
|
|
remote.cache -dir=/xxx
|
|
|
|
remote.cache -dir=/xxx/some/sub/dir
|
|
|
|
remote.cache -dir=/xxx/some/sub/dir -include=*.pdf
|
2021-08-21 09:17:10 +00:00
|
|
|
remote.cache -dir=/xxx/some/sub/dir -exclude=*.txt
|
|
|
|
remote.cache -maxSize=1024000 # cache files smaller than 100K
|
|
|
|
remote.cache -maxAge=3600 # cache files less than 1 hour old
|
2021-08-14 22:11:52 +00:00
|
|
|
|
|
|
|
This is designed to run regularly. So you can add it to some cronjob.
|
|
|
|
If a file is already synchronized with the remote copy, the file will be skipped to avoid unnecessary copy.
|
|
|
|
|
2021-08-21 09:17:10 +00:00
|
|
|
The actual data copying goes through volume severs in parallel.
|
2021-08-09 21:35:18 +00:00
|
|
|
|
|
|
|
`
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c *commandRemoteCache) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) {
|
|
|
|
|
|
|
|
remoteMountCommand := flag.NewFlagSet(c.Name(), flag.ContinueOnError)
|
|
|
|
|
2021-09-05 21:47:06 +00:00
|
|
|
dir := remoteMountCommand.String("dir", "", "a mounted directory or one of its sub folders in filer")
|
2021-08-27 00:05:56 +00:00
|
|
|
concurrency := remoteMountCommand.Int("concurrent", 32, "concurrent file downloading")
|
2021-08-14 22:11:52 +00:00
|
|
|
fileFiler := newFileFilter(remoteMountCommand)
|
2021-08-09 21:35:18 +00:00
|
|
|
|
|
|
|
if err = remoteMountCommand.Parse(args); err != nil {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2021-09-05 21:47:06 +00:00
|
|
|
if *dir != "" {
|
|
|
|
if err := c.doCacheOneDirectory(commandEnv, writer, *dir, fileFiler, *concurrency); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
mappings, err := filer.ReadMountMappings(commandEnv.option.GrpcDialOption, commandEnv.option.FilerAddress)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
for key, _ := range mappings.Mappings {
|
|
|
|
if err := c.doCacheOneDirectory(commandEnv, writer, key, fileFiler, *concurrency); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c *commandRemoteCache) doCacheOneDirectory(commandEnv *CommandEnv, writer io.Writer, dir string, fileFiler *FileFilter, concurrency int) (error) {
|
|
|
|
mappings, localMountedDir, remoteStorageMountedLocation, remoteStorageConf, detectErr := detectMountInfo(commandEnv, writer, dir)
|
2021-09-01 09:45:42 +00:00
|
|
|
if detectErr != nil {
|
2021-08-15 19:09:54 +00:00
|
|
|
jsonPrintln(writer, mappings)
|
2021-08-15 08:53:46 +00:00
|
|
|
return detectErr
|
2021-08-09 21:35:18 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// pull content from remote
|
2021-09-05 21:47:06 +00:00
|
|
|
if err := c.cacheContentData(commandEnv, writer, util.FullPath(localMountedDir), remoteStorageMountedLocation, util.FullPath(dir), fileFiler, remoteStorageConf, concurrency); err != nil {
|
|
|
|
return fmt.Errorf("cache content data on %s: %v", localMountedDir, err)
|
2021-08-09 21:35:18 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func recursivelyTraverseDirectory(filerClient filer_pb.FilerClient, dirPath util.FullPath, visitEntry func(dir util.FullPath, entry *filer_pb.Entry) bool) (err error) {
|
|
|
|
|
|
|
|
err = filer_pb.ReadDirAllEntries(filerClient, dirPath, "", func(entry *filer_pb.Entry, isLast bool) error {
|
|
|
|
if entry.IsDirectory {
|
|
|
|
if !visitEntry(dirPath, entry) {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
subDir := dirPath.Child(entry.Name)
|
|
|
|
if err := recursivelyTraverseDirectory(filerClient, subDir, visitEntry); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2021-08-09 21:37:34 +00:00
|
|
|
} else {
|
2021-08-09 21:35:18 +00:00
|
|
|
if !visitEntry(dirPath, entry) {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
})
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
func shouldCacheToLocal(entry *filer_pb.Entry) bool {
|
|
|
|
if entry.IsDirectory {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
if entry.RemoteEntry == nil {
|
|
|
|
return false
|
|
|
|
}
|
2021-08-15 04:46:34 +00:00
|
|
|
if entry.RemoteEntry.LastLocalSyncTsNs == 0 && entry.RemoteEntry.RemoteSize > 0 {
|
2021-08-09 21:35:18 +00:00
|
|
|
return true
|
|
|
|
}
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
func mayHaveCachedToLocal(entry *filer_pb.Entry) bool {
|
|
|
|
if entry.IsDirectory {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
if entry.RemoteEntry == nil {
|
2021-08-14 22:11:52 +00:00
|
|
|
return false // should not uncache an entry that is not in remote
|
2021-08-09 21:35:18 +00:00
|
|
|
}
|
2021-08-26 22:18:34 +00:00
|
|
|
if entry.RemoteEntry.LastLocalSyncTsNs > 0 {
|
2021-08-09 21:35:18 +00:00
|
|
|
return true
|
|
|
|
}
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2021-08-26 23:16:26 +00:00
|
|
|
func (c *commandRemoteCache) cacheContentData(commandEnv *CommandEnv, writer io.Writer, localMountedDir util.FullPath, remoteMountedLocation *remote_pb.RemoteStorageLocation, dirToCache util.FullPath, fileFilter *FileFilter, remoteConf *remote_pb.RemoteConf, concurrency int) error {
|
2021-08-09 21:35:18 +00:00
|
|
|
|
2021-08-26 23:16:26 +00:00
|
|
|
var wg sync.WaitGroup
|
|
|
|
limitedConcurrentExecutor := util.NewLimitedConcurrentExecutor(concurrency)
|
|
|
|
var executionErr error
|
|
|
|
|
|
|
|
traverseErr := recursivelyTraverseDirectory(commandEnv, dirToCache, func(dir util.FullPath, entry *filer_pb.Entry) bool {
|
2021-08-09 21:35:18 +00:00
|
|
|
if !shouldCacheToLocal(entry) {
|
|
|
|
return true // true means recursive traversal should continue
|
|
|
|
}
|
|
|
|
|
2021-09-04 20:58:14 +00:00
|
|
|
if !fileFilter.matches(entry) {
|
2021-08-14 22:11:52 +00:00
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
2021-08-26 23:16:26 +00:00
|
|
|
wg.Add(1)
|
|
|
|
limitedConcurrentExecutor.Execute(func() {
|
|
|
|
defer wg.Done()
|
|
|
|
fmt.Fprintf(writer, "Cache %+v ...\n", dir.Child(entry.Name))
|
2021-08-09 21:35:18 +00:00
|
|
|
|
2021-08-26 23:16:26 +00:00
|
|
|
remoteLocation := filer.MapFullPathToRemoteStorageLocation(localMountedDir, remoteMountedLocation, dir.Child(entry.Name))
|
2021-08-09 21:35:18 +00:00
|
|
|
|
2021-08-26 23:16:26 +00:00
|
|
|
if err := filer.DownloadToLocal(commandEnv, remoteConf, remoteLocation, dir, entry); err != nil {
|
|
|
|
fmt.Fprintf(writer, "DownloadToLocal %+v: %v\n", remoteLocation, err)
|
|
|
|
if executionErr == nil {
|
|
|
|
executionErr = fmt.Errorf("DownloadToLocal %+v: %v\n", remoteLocation, err)
|
|
|
|
}
|
|
|
|
return
|
|
|
|
}
|
|
|
|
fmt.Fprintf(writer, "Cache %+v Done\n", dir.Child(entry.Name))
|
|
|
|
})
|
2021-08-09 21:35:18 +00:00
|
|
|
|
|
|
|
return true
|
|
|
|
})
|
2021-08-26 23:16:26 +00:00
|
|
|
wg.Wait()
|
|
|
|
|
|
|
|
if traverseErr != nil {
|
|
|
|
return traverseErr
|
|
|
|
}
|
|
|
|
if executionErr != nil {
|
|
|
|
return executionErr
|
|
|
|
}
|
|
|
|
return nil
|
2021-08-09 21:35:18 +00:00
|
|
|
}
|