From 3edfe1d28f6cfa9f33fd591b1a4e40c9c592604a Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Sat, 14 Jul 2018 20:51:17 -0700 Subject: [PATCH] extend export command to show tombstone + change output format to CSV merging https://github.com/chrislusf/seaweedfs/pull/610 and add "-limit" option --- weed/command/export.go | 132 ++++++++++++++++++------------ weed/storage/needle.go | 4 + weed/storage/volume_read_write.go | 6 +- 3 files changed, 90 insertions(+), 52 deletions(-) diff --git a/weed/command/export.go b/weed/command/export.go index 72fcae9fb..b78b3f601 100644 --- a/weed/command/export.go +++ b/weed/command/export.go @@ -15,6 +15,7 @@ import ( "github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/storage" "github.com/chrislusf/seaweedfs/weed/storage/types" + "io" ) const ( @@ -50,9 +51,11 @@ func init() { } var ( - output = cmdExport.Flag.String("o", "", "output tar file name, must ends with .tar, or just a \"-\" for stdout") - format = cmdExport.Flag.String("fileNameFormat", defaultFnFormat, "filename formatted with {{.Mime}} {{.Id}} {{.Name}} {{.Ext}}") - newer = cmdExport.Flag.String("newer", "", "export only files newer than this time, default is all files. Must be specified in RFC3339 without timezone, e.g. 2006-01-02T15:04:05") + output = cmdExport.Flag.String("o", "", "output tar file name, must ends with .tar, or just a \"-\" for stdout") + format = cmdExport.Flag.String("fileNameFormat", defaultFnFormat, "filename formatted with {{.Mime}} {{.Id}} {{.Name}} {{.Ext}}") + newer = cmdExport.Flag.String("newer", "", "export only files newer than this time, default is all files. Must be specified in RFC3339 without timezone, e.g. 2006-01-02T15:04:05") + showDeleted = cmdExport.Flag.Bool("deleted", false, "export deleted files. only applies if -o is not specified") + limit = cmdExport.Flag.Int("limit", 0, "only show first n entries if specified") tarOutputFile *tar.Writer tarHeader tar.Header @@ -63,6 +66,24 @@ var ( localLocation, _ = time.LoadLocation("Local") ) +func printNeedle(vid storage.VolumeId, n *storage.Needle, version storage.Version, deleted bool) { + key := storage.NewFileIdFromNeedle(vid, n).String() + size := n.DataSize + if version == storage.Version1 { + size = n.Size + } + fmt.Printf("%s\t%s\t%d\t%t\t%s\t%s\t%s\t%t\n", + key, + n.Name, + size, + n.IsGzipped(), + n.Mime, + n.LastModifiedString(), + n.Ttl.String(), + deleted, + ) +} + func runExport(cmd *Command, args []string) bool { var err error @@ -104,7 +125,7 @@ func runExport(cmd *Command, args []string) bool { t := time.Now() tarHeader = tar.Header{Mode: 0644, ModTime: t, Uid: os.Getuid(), Gid: os.Getgid(), - Typeflag: tar.TypeReg, + Typeflag: tar.TypeReg, AccessTime: t, ChangeTime: t} } @@ -126,6 +147,12 @@ func runExport(cmd *Command, args []string) bool { var version storage.Version + if tarOutputFile == nil { + fmt.Printf("key\tname\tsize\tgzip\tmime\tmodified\tttl\tdeleted\n") + } + + var counter = 0 + err = storage.ScanVolumeFile(*export.dir, *export.collection, vid, storage.NeedleMapInMemory, func(superBlock storage.SuperBlock) error { @@ -135,22 +162,39 @@ func runExport(cmd *Command, args []string) bool { nv, ok := needleMap.Get(n.Id) glog.V(3).Infof("key %d offset %d size %d disk_size %d gzip %v ok %v nv %+v", n.Id, offset, n.Size, n.DiskSize(), n.IsGzipped(), ok, nv) - if ok && nv.Size > 0 && int64(nv.Offset)*8 == offset { + if ok && nv.Size > 0 && int64(nv.Offset)*types.NeedlePaddingSize == offset { if newerThanUnix >= 0 && n.HasLastModifiedDate() && n.LastModified < uint64(newerThanUnix) { glog.V(3).Infof("Skipping this file, as it's old enough: LastModified %d vs %d", n.LastModified, newerThanUnix) return nil } - return walker(vid, n, version) + counter++ + if *limit > 0 && counter > *limit { + return io.EOF + } + if tarOutputFile != nil { + return writeFile(vid, n) + } else { + printNeedle(vid, n, version, false) + return nil + } } if !ok { + if *showDeleted && tarOutputFile == nil { + if n.DataSize > 0 { + printNeedle(vid, n, version, true) + } else { + n.Name = []byte("*tombstone") + printNeedle(vid, n, version, true) + } + } glog.V(2).Infof("This seems deleted %d size %d", n.Id, n.Size) } else { glog.V(2).Infof("Skipping later-updated Id %d size %d", n.Id, n.Size) } return nil }) - if err != nil { + if err != nil && err != io.EOF { glog.Fatalf("Export Volume File [ERROR] %s\n", err) } return true @@ -164,51 +208,37 @@ type nameParams struct { Ext string } -func walker(vid storage.VolumeId, n *storage.Needle, version storage.Version) (err error) { +func writeFile(vid storage.VolumeId, n *storage.Needle) (err error) { key := storage.NewFileIdFromNeedle(vid, n).String() - if tarOutputFile != nil { - fileNameTemplateBuffer.Reset() - if err = fileNameTemplate.Execute(fileNameTemplateBuffer, - nameParams{ - Name: string(n.Name), - Id: n.Id, - Mime: string(n.Mime), - Key: key, - Ext: filepath.Ext(string(n.Name)), - }, - ); err != nil { - return err - } - - fileName := fileNameTemplateBuffer.String() - - if n.IsGzipped() && path.Ext(fileName) != ".gz" { - fileName = fileName + ".gz" - } - - tarHeader.Name, tarHeader.Size = fileName, int64(len(n.Data)) - if n.HasLastModifiedDate() { - tarHeader.ModTime = time.Unix(int64(n.LastModified), 0) - } else { - tarHeader.ModTime = time.Unix(0, 0) - } - tarHeader.ChangeTime = tarHeader.ModTime - if err = tarOutputFile.WriteHeader(&tarHeader); err != nil { - return err - } - _, err = tarOutputFile.Write(n.Data) - } else { - size := n.DataSize - if version == storage.Version1 { - size = n.Size - } - fmt.Printf("key=%s Name=%s Size=%d gzip=%t mime=%s\n", - key, - n.Name, - size, - n.IsGzipped(), - n.Mime, - ) + fileNameTemplateBuffer.Reset() + if err = fileNameTemplate.Execute(fileNameTemplateBuffer, + nameParams{ + Name: string(n.Name), + Id: n.Id, + Mime: string(n.Mime), + Key: key, + Ext: filepath.Ext(string(n.Name)), + }, + ); err != nil { + return err } + + fileName := fileNameTemplateBuffer.String() + + if n.IsGzipped() && path.Ext(fileName) != ".gz" { + fileName = fileName + ".gz" + } + + tarHeader.Name, tarHeader.Size = fileName, int64(len(n.Data)) + if n.HasLastModifiedDate() { + tarHeader.ModTime = time.Unix(int64(n.LastModified), 0) + } else { + tarHeader.ModTime = time.Unix(0, 0) + } + tarHeader.ChangeTime = tarHeader.ModTime + if err = tarOutputFile.WriteHeader(&tarHeader); err != nil { + return err + } + _, err = tarOutputFile.Write(n.Data) return } diff --git a/weed/storage/needle.go b/weed/storage/needle.go index 39952a3f9..492e4646c 100644 --- a/weed/storage/needle.go +++ b/weed/storage/needle.go @@ -258,3 +258,7 @@ func ParseNeedleIdCookie(key_hash_string string) (NeedleId, Cookie, error) { } return needleId, cookie, nil } + +func (n *Needle) LastModifiedString() string { + return time.Unix(int64(n.LastModified), 0).Format("2006-01-02T15:04:05") +} diff --git a/weed/storage/volume_read_write.go b/weed/storage/volume_read_write.go index dc43c488a..3e36bfb2e 100644 --- a/weed/storage/volume_read_write.go +++ b/weed/storage/volume_read_write.go @@ -217,7 +217,11 @@ func ScanVolumeFile(dirname string, collection string, id VolumeId, glog.V(4).Infof("Adjusting n.Size %d=>0 rest:%d=>%d %+v", oldSize, oldRest, rest, n) } } - if err = visitNeedle(n, offset); err != nil { + err = visitNeedle(n, offset) + if err == io.EOF { + return nil + } + if err != nil { glog.V(0).Infof("visit needle error: %v", err) } offset += NeedleEntrySize + rest