From df95ce0b6cc5e4dc91f7dcb0d2fe1ad2bbe47dd7 Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Mon, 25 Mar 2019 23:01:53 -0700 Subject: [PATCH] weed backup: efficient delta backup fix https://github.com/chrislusf/seaweedfs/issues/399 --- weed/command/export.go | 2 +- weed/command/fix.go | 2 +- weed/server/volume_grpc_follow.go | 6 +++--- weed/storage/volume_follow.go | 28 ++++++++++++++++++++++++---- weed/storage/volume_read_write.go | 28 +++++++++++++++++----------- weed/storage/volume_vacuum.go | 2 +- 6 files changed, 47 insertions(+), 21 deletions(-) diff --git a/weed/command/export.go b/weed/command/export.go index 5c7e064ce..cdced5936 100644 --- a/weed/command/export.go +++ b/weed/command/export.go @@ -107,7 +107,7 @@ func (scanner *VolumeFileScanner4Export) VisitNeedle(n *storage.Needle, offset i nv, ok := needleMap.Get(n.Id) glog.V(3).Infof("key %d offset %d size %d disk_size %d gzip %v ok %v nv %+v", n.Id, offset, n.Size, n.DiskSize(scanner.version), n.IsGzipped(), ok, nv) - if ok && nv.Size > 0 && int64(nv.Offset)*types.NeedlePaddingSize == offset { + if ok && nv.Size > 0 && nv.Size != types.TombstoneFileSize && int64(nv.Offset)*types.NeedlePaddingSize == offset { if newerThanUnix >= 0 && n.HasLastModifiedDate() && n.LastModified < uint64(newerThanUnix) { glog.V(3).Infof("Skipping this file, as it's old enough: LastModified %d vs %d", n.LastModified, newerThanUnix) diff --git a/weed/command/fix.go b/weed/command/fix.go index a800978c6..42ae23a3c 100644 --- a/weed/command/fix.go +++ b/weed/command/fix.go @@ -44,7 +44,7 @@ func (scanner *VolumeFileScanner4Fix) ReadNeedleBody() bool { func (scanner *VolumeFileScanner4Fix) VisitNeedle(n *storage.Needle, offset int64) error { glog.V(2).Infof("key %d offset %d size %d disk_size %d gzip %v", n.Id, offset, n.Size, n.DiskSize(scanner.version), n.IsGzipped()) - if n.Size > 0 { + if n.Size > 0 && n.Size != types.TombstoneFileSize { pe := scanner.nm.Put(n.Id, types.Offset(offset/types.NeedlePaddingSize), n.Size) glog.V(2).Infof("saved %d with error %v", n.Size, pe) } else { diff --git a/weed/server/volume_grpc_follow.go b/weed/server/volume_grpc_follow.go index bdd0ef6f5..6b3330a0d 100644 --- a/weed/server/volume_grpc_follow.go +++ b/weed/server/volume_grpc_follow.go @@ -20,14 +20,14 @@ func (vs *VolumeServer) VolumeFollow(req *volume_server_pb.VolumeFollowRequest, stopOffset := v.Size() foundOffset, isLastOne, err := v.BinarySearchByAppendAtNs(req.Since) if err != nil { - return fmt.Errorf("fail to locate by appendAtNs: %s", err) + return fmt.Errorf("fail to locate by appendAtNs %d: %s", req.Since, err) } if isLastOne { return nil } - startOffset := int64(foundOffset) * int64(types.NeedleEntrySize) + startOffset := int64(foundOffset) * int64(types.NeedlePaddingSize) buf := make([]byte, 1024*1024*2) return sendFileContent(v.DataFile(), buf, startOffset, stopOffset, stream) @@ -40,7 +40,7 @@ func sendFileContent(datFile *os.File, buf []byte, startOffset, stopOffset int64 n, readErr := datFile.ReadAt(buf, startOffset+i) if readErr == nil || readErr == io.EOF { resp := &volume_server_pb.VolumeFollowResponse{} - resp.FileContent = buf[i : i+int64(n)] + resp.FileContent = buf[:int64(n)] sendErr := stream.Send(resp) if sendErr != nil { return sendErr diff --git a/weed/storage/volume_follow.go b/weed/storage/volume_follow.go index 2aedd1682..e1a5fcb83 100644 --- a/weed/storage/volume_follow.go +++ b/weed/storage/volume_follow.go @@ -87,9 +87,9 @@ func (v *Volume) Follow(volumeServer string, grpcDialOption grpc.DialOption) (er return err } - // TODO add to needle map + // add to needle map + return ScanVolumeFileFrom(v.version, v.dataFile, startFromOffset, &VolumeFileScanner4GenIdx{v:v}) - return nil } func (v *Volume) findLastAppendAtNs() (uint64, error) { @@ -138,9 +138,9 @@ func (v *Volume) readAppendAtNs(offset Offset) (uint64, error) { if err != nil { return 0, fmt.Errorf("ReadNeedleHeader: %v", err) } - err = n.ReadNeedleBody(v.dataFile, v.SuperBlock.version, int64(offset)*NeedlePaddingSize, bodyLength) + err = n.ReadNeedleBody(v.dataFile, v.SuperBlock.version, int64(offset)*NeedlePaddingSize+int64(NeedleEntrySize), bodyLength) if err != nil { - return 0, fmt.Errorf("ReadNeedleBody offset %d: %v", int64(offset)*NeedlePaddingSize, err) + return 0, fmt.Errorf("ReadNeedleBody offset %d, bodyLength %d: %v", int64(offset)*NeedlePaddingSize, bodyLength, err) } return n.AppendAtNs, nil @@ -218,3 +218,23 @@ func (v *Volume) readAppendAtNsForIndexEntry(indexFile *os.File, bytes []byte, m _, offset, _ := IdxFileEntry(bytes) return offset, nil } + +// generate the volume idx +type VolumeFileScanner4GenIdx struct { + v *Volume +} + +func (scanner *VolumeFileScanner4GenIdx) VisitSuperBlock(superBlock SuperBlock) error { + return nil + +} +func (scanner *VolumeFileScanner4GenIdx) ReadNeedleBody() bool { + return false +} + +func (scanner *VolumeFileScanner4GenIdx) VisitNeedle(n *Needle, offset int64) error { + if n.Size > 0 && n.Size != TombstoneFileSize { + return scanner.v.nm.Put(n.Id, Offset(offset/NeedlePaddingSize), n.Size) + } + return scanner.v.nm.Delete(n.Id, Offset(offset/NeedlePaddingSize)) +} diff --git a/weed/storage/volume_read_write.go b/weed/storage/volume_read_write.go index ed9729c84..5366a547d 100644 --- a/weed/storage/volume_read_write.go +++ b/weed/storage/volume_read_write.go @@ -180,30 +180,37 @@ func ScanVolumeFile(dirname string, collection string, id VolumeId, volumeFileScanner VolumeFileScanner) (err error) { var v *Volume if v, err = loadVolumeWithoutIndex(dirname, collection, id, needleMapKind); err != nil { - return fmt.Errorf("Failed to load volume %d: %v", id, err) + return fmt.Errorf("failed to load volume %d: %v", id, err) } if err = volumeFileScanner.VisitSuperBlock(v.SuperBlock); err != nil { - return fmt.Errorf("Failed to process volume %d super block: %v", id, err) + return fmt.Errorf("failed to process volume %d super block: %v", id, err) } defer v.Close() version := v.Version() offset := int64(v.SuperBlock.BlockSize()) - n, rest, e := ReadNeedleHeader(v.dataFile, version, offset) + + return ScanVolumeFileFrom(version, v.dataFile, offset, volumeFileScanner) +} + +func ScanVolumeFileFrom(version Version, dataFile *os.File, offset int64, volumeFileScanner VolumeFileScanner) (err error) { + n, rest, e := ReadNeedleHeader(dataFile, version, offset) if e != nil { - err = fmt.Errorf("cannot read needle header: %v", e) - return + if e == io.EOF { + return nil + } + return fmt.Errorf("cannot read %s at offset %d: %v", dataFile.Name(), offset, e) } for n != nil { if volumeFileScanner.ReadNeedleBody() { - if err = n.ReadNeedleBody(v.dataFile, version, offset+NeedleEntrySize, rest); err != nil { + if err = n.ReadNeedleBody(dataFile, version, offset+NeedleEntrySize, rest); err != nil { glog.V(0).Infof("cannot read needle body: %v", err) //err = fmt.Errorf("cannot read needle body: %v", err) //return } } - err = volumeFileScanner.VisitNeedle(n, offset) + err := volumeFileScanner.VisitNeedle(n, offset) if err == io.EOF { return nil } @@ -212,14 +219,13 @@ func ScanVolumeFile(dirname string, collection string, id VolumeId, } offset += NeedleEntrySize + rest glog.V(4).Infof("==> new entry offset %d", offset) - if n, rest, err = ReadNeedleHeader(v.dataFile, version, offset); err != nil { + if n, rest, err = ReadNeedleHeader(dataFile, version, offset); err != nil { if err == io.EOF { return nil } - return fmt.Errorf("cannot read needle header: %v", err) + return fmt.Errorf("cannot read needle header at offset %d: %v", offset, err) } glog.V(4).Infof("new entry needle size:%d rest:%d", n.Size, rest) } - - return + return nil } diff --git a/weed/storage/volume_vacuum.go b/weed/storage/volume_vacuum.go index de39628db..b575277cd 100644 --- a/weed/storage/volume_vacuum.go +++ b/weed/storage/volume_vacuum.go @@ -261,7 +261,7 @@ func (scanner *VolumeFileScanner4Vacuum) VisitNeedle(n *Needle, offset int64) er } nv, ok := scanner.v.nm.Get(n.Id) glog.V(4).Infoln("needle expected offset ", offset, "ok", ok, "nv", nv) - if ok && int64(nv.Offset)*NeedlePaddingSize == offset && nv.Size > 0 { + if ok && int64(nv.Offset)*NeedlePaddingSize == offset && nv.Size > 0 && nv.Size != TombstoneFileSize { if err := scanner.nm.Put(n.Id, Offset(scanner.newOffset/NeedlePaddingSize), n.Size); err != nil { return fmt.Errorf("cannot put needle: %s", err) }