From 4f317c7e3d2a9e639ccb53ca9debe0c3981c6321 Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Tue, 24 Jul 2018 01:36:04 -0700 Subject: [PATCH] working version3 --- unmaintained/fix_dat/fix_dat.go | 3 +- weed/command/export.go | 2 +- weed/command/fix.go | 4 +- weed/server/volume_server_handlers_sync.go | 2 +- weed/storage/needle.go | 15 ++-- weed/storage/needle_read_write.go | 86 ++++++++++++++-------- weed/storage/types/needle_types.go | 1 + weed/storage/volume_checking.go | 5 +- weed/storage/volume_read_write.go | 13 ---- weed/storage/volume_super_block.go | 2 +- weed/storage/volume_vacuum.go | 8 +- weed/storage/volume_version.go | 3 +- 12 files changed, 80 insertions(+), 64 deletions(-) diff --git a/unmaintained/fix_dat/fix_dat.go b/unmaintained/fix_dat/fix_dat.go index 10dd94810..cf3aa055f 100644 --- a/unmaintained/fix_dat/fix_dat.go +++ b/unmaintained/fix_dat/fix_dat.go @@ -106,8 +106,7 @@ func iterateEntries(datFile, idxFile *os.File, visitNeedle func(n *storage.Needl fmt.Printf("key: %d offsetFromIndex %d n.Size %d sizeFromIndex:%d\n", key, offsetFromIndex, n.Size, sizeFromIndex) - padding := types.NeedlePaddingSize - ((sizeFromIndex + types.NeedleEntrySize + storage.NeedleChecksumSize) % types.NeedlePaddingSize) - rest = int64(sizeFromIndex + storage.NeedleChecksumSize + padding) + rest = storage.NeedleBodyLength(sizeFromIndex, version) func() { defer func() { diff --git a/weed/command/export.go b/weed/command/export.go index 529ee47e3..1202d687c 100644 --- a/weed/command/export.go +++ b/weed/command/export.go @@ -161,7 +161,7 @@ func runExport(cmd *Command, args []string) bool { }, true, func(n *storage.Needle, offset int64) error { nv, ok := needleMap.Get(n.Id) glog.V(3).Infof("key %d offset %d size %d disk_size %d gzip %v ok %v nv %+v", - n.Id, offset, n.Size, n.DiskSize(), n.IsGzipped(), ok, nv) + n.Id, offset, n.Size, n.DiskSize(version), n.IsGzipped(), ok, nv) if ok && nv.Size > 0 && int64(nv.Offset)*types.NeedlePaddingSize == offset { if newerThanUnix >= 0 && n.HasLastModifiedDate() && n.LastModified < uint64(newerThanUnix) { glog.V(3).Infof("Skipping this file, as it's old enough: LastModified %d vs %d", diff --git a/weed/command/fix.go b/weed/command/fix.go index 32b09fd72..916338d3e 100644 --- a/weed/command/fix.go +++ b/weed/command/fix.go @@ -47,13 +47,15 @@ func runFix(cmd *Command, args []string) bool { nm := storage.NewBtreeNeedleMap(indexFile) defer nm.Close() + var version storage.Version vid := storage.VolumeId(*fixVolumeId) err = storage.ScanVolumeFile(*fixVolumePath, *fixVolumeCollection, vid, storage.NeedleMapInMemory, func(superBlock storage.SuperBlock) error { + version = superBlock.Version() return nil }, false, func(n *storage.Needle, offset int64) error { - glog.V(2).Infof("key %d offset %d size %d disk_size %d gzip %v", n.Id, offset, n.Size, n.DiskSize(), n.IsGzipped()) + glog.V(2).Infof("key %d offset %d size %d disk_size %d gzip %v", n.Id, offset, n.Size, n.DiskSize(version), n.IsGzipped()) if n.Size > 0 { pe := nm.Put(n.Id, types.Offset(offset/types.NeedlePaddingSize), n.Size) glog.V(2).Infof("saved %d with error %v", n.Size, pe) diff --git a/weed/server/volume_server_handlers_sync.go b/weed/server/volume_server_handlers_sync.go index c6e32bb9b..1ceec4ddd 100644 --- a/weed/server/volume_server_handlers_sync.go +++ b/weed/server/volume_server_handlers_sync.go @@ -51,7 +51,7 @@ func (vs *VolumeServer) getVolumeDataContentHandler(w http.ResponseWriter, r *ht } offset := uint32(util.ParseUint64(r.FormValue("offset"), 0)) size := uint32(util.ParseUint64(r.FormValue("size"), 0)) - content, err := storage.ReadNeedleBlob(v.DataFile(), int64(offset)*types.NeedlePaddingSize, size) + content, err := storage.ReadNeedleBlob(v.DataFile(), int64(offset)*types.NeedlePaddingSize, size, v.Version()) if err != nil { writeJsonError(w, r, http.StatusInternalServerError, err) return diff --git a/weed/storage/needle.go b/weed/storage/needle.go index 31bada091..03a5e0a38 100644 --- a/weed/storage/needle.go +++ b/weed/storage/needle.go @@ -29,18 +29,19 @@ type Needle struct { DataSize uint32 `comment:"Data size"` //version2 Data []byte `comment:"The actual file data"` - Flags byte `comment:"boolean flags"` //version2 - NameSize uint8 //version2 + Flags byte `comment:"boolean flags"` //version2 + NameSize uint8 //version2 Name []byte `comment:"maximum 256 characters"` //version2 - MimeSize uint8 //version2 + MimeSize uint8 //version2 Mime []byte `comment:"maximum 256 characters"` //version2 - PairsSize uint16 //version2 + PairsSize uint16 //version2 Pairs []byte `comment:"additional name value pairs, json format, maximum 64kB"` LastModified uint64 //only store LastModifiedBytesLength bytes, which is 5 bytes to disk Ttl *TTL - Checksum CRC `comment:"CRC32 to check integrity"` - Padding []byte `comment:"Aligned to 8 bytes"` + Checksum CRC `comment:"CRC32 to check integrity"` + AppendAtNs uint64 `comment:"append timestamp in nano seconds"` //version3 + Padding []byte `comment:"Aligned to 8 bytes"` } func (n *Needle) String() (str string) { @@ -134,7 +135,7 @@ func NewNeedle(r *http.Request, fixJpgOrientation bool) (n *Needle, e error) { dotSep := strings.LastIndex(r.URL.Path, ".") fid := r.URL.Path[commaSep+1:] if dotSep > 0 { - fid = r.URL.Path[commaSep+1 : dotSep] + fid = r.URL.Path[commaSep+1: dotSep] } e = n.ParsePath(fid) diff --git a/weed/storage/needle_read_write.go b/weed/storage/needle_read_write.go index bfd325475..b78e56396 100644 --- a/weed/storage/needle_read_write.go +++ b/weed/storage/needle_read_write.go @@ -23,8 +23,8 @@ const ( TtlBytesLength = 2 ) -func (n *Needle) DiskSize() int64 { - return getActualSize(n.Size) +func (n *Needle) DiskSize(version Version) int64 { + return getActualSize(n.Size, version) } func (n *Needle) Append(w io.Writer, version Version) (size uint32, actualSize int64, err error) { @@ -57,12 +57,12 @@ func (n *Needle) Append(w io.Writer, version Version) (size uint32, actualSize i return } actualSize = NeedleEntrySize + int64(n.Size) - padding := NeedlePaddingSize - ((NeedleEntrySize + n.Size + NeedleChecksumSize) % NeedlePaddingSize) + padding := PaddingLength(n.Size, version) util.Uint32toBytes(header[0:NeedleChecksumSize], n.Checksum.Value()) - _, err = w.Write(header[0 : NeedleChecksumSize+padding]) + _, err = w.Write(header[0: NeedleChecksumSize+padding]) return - case Version2: - header := make([]byte, NeedleEntrySize) + case Version2, Version3: + header := make([]byte, NeedleEntrySize+TimestampSize) // adding timestamp to reuse it and avoid extra allocation CookieToBytes(header[0:CookieSize], n.Cookie) NeedleIdToBytes(header[CookieSize:CookieSize+NeedleIdSize], n.Id) n.DataSize, n.NameSize, n.MimeSize = uint32(len(n.Data)), uint8(len(n.Name)), uint8(len(n.Mime)) @@ -88,7 +88,7 @@ func (n *Needle) Append(w io.Writer, version Version) (size uint32, actualSize i } size = n.DataSize util.Uint32toBytes(header[CookieSize+NeedleIdSize:CookieSize+NeedleIdSize+SizeSize], n.Size) - if _, err = w.Write(header); err != nil { + if _, err = w.Write(header[0:NeedleEntrySize]); err != nil { return } if n.DataSize > 0 { @@ -123,7 +123,7 @@ func (n *Needle) Append(w io.Writer, version Version) (size uint32, actualSize i } if n.HasLastModifiedDate() { util.Uint64toBytes(header[0:8], n.LastModified) - if _, err = w.Write(header[8-LastModifiedBytesLength : 8]); err != nil { + if _, err = w.Write(header[8-LastModifiedBytesLength: 8]); err != nil { return } } @@ -143,23 +143,29 @@ func (n *Needle) Append(w io.Writer, version Version) (size uint32, actualSize i } } } - padding := NeedlePaddingSize - ((NeedleEntrySize + n.Size + NeedleChecksumSize) % NeedlePaddingSize) + padding := PaddingLength(n.Size, version) util.Uint32toBytes(header[0:NeedleChecksumSize], n.Checksum.Value()) - _, err = w.Write(header[0 : NeedleChecksumSize+padding]) + if version == Version2 { + _, err = w.Write(header[0: NeedleChecksumSize+padding]) + } else { + // version3 + util.Uint64toBytes(header[NeedleChecksumSize:NeedleChecksumSize+TimestampSize], n.AppendAtNs) + _, err = w.Write(header[0: NeedleChecksumSize+TimestampSize+padding]) + } - return n.DataSize, getActualSize(n.Size), err + return n.DataSize, getActualSize(n.Size, version), err } return 0, 0, fmt.Errorf("Unsupported Version! (%d)", version) } -func ReadNeedleBlob(r *os.File, offset int64, size uint32) (dataSlice []byte, err error) { - dataSlice = make([]byte, int(getActualSize(size))) +func ReadNeedleBlob(r *os.File, offset int64, size uint32, version Version) (dataSlice []byte, err error) { + dataSlice = make([]byte, int(getActualSize(size, version))) _, err = r.ReadAt(dataSlice, offset) return dataSlice, err } func (n *Needle) ReadData(r *os.File, offset int64, size uint32, version Version) (err error) { - bytes, err := ReadNeedleBlob(r, offset, size) + bytes, err := ReadNeedleBlob(r, offset, size, version) if err != nil { return err } @@ -169,39 +175,43 @@ func (n *Needle) ReadData(r *os.File, offset int64, size uint32, version Version } switch version { case Version1: - n.Data = bytes[NeedleEntrySize : NeedleEntrySize+size] - case Version2: - n.readNeedleDataVersion2(bytes[NeedleEntrySize : NeedleEntrySize+int(n.Size)]) + n.Data = bytes[NeedleEntrySize: NeedleEntrySize+size] + case Version2, Version3: + n.readNeedleDataVersion2(bytes[NeedleEntrySize: NeedleEntrySize+int(n.Size)]) } if size == 0 { return nil } - checksum := util.BytesToUint32(bytes[NeedleEntrySize+size : NeedleEntrySize+size+NeedleChecksumSize]) + checksum := util.BytesToUint32(bytes[NeedleEntrySize+size: NeedleEntrySize+size+NeedleChecksumSize]) newChecksum := NewCRC(n.Data) if checksum != newChecksum.Value() { return errors.New("CRC error! Data On Disk Corrupted") } n.Checksum = newChecksum + if version == Version3 { + tsOffset := NeedleEntrySize + size + NeedleChecksumSize + n.AppendAtNs = util.BytesToUint64(bytes[tsOffset: tsOffset+TimestampSize]) + } return nil } func (n *Needle) ParseNeedleHeader(bytes []byte) { n.Cookie = BytesToCookie(bytes[0:CookieSize]) - n.Id = BytesToNeedleId(bytes[CookieSize : CookieSize+NeedleIdSize]) - n.Size = util.BytesToUint32(bytes[CookieSize+NeedleIdSize : NeedleEntrySize]) + n.Id = BytesToNeedleId(bytes[CookieSize: CookieSize+NeedleIdSize]) + n.Size = util.BytesToUint32(bytes[CookieSize+NeedleIdSize: NeedleEntrySize]) } func (n *Needle) readNeedleDataVersion2(bytes []byte) { index, lenBytes := 0, len(bytes) if index < lenBytes { - n.DataSize = util.BytesToUint32(bytes[index : index+4]) + n.DataSize = util.BytesToUint32(bytes[index: index+4]) index = index + 4 if int(n.DataSize)+index > lenBytes { // this if clause is due to bug #87 and #93, fixed in v0.69 // remove this clause later return } - n.Data = bytes[index : index+int(n.DataSize)] + n.Data = bytes[index: index+int(n.DataSize)] index = index + int(n.DataSize) n.Flags = bytes[index] index = index + 1 @@ -209,25 +219,25 @@ func (n *Needle) readNeedleDataVersion2(bytes []byte) { if index < lenBytes && n.HasName() { n.NameSize = uint8(bytes[index]) index = index + 1 - n.Name = bytes[index : index+int(n.NameSize)] + n.Name = bytes[index: index+int(n.NameSize)] index = index + int(n.NameSize) } if index < lenBytes && n.HasMime() { n.MimeSize = uint8(bytes[index]) index = index + 1 - n.Mime = bytes[index : index+int(n.MimeSize)] + n.Mime = bytes[index: index+int(n.MimeSize)] index = index + int(n.MimeSize) } if index < lenBytes && n.HasLastModifiedDate() { - n.LastModified = util.BytesToUint64(bytes[index : index+LastModifiedBytesLength]) + n.LastModified = util.BytesToUint64(bytes[index: index+LastModifiedBytesLength]) index = index + LastModifiedBytesLength } if index < lenBytes && n.HasTtl() { - n.Ttl = LoadTTLFromBytes(bytes[index : index+TtlBytesLength]) + n.Ttl = LoadTTLFromBytes(bytes[index: index+TtlBytesLength]) index = index + TtlBytesLength } if index < lenBytes && n.HasPairs() { - n.PairsSize = util.BytesToUint16(bytes[index : index+2]) + n.PairsSize = util.BytesToUint16(bytes[index: index+2]) index += 2 end := index + int(n.PairsSize) n.Pairs = bytes[index:end] @@ -237,7 +247,7 @@ func (n *Needle) readNeedleDataVersion2(bytes []byte) { func ReadNeedleHeader(r *os.File, version Version, offset int64) (n *Needle, bodyLength int64, err error) { n = new(Needle) - if version == Version1 || version == Version2 { + if version == Version1 || version == Version2 || version == Version3 { bytes := make([]byte, NeedleEntrySize) var count int count, err = r.ReadAt(bytes, offset) @@ -245,12 +255,26 @@ func ReadNeedleHeader(r *os.File, version Version, offset int64) (n *Needle, bod return nil, 0, err } n.ParseNeedleHeader(bytes) - padding := NeedlePaddingSize - ((n.Size + NeedleEntrySize + NeedleChecksumSize) % NeedlePaddingSize) - bodyLength = int64(n.Size) + NeedleChecksumSize + int64(padding) + bodyLength = NeedleBodyLength(n.Size, version) } return } +func PaddingLength(needleSize uint32, version Version) uint32 { + if version == Version3 { + // this is same value as version2, but just listed here for clarity + return NeedlePaddingSize - ((NeedleEntrySize + needleSize + NeedleChecksumSize + TimestampSize) % NeedlePaddingSize) + } + return NeedlePaddingSize - ((NeedleEntrySize + needleSize + NeedleChecksumSize) % NeedlePaddingSize) +} + +func NeedleBodyLength(needleSize uint32, version Version) int64 { + if version == Version3 { + return int64(needleSize) + NeedleChecksumSize + TimestampSize + int64(PaddingLength(needleSize, version)) + } + return int64(needleSize) + NeedleChecksumSize + int64(PaddingLength(needleSize, version)) +} + //n should be a needle already read the header //the input stream will read until next file entry func (n *Needle) ReadNeedleBody(r *os.File, version Version, offset int64, bodyLength int64) (err error) { @@ -265,7 +289,7 @@ func (n *Needle) ReadNeedleBody(r *os.File, version Version, offset int64, bodyL } n.Data = bytes[:n.Size] n.Checksum = NewCRC(n.Data) - case Version2: + case Version2, Version3: bytes := make([]byte, bodyLength) if _, err = r.ReadAt(bytes, offset); err != nil { return diff --git a/weed/storage/types/needle_types.go b/weed/storage/types/needle_types.go index 8a2054fc5..ce4e601e4 100644 --- a/weed/storage/types/needle_types.go +++ b/weed/storage/types/needle_types.go @@ -14,6 +14,7 @@ const ( OffsetSize = 4 SizeSize = 4 // uint32 size NeedleEntrySize = NeedleIdSize + OffsetSize + SizeSize + TimestampSize = 8 // int64 size NeedlePaddingSize = 8 MaxPossibleVolumeSize = 4 * 1024 * 1024 * 1024 * 8 TombstoneFileSize = math.MaxUint32 diff --git a/weed/storage/volume_checking.go b/weed/storage/volume_checking.go index c928ae9a2..12c282be9 100644 --- a/weed/storage/volume_checking.go +++ b/weed/storage/volume_checking.go @@ -8,9 +8,8 @@ import ( "github.com/chrislusf/seaweedfs/weed/util" ) -func getActualSize(size uint32) int64 { - padding := NeedlePaddingSize - ((NeedleEntrySize + size + NeedleChecksumSize) % NeedlePaddingSize) - return NeedleEntrySize + int64(size) + NeedleChecksumSize + int64(padding) +func getActualSize(size uint32, version Version) int64 { + return NeedleEntrySize + NeedleBodyLength(size, version) } func CheckVolumeDataIntegrity(v *Volume, indexFile *os.File) error { diff --git a/weed/storage/volume_read_write.go b/weed/storage/volume_read_write.go index 3e36bfb2e..3e711708e 100644 --- a/weed/storage/volume_read_write.go +++ b/weed/storage/volume_read_write.go @@ -203,19 +203,6 @@ func ScanVolumeFile(dirname string, collection string, id VolumeId, //err = fmt.Errorf("cannot read needle body: %v", err) //return } - if n.DataSize >= n.Size { - // this should come from a bug reported on #87 and #93 - // fixed in v0.69 - // remove this whole "if" clause later, long after 0.69 - oldRest, oldSize := rest, n.Size - padding := NeedlePaddingSize - ((n.Size + NeedleEntrySize + NeedleChecksumSize) % NeedlePaddingSize) - n.Size = 0 - rest = int64(n.Size + NeedleChecksumSize + padding) - if rest%NeedlePaddingSize != 0 { - rest += (NeedlePaddingSize - rest%NeedlePaddingSize) - } - glog.V(4).Infof("Adjusting n.Size %d=>0 rest:%d=>%d %+v", oldSize, oldRest, rest, n) - } } err = visitNeedle(n, offset) if err == io.EOF { diff --git a/weed/storage/volume_super_block.go b/weed/storage/volume_super_block.go index 1b81788d3..6435a051f 100644 --- a/weed/storage/volume_super_block.go +++ b/weed/storage/volume_super_block.go @@ -33,7 +33,7 @@ type SuperBlock struct { func (s *SuperBlock) BlockSize() int { switch s.version { - case Version2: + case Version2, Version3: return _SuperBlockSize + int(s.extraSize) } return _SuperBlockSize diff --git a/weed/storage/volume_vacuum.go b/weed/storage/volume_vacuum.go index 58ecc73cb..fea5e7d6e 100644 --- a/weed/storage/volume_vacuum.go +++ b/weed/storage/volume_vacuum.go @@ -196,7 +196,7 @@ func (v *Volume) makeupDiff(newDatFileName, newIdxFileName, oldDatFileName, oldI //even the needle cache in memory is hit, the need_bytes is correct glog.V(4).Infof("file %d offset %d size %d", key, int64(incre_idx_entry.offset)*NeedlePaddingSize, incre_idx_entry.size) var needle_bytes []byte - needle_bytes, err = ReadNeedleBlob(oldDatFile, int64(incre_idx_entry.offset)*NeedlePaddingSize, incre_idx_entry.size) + needle_bytes, err = ReadNeedleBlob(oldDatFile, int64(incre_idx_entry.offset)*NeedlePaddingSize, incre_idx_entry.size, v.Version()) if err != nil { return fmt.Errorf("ReadNeedleBlob %s key %d offset %d size %d failed: %v", oldDatFile.Name(), key, int64(incre_idx_entry.offset)*NeedlePaddingSize, incre_idx_entry.size, err) } @@ -243,8 +243,10 @@ func (v *Volume) copyDataAndGenerateIndexFile(dstName, idxName string, prealloca now := uint64(time.Now().Unix()) + var version Version err = ScanVolumeFile(v.dir, v.Collection, v.Id, v.needleMapKind, func(superBlock SuperBlock) error { + version = superBlock.Version() superBlock.CompactRevision++ _, err = dst.Write(superBlock.Bytes()) new_offset = int64(superBlock.BlockSize()) @@ -262,7 +264,7 @@ func (v *Volume) copyDataAndGenerateIndexFile(dstName, idxName string, prealloca if _, _, err := n.Append(dst, v.Version()); err != nil { return fmt.Errorf("cannot append needle: %s", err) } - new_offset += n.DiskSize() + new_offset += n.DiskSize(version) glog.V(3).Infoln("saving key", n.Id, "volume offset", offset, "=>", new_offset, "data_size", n.Size) } return nil @@ -322,7 +324,7 @@ func (v *Volume) copyDataBasedOnIndexFile(dstName, idxName string) (err error) { if _, _, err = n.Append(dst, v.Version()); err != nil { return fmt.Errorf("cannot append needle: %s", err) } - new_offset += n.DiskSize() + new_offset += n.DiskSize(v.Version()) glog.V(3).Infoln("saving key", n.Id, "volume offset", offset, "=>", new_offset, "data_size", n.Size) } return nil diff --git a/weed/storage/volume_version.go b/weed/storage/volume_version.go index 2e9f58aa2..fc0270c03 100644 --- a/weed/storage/volume_version.go +++ b/weed/storage/volume_version.go @@ -5,5 +5,6 @@ type Version uint8 const ( Version1 = Version(1) Version2 = Version(2) - CurrentVersion = Version2 + Version3 = Version(3) + CurrentVersion = Version3 )