From 2e74fb60c8d051911066d492d8f90123f98354ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9C=8D=E6=99=93=E6=A0=8B?= Date: Sat, 2 Jul 2016 21:34:30 +0800 Subject: [PATCH 1/5] add data integrity checking --- weed/storage/needle_read_write.go | 14 +++++++- weed/storage/volume.go | 54 ++++++++++++++++++++++++++++++- 2 files changed, 66 insertions(+), 2 deletions(-) diff --git a/weed/storage/needle_read_write.go b/weed/storage/needle_read_write.go index 2f26147d6..9499c825d 100644 --- a/weed/storage/needle_read_write.go +++ b/weed/storage/needle_read_write.go @@ -226,7 +226,7 @@ func ReadNeedleHeader(r *os.File, version Version, offset int64) (n *Needle, bod //n should be a needle already read the header //the input stream will read until next file entry -func (n *Needle) ReadNeedleBody(r *os.File, version Version, offset int64, bodyLength uint32) (err error) { +func (n *Needle) ReadNeedleBody(r *os.File, version Version, offset int64, bodyLength uint32, verifyCheckSum bool) (err error) { if bodyLength <= 0 { return nil } @@ -238,6 +238,12 @@ func (n *Needle) ReadNeedleBody(r *os.File, version Version, offset int64, bodyL } n.Data = bytes[:n.Size] n.Checksum = NewCRC(n.Data) + if verifyCheckSum { + checksum := util.BytesToUint32(bytes[n.Size : n.Size+NeedleChecksumSize]) + if n.Checksum.Value() != checksum { + err = fmt.Errorf("CRC check failed") + } + } case Version2: bytes := make([]byte, bodyLength) if _, err = r.ReadAt(bytes, offset); err != nil { @@ -245,6 +251,12 @@ func (n *Needle) ReadNeedleBody(r *os.File, version Version, offset int64, bodyL } n.readNeedleDataVersion2(bytes[0:n.Size]) n.Checksum = NewCRC(n.Data) + if verifyCheckSum { + checksum := util.BytesToUint32(bytes[n.Size : n.Size+NeedleChecksumSize]) + if n.Checksum.Value() != checksum { + err = fmt.Errorf("CRC check failed") + } + } default: err = fmt.Errorf("Unsupported Version! (%d)", version) } diff --git a/weed/storage/volume.go b/weed/storage/volume.go index d40bdc565..b0ab9e008 100644 --- a/weed/storage/volume.go +++ b/weed/storage/volume.go @@ -46,6 +46,57 @@ func loadVolumeWithoutIndex(dirname string, collection string, id VolumeId, need e = v.load(false, false, needleMapKind) return } +func verifyIndexFileIntegrity(indexFile *os.File) (indexSize int64, err error) { + var fi os.FileInfo + if fi, err = indexFile.Stat(); err != nil { + return + } else if indexSize = fi.Size(); indexSize != 0 && indexSize%16 != 0 { + err = fmt.Errorf("index file %s's size is %d bytes, maybe corrupted", indexFile.Name(), fi.Size()) + return + } + return +} +func readIndexEntryAtOffset(indexFile *os.File, offset int64, v Version) (bytes []byte, err error) { + if offset < 0 { + err = fmt.Errorf("offset %d for index file %s is invalid", offset, indexFile.Name()) + return + } + bytes = make([]byte, 16) + _, err = indexFile.ReadAt(bytes, offset) + return +} +func verifyNeedleIntegrity(datFile *os.File, v Version, offset int64, key uint64) error { + if n, bodyLength, err := ReadNeedleHeader(datFile, v, offset); err != nil { + return fmt.Errorf("can not read needle header: %s", err.Error()) + } else { + if n.Id != key { + return fmt.Errorf("index key %#x does not match needle's Id %#x", key, n.Id) + } else { + if err := n.ReadNeedleBody(datFile, v, offset+int64(NeedleHeaderSize), bodyLength, true); err != nil { + return fmt.Errorf("dat file %s's body reading failed: %s", datFile.Name(), err.Error()) + } + } + } + return nil +} +func volumeDataIntegrityChecking(v *Volume, indexFile *os.File) { + var indexSize int64 + var e error + if indexSize, e = verifyIndexFileIntegrity(indexFile); e != nil { + glog.V(0).Infof("verifyIndexFileIntegrity failed %s", e.Error()) + v.readOnly = true + } + var lastIdxEntry []byte + if lastIdxEntry, e = readIndexEntryAtOffset(indexFile, indexSize-16, v.Version()); e != nil { + glog.V(0).Infof("readLastIndexEntry failed %s", e.Error()) + v.readOnly = true + } + key, offset, _ := idxFileEntry(lastIdxEntry) + if e = verifyNeedleIntegrity(v.dataFile, v.Version(), int64(offset)*NeedlePaddingSize, key); e != nil { + glog.V(0).Infof("verifyNeedleIntegrity failed %s", e.Error()) + v.readOnly = true + } +} func (v *Volume) FileName() (fileName string) { if v.Collection == "" { fileName = path.Join(v.dir, v.Id.String()) @@ -105,6 +156,7 @@ func (v *Volume) load(alsoLoadIndex bool, createDatIfMissing bool, needleMapKind return fmt.Errorf("cannot write Volume Index %s.idx: %v", fileName, e) } } + volumeDataIntegrityChecking(v, indexFile) switch needleMapKind { case NeedleMapInMemory: glog.V(0).Infoln("loading index file", fileName+".idx", "readonly", v.readOnly) @@ -332,7 +384,7 @@ func ScanVolumeFile(dirname string, collection string, id VolumeId, } for n != nil { if readNeedleBody { - if err = n.ReadNeedleBody(v.dataFile, version, offset+int64(NeedleHeaderSize), rest); err != nil { + if err = n.ReadNeedleBody(v.dataFile, version, offset+int64(NeedleHeaderSize), rest, false); err != nil { glog.V(0).Infof("cannot read needle body: %v", err) //err = fmt.Errorf("cannot read needle body: %v", err) //return From 3018443cd790e8f9bd9df8b2b596840a315447c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9C=8D=E6=99=93=E6=A0=8B?= Date: Sat, 2 Jul 2016 21:37:14 +0800 Subject: [PATCH 2/5] code change for fix_dat.go --- unmaintained/fix_dat/fix_dat.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unmaintained/fix_dat/fix_dat.go b/unmaintained/fix_dat/fix_dat.go index bcb985fe9..84a2264ed 100644 --- a/unmaintained/fix_dat/fix_dat.go +++ b/unmaintained/fix_dat/fix_dat.go @@ -107,7 +107,7 @@ func iterateEntries(datFile, idxFile *os.File, visitNeedle func(n *storage.Needl fmt.Println("Recovered in f", r) } }() - if err = n.ReadNeedleBody(datFile, version, offset+int64(storage.NeedleHeaderSize), rest); err != nil { + if err = n.ReadNeedleBody(datFile, version, offset+int64(storage.NeedleHeaderSize), rest, false); err != nil { fmt.Printf("cannot read needle body: offset %d body %d %v\n", offset, rest, err) } }() From dda13def2aa1da566252ef31ef50936941f0b1b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9C=8D=E6=99=93=E6=A0=8B?= Date: Sun, 3 Jul 2016 12:53:03 +0800 Subject: [PATCH 3/5] refactor data integrity checking code --- unmaintained/fix_dat/fix_dat.go | 2 +- weed/storage/needle_map.go | 4 ++ weed/storage/needle_read_write.go | 20 ++-------- weed/storage/volume.go | 63 +++++++++++++++++++------------ weed/util/file_util.go | 8 ++++ 5 files changed, 55 insertions(+), 42 deletions(-) diff --git a/unmaintained/fix_dat/fix_dat.go b/unmaintained/fix_dat/fix_dat.go index 84a2264ed..ae45f7cc8 100644 --- a/unmaintained/fix_dat/fix_dat.go +++ b/unmaintained/fix_dat/fix_dat.go @@ -107,7 +107,7 @@ func iterateEntries(datFile, idxFile *os.File, visitNeedle func(n *storage.Needl fmt.Println("Recovered in f", r) } }() - if err = n.ReadNeedleBody(datFile, version, offset+int64(storage.NeedleHeaderSize), rest, false); err != nil { + if _, err = n.ReadNeedleBody(datFile, version, offset+int64(storage.NeedleHeaderSize), rest, false); err != nil { fmt.Printf("cannot read needle body: offset %d body %d %v\n", offset, rest, err) } }() diff --git a/weed/storage/needle_map.go b/weed/storage/needle_map.go index 05bc6e86c..142018946 100644 --- a/weed/storage/needle_map.go +++ b/weed/storage/needle_map.go @@ -17,6 +17,10 @@ const ( NeedleMapBoltDb ) +const ( + NeedleIndexSize = 16 +) + type NeedleMapper interface { Put(key uint64, offset uint32, size uint32) error Get(key uint64) (element *NeedleValue, ok bool) diff --git a/weed/storage/needle_read_write.go b/weed/storage/needle_read_write.go index 9499c825d..c7115f800 100644 --- a/weed/storage/needle_read_write.go +++ b/weed/storage/needle_read_write.go @@ -226,37 +226,25 @@ func ReadNeedleHeader(r *os.File, version Version, offset int64) (n *Needle, bod //n should be a needle already read the header //the input stream will read until next file entry -func (n *Needle) ReadNeedleBody(r *os.File, version Version, offset int64, bodyLength uint32, verifyCheckSum bool) (err error) { +func (n *Needle) ReadNeedleBody(r *os.File, version Version, offset int64, bodyLength uint32) (bytes []byte, err error) { if bodyLength <= 0 { - return nil + return } switch version { case Version1: - bytes := make([]byte, bodyLength) + bytes = make([]byte, bodyLength) if _, err = r.ReadAt(bytes, offset); err != nil { return } n.Data = bytes[:n.Size] n.Checksum = NewCRC(n.Data) - if verifyCheckSum { - checksum := util.BytesToUint32(bytes[n.Size : n.Size+NeedleChecksumSize]) - if n.Checksum.Value() != checksum { - err = fmt.Errorf("CRC check failed") - } - } case Version2: - bytes := make([]byte, bodyLength) + bytes = make([]byte, bodyLength) if _, err = r.ReadAt(bytes, offset); err != nil { return } n.readNeedleDataVersion2(bytes[0:n.Size]) n.Checksum = NewCRC(n.Data) - if verifyCheckSum { - checksum := util.BytesToUint32(bytes[n.Size : n.Size+NeedleChecksumSize]) - if n.Checksum.Value() != checksum { - err = fmt.Errorf("CRC check failed") - } - } default: err = fmt.Errorf("Unsupported Version! (%d)", version) } diff --git a/weed/storage/volume.go b/weed/storage/volume.go index b0ab9e008..43b239fd2 100644 --- a/weed/storage/volume.go +++ b/weed/storage/volume.go @@ -11,6 +11,7 @@ import ( "time" "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/util" ) type Volume struct { @@ -47,55 +48,64 @@ func loadVolumeWithoutIndex(dirname string, collection string, id VolumeId, need return } func verifyIndexFileIntegrity(indexFile *os.File) (indexSize int64, err error) { - var fi os.FileInfo - if fi, err = indexFile.Stat(); err != nil { - return - } else if indexSize = fi.Size(); indexSize != 0 && indexSize%16 != 0 { - err = fmt.Errorf("index file %s's size is %d bytes, maybe corrupted", indexFile.Name(), fi.Size()) - return + if indexSize, err = util.GetFileSize(indexFile); err == nil { + if indexSize%NeedleIndexSize != 0 { + err = fmt.Errorf("index file's size is %d bytes, maybe corrupted", indexSize) + } } return } func readIndexEntryAtOffset(indexFile *os.File, offset int64, v Version) (bytes []byte, err error) { if offset < 0 { - err = fmt.Errorf("offset %d for index file %s is invalid", offset, indexFile.Name()) + err = fmt.Errorf("offset %d for index file is invalid", offset) return } - bytes = make([]byte, 16) + bytes = make([]byte, NeedleIndexSize) _, err = indexFile.ReadAt(bytes, offset) return } func verifyNeedleIntegrity(datFile *os.File, v Version, offset int64, key uint64) error { if n, bodyLength, err := ReadNeedleHeader(datFile, v, offset); err != nil { - return fmt.Errorf("can not read needle header: %s", err.Error()) + return fmt.Errorf("can not read needle header: %v", err) } else { if n.Id != key { return fmt.Errorf("index key %#x does not match needle's Id %#x", key, n.Id) } else { - if err := n.ReadNeedleBody(datFile, v, offset+int64(NeedleHeaderSize), bodyLength, true); err != nil { - return fmt.Errorf("dat file %s's body reading failed: %s", datFile.Name(), err.Error()) + if bytes, err := n.ReadNeedleBody(datFile, v, offset+int64(NeedleHeaderSize), bodyLength); err != nil { + return fmt.Errorf("dat file's body reading failed: %v", err) + } else { + checksum := util.BytesToUint32(bytes[n.Size : n.Size+NeedleChecksumSize]) + if n.Checksum.Value() != checksum { + return fmt.Errorf("CRC check failed") + } } } } return nil } -func volumeDataIntegrityChecking(v *Volume, indexFile *os.File) { +func volumeDataIntegrityChecking(v *Volume, indexFile *os.File) error { var indexSize int64 var e error if indexSize, e = verifyIndexFileIntegrity(indexFile); e != nil { - glog.V(0).Infof("verifyIndexFileIntegrity failed %s", e.Error()) - v.readOnly = true + return fmt.Errorf("verifyIndexFileIntegrity failed: %v", e) } - var lastIdxEntry []byte - if lastIdxEntry, e = readIndexEntryAtOffset(indexFile, indexSize-16, v.Version()); e != nil { - glog.V(0).Infof("readLastIndexEntry failed %s", e.Error()) - v.readOnly = true - } - key, offset, _ := idxFileEntry(lastIdxEntry) - if e = verifyNeedleIntegrity(v.dataFile, v.Version(), int64(offset)*NeedlePaddingSize, key); e != nil { - glog.V(0).Infof("verifyNeedleIntegrity failed %s", e.Error()) - v.readOnly = true + if indexSize != 0 { + var lastIdxEntry []byte + if lastIdxEntry, e = readIndexEntryAtOffset(indexFile, indexSize-NeedleIndexSize, v.Version()); e != nil { + return fmt.Errorf("readLastIndexEntry failed: %v", e) + } + key, offset, _ := idxFileEntry(lastIdxEntry) + if e = verifyNeedleIntegrity(v.dataFile, v.Version(), int64(offset)*NeedlePaddingSize, key); e != nil { + return fmt.Errorf("verifyNeedleIntegrity failed: %v", e) + } + } else { + if datSize, err := util.GetFileSize(v.dataFile); err == nil { + if datSize > 0 { + return fmt.Errorf("dat file size is %d, not empty while the index file is empty!", datSize) + } + } } + return nil } func (v *Volume) FileName() (fileName string) { if v.Collection == "" { @@ -156,7 +166,10 @@ func (v *Volume) load(alsoLoadIndex bool, createDatIfMissing bool, needleMapKind return fmt.Errorf("cannot write Volume Index %s.idx: %v", fileName, e) } } - volumeDataIntegrityChecking(v, indexFile) + if e = volumeDataIntegrityChecking(v, indexFile); e != nil { + v.readOnly = true + glog.V(0).Infof("volumeDataIntegrityChecking failed %v", e) + } switch needleMapKind { case NeedleMapInMemory: glog.V(0).Infoln("loading index file", fileName+".idx", "readonly", v.readOnly) @@ -384,7 +397,7 @@ func ScanVolumeFile(dirname string, collection string, id VolumeId, } for n != nil { if readNeedleBody { - if err = n.ReadNeedleBody(v.dataFile, version, offset+int64(NeedleHeaderSize), rest, false); err != nil { + if _, err = n.ReadNeedleBody(v.dataFile, version, offset+int64(NeedleHeaderSize), rest); err != nil { glog.V(0).Infof("cannot read needle body: %v", err) //err = fmt.Errorf("cannot read needle body: %v", err) //return diff --git a/weed/util/file_util.go b/weed/util/file_util.go index a39fb0860..4461bdc51 100644 --- a/weed/util/file_util.go +++ b/weed/util/file_util.go @@ -36,3 +36,11 @@ func Readln(r *bufio.Reader) ([]byte, error) { } return ln, err } + +func GetFileSize(file *os.File) (size int64, err error) { + var fi os.FileInfo + if fi, err = file.Stat(); err == nil { + size = fi.Size() + } + return +} From 16f6984d021d361fcfe7d2b8de605755135eb53b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9C=8D=E6=99=93=E6=A0=8B?= Date: Sun, 3 Jul 2016 14:11:25 +0800 Subject: [PATCH 4/5] refactor data integrity checking code v2 --- unmaintained/fix_dat/fix_dat.go | 2 +- weed/storage/needle_read_write.go | 8 ++++---- weed/storage/volume.go | 30 +++++++++++------------------- 3 files changed, 16 insertions(+), 24 deletions(-) diff --git a/unmaintained/fix_dat/fix_dat.go b/unmaintained/fix_dat/fix_dat.go index ae45f7cc8..84a2264ed 100644 --- a/unmaintained/fix_dat/fix_dat.go +++ b/unmaintained/fix_dat/fix_dat.go @@ -107,7 +107,7 @@ func iterateEntries(datFile, idxFile *os.File, visitNeedle func(n *storage.Needl fmt.Println("Recovered in f", r) } }() - if _, err = n.ReadNeedleBody(datFile, version, offset+int64(storage.NeedleHeaderSize), rest, false); err != nil { + if err = n.ReadNeedleBody(datFile, version, offset+int64(storage.NeedleHeaderSize), rest, false); err != nil { fmt.Printf("cannot read needle body: offset %d body %d %v\n", offset, rest, err) } }() diff --git a/weed/storage/needle_read_write.go b/weed/storage/needle_read_write.go index c7115f800..2f26147d6 100644 --- a/weed/storage/needle_read_write.go +++ b/weed/storage/needle_read_write.go @@ -226,20 +226,20 @@ func ReadNeedleHeader(r *os.File, version Version, offset int64) (n *Needle, bod //n should be a needle already read the header //the input stream will read until next file entry -func (n *Needle) ReadNeedleBody(r *os.File, version Version, offset int64, bodyLength uint32) (bytes []byte, err error) { +func (n *Needle) ReadNeedleBody(r *os.File, version Version, offset int64, bodyLength uint32) (err error) { if bodyLength <= 0 { - return + return nil } switch version { case Version1: - bytes = make([]byte, bodyLength) + bytes := make([]byte, bodyLength) if _, err = r.ReadAt(bytes, offset); err != nil { return } n.Data = bytes[:n.Size] n.Checksum = NewCRC(n.Data) case Version2: - bytes = make([]byte, bodyLength) + bytes := make([]byte, bodyLength) if _, err = r.ReadAt(bytes, offset); err != nil { return } diff --git a/weed/storage/volume.go b/weed/storage/volume.go index 43b239fd2..4c125fa4e 100644 --- a/weed/storage/volume.go +++ b/weed/storage/volume.go @@ -64,22 +64,14 @@ func readIndexEntryAtOffset(indexFile *os.File, offset int64, v Version) (bytes _, err = indexFile.ReadAt(bytes, offset) return } -func verifyNeedleIntegrity(datFile *os.File, v Version, offset int64, key uint64) error { - if n, bodyLength, err := ReadNeedleHeader(datFile, v, offset); err != nil { - return fmt.Errorf("can not read needle header: %v", err) - } else { - if n.Id != key { - return fmt.Errorf("index key %#x does not match needle's Id %#x", key, n.Id) - } else { - if bytes, err := n.ReadNeedleBody(datFile, v, offset+int64(NeedleHeaderSize), bodyLength); err != nil { - return fmt.Errorf("dat file's body reading failed: %v", err) - } else { - checksum := util.BytesToUint32(bytes[n.Size : n.Size+NeedleChecksumSize]) - if n.Checksum.Value() != checksum { - return fmt.Errorf("CRC check failed") - } - } - } +func verifyNeedleIntegrity(datFile *os.File, v Version, offset int64, key uint64, size uint32) error { + n := new(Needle) + err := n.ReadData(datFile, offset, size, v) + if err != nil { + return err + } + if n.Id != key { + return fmt.Errorf("index key %#x does not match needle's Id %#x", key, n.Id) } return nil } @@ -94,8 +86,8 @@ func volumeDataIntegrityChecking(v *Volume, indexFile *os.File) error { if lastIdxEntry, e = readIndexEntryAtOffset(indexFile, indexSize-NeedleIndexSize, v.Version()); e != nil { return fmt.Errorf("readLastIndexEntry failed: %v", e) } - key, offset, _ := idxFileEntry(lastIdxEntry) - if e = verifyNeedleIntegrity(v.dataFile, v.Version(), int64(offset)*NeedlePaddingSize, key); e != nil { + key, offset, size := idxFileEntry(lastIdxEntry) + if e = verifyNeedleIntegrity(v.dataFile, v.Version(), int64(offset)*NeedlePaddingSize, key, size); e != nil { return fmt.Errorf("verifyNeedleIntegrity failed: %v", e) } } else { @@ -397,7 +389,7 @@ func ScanVolumeFile(dirname string, collection string, id VolumeId, } for n != nil { if readNeedleBody { - if _, err = n.ReadNeedleBody(v.dataFile, version, offset+int64(NeedleHeaderSize), rest); err != nil { + if err = n.ReadNeedleBody(v.dataFile, version, offset+int64(NeedleHeaderSize), rest); err != nil { glog.V(0).Infof("cannot read needle body: %v", err) //err = fmt.Errorf("cannot read needle body: %v", err) //return From c527d85d979170d37a945b2ab35a46d00e5b0f56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9C=8D=E6=99=93=E6=A0=8B?= Date: Sun, 3 Jul 2016 14:13:43 +0800 Subject: [PATCH 5/5] refactor data integrity checking code v3 --- unmaintained/fix_dat/fix_dat.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unmaintained/fix_dat/fix_dat.go b/unmaintained/fix_dat/fix_dat.go index 84a2264ed..bcb985fe9 100644 --- a/unmaintained/fix_dat/fix_dat.go +++ b/unmaintained/fix_dat/fix_dat.go @@ -107,7 +107,7 @@ func iterateEntries(datFile, idxFile *os.File, visitNeedle func(n *storage.Needl fmt.Println("Recovered in f", r) } }() - if err = n.ReadNeedleBody(datFile, version, offset+int64(storage.NeedleHeaderSize), rest, false); err != nil { + if err = n.ReadNeedleBody(datFile, version, offset+int64(storage.NeedleHeaderSize), rest); err != nil { fmt.Printf("cannot read needle body: offset %d body %d %v\n", offset, rest, err) } }()