mirror of
https://github.com/seaweedfs/seaweedfs.git
synced 2024-01-19 02:48:24 +00:00
Load boltdb and leveldb needle map metrics faster by using a bloom filter
instead of building a btree needle map
This commit is contained in:
parent
aba1fe01b3
commit
5bfb72d058
|
@ -84,46 +84,3 @@ func (nm *baseNeedleMapper) IndexFileContent() ([]byte, error) {
|
|||
defer nm.indexFileAccessLock.Unlock()
|
||||
return ioutil.ReadFile(nm.indexFile.Name())
|
||||
}
|
||||
|
||||
// mapMetric holds the running counters kept for a needle map, together with
// the index file handle it is associated with.
type mapMetric struct {
	indexFile *os.File

	DeletionCounter     int    `json:"DeletionCounter"`
	FileCounter         int    `json:"FileCounter"`
	DeletionByteCounter uint64 `json:"DeletionByteCounter"`
	FileByteCounter     uint64 `json:"FileByteCounter"`
	MaximumFileKey      uint64 `json:"MaxFileKey"`
}

// logDelete records one deletion that released deletedByteCount bytes.
func (mm *mapMetric) logDelete(deletedByteCount uint32) {
	mm.DeletionCounter++
	mm.DeletionByteCounter += uint64(deletedByteCount)
}

// logPut records one write of newSize bytes under key. Every call counts as a
// file; when oldSize is non-zero the replaced entry is also counted as a
// deletion of oldSize bytes.
func (mm *mapMetric) logPut(key uint64, oldSize uint32, newSize uint32) {
	mm.FileCounter++
	mm.FileByteCounter += uint64(newSize)
	if mm.MaximumFileKey < key {
		mm.MaximumFileKey = key
	}
	if oldSize == 0 {
		return
	}
	mm.DeletionCounter++
	mm.DeletionByteCounter += uint64(oldSize)
}

// ContentSize returns the total number of bytes recorded by puts.
func (mm mapMetric) ContentSize() uint64 {
	return mm.FileByteCounter
}

// DeletedSize returns the total number of bytes recorded as deleted.
func (mm mapMetric) DeletedSize() uint64 {
	return mm.DeletionByteCounter
}

// FileCount returns the number of puts recorded.
func (mm mapMetric) FileCount() int {
	return mm.FileCounter
}

// DeletedCount returns the number of deletions recorded.
func (mm mapMetric) DeletedCount() int {
	return mm.DeletionCounter
}

// MaxFileKey returns the largest key recorded by a put.
func (mm mapMetric) MaxFileKey() uint64 {
	return mm.MaximumFileKey
}
|
||||
|
|
|
@ -33,11 +33,11 @@ func NewBoltDbNeedleMap(dbFileName string, indexFile *os.File) (m *BoltDbNeedleM
|
|||
return
|
||||
}
|
||||
glog.V(1).Infof("Loading %s...", indexFile.Name())
|
||||
nm, indexLoadError := LoadBtreeNeedleMap(indexFile)
|
||||
mm, indexLoadError := newNeedleMapMetricFromIndexFile(indexFile)
|
||||
if indexLoadError != nil {
|
||||
return nil, indexLoadError
|
||||
}
|
||||
m.mapMetric = nm.mapMetric
|
||||
m.mapMetric = *mm
|
||||
return
|
||||
}
|
||||
|
||||
|
|
|
@ -31,11 +31,11 @@ func NewLevelDbNeedleMap(dbFileName string, indexFile *os.File) (m *LevelDbNeedl
|
|||
return
|
||||
}
|
||||
glog.V(1).Infof("Loading %s...", indexFile.Name())
|
||||
nm, indexLoadError := LoadBtreeNeedleMap(indexFile)
|
||||
mm, indexLoadError := newNeedleMapMetricFromIndexFile(indexFile)
|
||||
if indexLoadError != nil {
|
||||
return nil, indexLoadError
|
||||
}
|
||||
m.mapMetric = nm.mapMetric
|
||||
m.mapMetric = *mm
|
||||
return
|
||||
}
|
||||
|
||||
|
|
107
weed/storage/needle_map_metric.go
Normal file
107
weed/storage/needle_map_metric.go
Normal file
|
@ -0,0 +1,107 @@
|
|||
package storage
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"github.com/willf/bloom"
|
||||
"github.com/chrislusf/seaweedfs/weed/glog"
|
||||
"encoding/binary"
|
||||
)
|
||||
|
||||
// mapMetric holds the running counters kept for a needle map.
type mapMetric struct {
	DeletionCounter     int    `json:"DeletionCounter"`
	FileCounter         int    `json:"FileCounter"`
	DeletionByteCounter uint64 `json:"DeletionByteCounter"`
	FileByteCounter     uint64 `json:"FileByteCounter"`
	MaximumFileKey      uint64 `json:"MaxFileKey"`
}

// logDelete records one deletion that released deletedByteCount bytes.
func (mm *mapMetric) logDelete(deletedByteCount uint32) {
	mm.DeletionCounter++
	mm.DeletionByteCounter += uint64(deletedByteCount)
}

// logPut records one write of newSize bytes under key. Every call counts as a
// file; when oldSize is non-zero the replaced entry is also counted as a
// deletion of oldSize bytes.
func (mm *mapMetric) logPut(key uint64, oldSize uint32, newSize uint32) {
	mm.FileCounter++
	mm.FileByteCounter += uint64(newSize)
	if mm.MaximumFileKey < key {
		mm.MaximumFileKey = key
	}
	if oldSize == 0 {
		return
	}
	mm.DeletionCounter++
	mm.DeletionByteCounter += uint64(oldSize)
}

// ContentSize returns the total number of bytes recorded by puts.
func (mm mapMetric) ContentSize() uint64 {
	return mm.FileByteCounter
}

// DeletedSize returns the total number of bytes recorded as deleted.
func (mm mapMetric) DeletedSize() uint64 {
	return mm.DeletionByteCounter
}

// FileCount returns the number of puts recorded.
func (mm mapMetric) FileCount() int {
	return mm.FileCounter
}

// DeletedCount returns the number of deletions recorded.
func (mm mapMetric) DeletedCount() int {
	return mm.DeletionCounter
}

// MaxFileKey returns the largest key recorded by a put.
func (mm mapMetric) MaxFileKey() uint64 {
	return mm.MaximumFileKey
}
|
||||
|
||||
func newNeedleMapMetricFromIndexFile(r *os.File) (mm *mapMetric, err error) {
|
||||
mm = &mapMetric{}
|
||||
var bf *bloom.BloomFilter
|
||||
buf := make([]byte, 8)
|
||||
err = reverseWalkIndexFile(r, func(entryCount int64) {
|
||||
bf = bloom.NewWithEstimates(uint(entryCount), 0.001)
|
||||
}, func(key uint64, offset, size uint32) error {
|
||||
|
||||
if key > mm.MaximumFileKey {
|
||||
mm.MaximumFileKey = key
|
||||
}
|
||||
|
||||
binary.BigEndian.PutUint64(buf, key)
|
||||
if size != TombstoneFileSize {
|
||||
mm.FileByteCounter += uint64(size)
|
||||
}
|
||||
|
||||
if !bf.Test(buf) {
|
||||
mm.FileCounter++
|
||||
bf.Add(buf)
|
||||
} else {
|
||||
// deleted file
|
||||
mm.DeletionCounter++
|
||||
if size != TombstoneFileSize {
|
||||
// previously already deleted file
|
||||
mm.DeletionByteCounter += uint64(size)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
func reverseWalkIndexFile(r *os.File, initFn func(entryCount int64), fn func(key uint64, offset, size uint32) error) error {
|
||||
fi, err := r.Stat()
|
||||
if err != nil {
|
||||
return fmt.Errorf("file %s stat error: %v", r.Name(), err)
|
||||
}
|
||||
fileSize := fi.Size()
|
||||
if fileSize%NeedleIndexSize != 0 {
|
||||
return fmt.Errorf("unexpected file %s size: %d", r.Name(), fileSize)
|
||||
}
|
||||
|
||||
initFn(fileSize / NeedleIndexSize)
|
||||
|
||||
bytes := make([]byte, NeedleIndexSize)
|
||||
for readerOffset := fileSize - NeedleIndexSize; readerOffset >= 0; readerOffset -= NeedleIndexSize {
|
||||
count, e := r.ReadAt(bytes, readerOffset)
|
||||
glog.V(3).Infoln("file", r.Name(), "readerOffset", readerOffset, "count", count, "e", e)
|
||||
key, offset, size := idxFileEntry(bytes)
|
||||
if e = fn(key, offset, size); e != nil {
|
||||
return e
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
29
weed/storage/needle_map_metric_test.go
Normal file
29
weed/storage/needle_map_metric_test.go
Normal file
|
@ -0,0 +1,29 @@
|
|||
package storage
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"io/ioutil"
|
||||
"math/rand"
|
||||
"github.com/chrislusf/seaweedfs/weed/glog"
|
||||
)
|
||||
|
||||
func TestFastLoadingNeedleMapMetrics(t *testing.T) {
|
||||
|
||||
idxFile, _ := ioutil.TempFile("", "tmp.idx")
|
||||
nm := NewBtreeNeedleMap(idxFile)
|
||||
|
||||
for i := 0; i < 10000; i++ {
|
||||
nm.Put(uint64(i+1), uint32(0), uint32(1))
|
||||
if rand.Float32() < 0.2 {
|
||||
nm.Delete(uint64(rand.Int63n(int64(i))+1), uint32(0))
|
||||
}
|
||||
}
|
||||
|
||||
mm, _ := newNeedleMapMetricFromIndexFile(idxFile)
|
||||
|
||||
glog.V(0).Infof("FileCount expected %d actual %d", nm.FileCount(), mm.FileCount())
|
||||
glog.V(0).Infof("DeletedSize expected %d actual %d", nm.DeletedSize(), mm.DeletedSize())
|
||||
glog.V(0).Infof("ContentSize expected %d actual %d", nm.ContentSize(), mm.ContentSize())
|
||||
glog.V(0).Infof("DeletedCount expected %d actual %d", nm.DeletedCount(), mm.DeletedCount())
|
||||
glog.V(0).Infof("MaxFileKey expected %d actual %d", nm.MaxFileKey(), mm.MaxFileKey())
|
||||
}
|
Loading…
Reference in a new issue