faster loading boltdb or leveldb needle map metrics by bloomfilter

avoid btree
This commit is contained in:
Chris Lu 2018-07-07 00:51:17 -07:00
parent aba1fe01b3
commit 5bfb72d058
5 changed files with 140 additions and 47 deletions

View file

@ -84,46 +84,3 @@ func (nm *baseNeedleMapper) IndexFileContent() ([]byte, error) {
defer nm.indexFileAccessLock.Unlock()
return ioutil.ReadFile(nm.indexFile.Name())
}
// mapMetric holds the running counters that logPut and logDelete maintain,
// together with the index file handle stored alongside them. The exported
// counters are serialized under the JSON names given in their tags.
type mapMetric struct {
	indexFile *os.File

	DeletionCounter     int    `json:"DeletionCounter"`
	FileCounter         int    `json:"FileCounter"`
	DeletionByteCounter uint64 `json:"DeletionByteCounter"`
	FileByteCounter     uint64 `json:"FileByteCounter"`
	MaximumFileKey      uint64 `json:"MaxFileKey"`
}

// logDelete records a single deletion that accounts for deletedByteCount bytes.
func (mm *mapMetric) logDelete(deletedByteCount uint32) {
	mm.DeletionCounter++
	mm.DeletionByteCounter += uint64(deletedByteCount)
}

// logPut records a write of newSize bytes under key, raising MaximumFileKey
// when key exceeds it. A non-zero oldSize is additionally counted as one
// deletion of oldSize bytes.
func (mm *mapMetric) logPut(key uint64, oldSize uint32, newSize uint32) {
	mm.FileCounter++
	mm.FileByteCounter += uint64(newSize)
	if mm.MaximumFileKey < key {
		mm.MaximumFileKey = key
	}
	if oldSize == 0 {
		// Nothing was replaced, so the deletion counters stay untouched.
		return
	}
	mm.DeletionCounter++
	mm.DeletionByteCounter += uint64(oldSize)
}

// ContentSize returns the total number of bytes recorded by logPut.
func (mm mapMetric) ContentSize() uint64 {
	return mm.FileByteCounter
}

// DeletedSize returns the total number of bytes recorded as deleted.
func (mm mapMetric) DeletedSize() uint64 {
	return mm.DeletionByteCounter
}

// FileCount returns how many puts have been recorded.
func (mm mapMetric) FileCount() int {
	return mm.FileCounter
}

// DeletedCount returns how many deletions have been recorded.
func (mm mapMetric) DeletedCount() int {
	return mm.DeletionCounter
}

// MaxFileKey returns the largest key passed to logPut so far.
func (mm mapMetric) MaxFileKey() uint64 {
	return mm.MaximumFileKey
}

View file

@ -33,11 +33,11 @@ func NewBoltDbNeedleMap(dbFileName string, indexFile *os.File) (m *BoltDbNeedleM
return
}
glog.V(1).Infof("Loading %s...", indexFile.Name())
nm, indexLoadError := LoadBtreeNeedleMap(indexFile)
mm, indexLoadError := newNeedleMapMetricFromIndexFile(indexFile)
if indexLoadError != nil {
return nil, indexLoadError
}
m.mapMetric = nm.mapMetric
m.mapMetric = *mm
return
}

View file

@ -31,11 +31,11 @@ func NewLevelDbNeedleMap(dbFileName string, indexFile *os.File) (m *LevelDbNeedl
return
}
glog.V(1).Infof("Loading %s...", indexFile.Name())
nm, indexLoadError := LoadBtreeNeedleMap(indexFile)
mm, indexLoadError := newNeedleMapMetricFromIndexFile(indexFile)
if indexLoadError != nil {
return nil, indexLoadError
}
m.mapMetric = nm.mapMetric
m.mapMetric = *mm
return
}

View file

@ -0,0 +1,107 @@
package storage
import (
"fmt"
"os"
"github.com/willf/bloom"
"github.com/chrislusf/seaweedfs/weed/glog"
"encoding/binary"
)
// mapMetric holds the running counters that logPut and logDelete maintain.
// The counters are serialized under the JSON names given in their tags.
type mapMetric struct {
	DeletionCounter     int    `json:"DeletionCounter"`
	FileCounter         int    `json:"FileCounter"`
	DeletionByteCounter uint64 `json:"DeletionByteCounter"`
	FileByteCounter     uint64 `json:"FileByteCounter"`
	MaximumFileKey      uint64 `json:"MaxFileKey"`
}

// logDelete records a single deletion that accounts for deletedByteCount bytes.
func (mm *mapMetric) logDelete(deletedByteCount uint32) {
	mm.DeletionCounter++
	mm.DeletionByteCounter += uint64(deletedByteCount)
}

// logPut records a write of newSize bytes under key, raising MaximumFileKey
// when key exceeds it. A non-zero oldSize is additionally counted as one
// deletion of oldSize bytes.
func (mm *mapMetric) logPut(key uint64, oldSize uint32, newSize uint32) {
	mm.FileCounter++
	mm.FileByteCounter += uint64(newSize)
	if mm.MaximumFileKey < key {
		mm.MaximumFileKey = key
	}
	if oldSize == 0 {
		// Nothing was replaced, so the deletion counters stay untouched.
		return
	}
	mm.DeletionCounter++
	mm.DeletionByteCounter += uint64(oldSize)
}

// ContentSize returns the total number of bytes recorded by logPut.
func (mm mapMetric) ContentSize() uint64 {
	return mm.FileByteCounter
}

// DeletedSize returns the total number of bytes recorded as deleted.
func (mm mapMetric) DeletedSize() uint64 {
	return mm.DeletionByteCounter
}

// FileCount returns how many puts have been recorded.
func (mm mapMetric) FileCount() int {
	return mm.FileCounter
}

// DeletedCount returns how many deletions have been recorded.
func (mm mapMetric) DeletedCount() int {
	return mm.DeletionCounter
}

// MaxFileKey returns the largest key passed to logPut so far.
func (mm mapMetric) MaxFileKey() uint64 {
	return mm.MaximumFileKey
}
// newNeedleMapMetricFromIndexFile rebuilds a mapMetric by walking the index
// file r from its last entry to its first via reverseWalkIndexFile.
//
// A bloom filter sized from the entry count (0.001 false-positive estimate)
// remembers which keys have already been visited. Because the walk is in
// reverse, the first time a key is visited is its most recent entry; any
// later visit of the same key is an older entry for that key.
//
// The returned mm is always non-nil; err carries whatever
// reverseWalkIndexFile reports.
func newNeedleMapMetricFromIndexFile(r *os.File) (mm *mapMetric, err error) {
	mm = &mapMetric{}
	var seen *bloom.BloomFilter
	keyBytes := make([]byte, 8)

	// Allocate the filter once the number of entries is known.
	sizeFilter := func(entryCount int64) {
		seen = bloom.NewWithEstimates(uint(entryCount), 0.001)
	}

	visit := func(key uint64, offset, size uint32) error {
		if mm.MaximumFileKey < key {
			mm.MaximumFileKey = key
		}
		isTombstone := size == TombstoneFileSize
		if !isTombstone {
			mm.FileByteCounter += uint64(size)
		}

		binary.BigEndian.PutUint64(keyBytes, key)
		if seen.Test(keyBytes) {
			// The filter reports this key as already visited: count the
			// entry as a deletion, and count its bytes as deleted unless
			// the entry itself is a tombstone.
			mm.DeletionCounter++
			if !isTombstone {
				mm.DeletionByteCounter += uint64(size)
			}
			return nil
		}

		// First visit of this key.
		mm.FileCounter++
		seen.Add(keyBytes)
		return nil
	}

	err = reverseWalkIndexFile(r, sizeFilter, visit)
	return mm, err
}
// reverseWalkIndexFile visits every NeedleIndexSize-byte entry of the index
// file r, starting with the last entry and moving toward the start of the file.
//
// initFn is called exactly once, before any entry is read, with the number of
// entries in the file (file size divided by NeedleIndexSize). fn is then
// called with the key, offset and size that idxFileEntry decodes from each
// entry.
//
// An error is returned when the file cannot be stat'ed, when its size is not
// a multiple of NeedleIndexSize, when an entry cannot be read in full, or
// when fn returns a non-nil error (which stops the walk and is returned
// unchanged).
func reverseWalkIndexFile(r *os.File, initFn func(entryCount int64), fn func(key uint64, offset, size uint32) error) error {
	fi, err := r.Stat()
	if err != nil {
		return fmt.Errorf("file %s stat error: %v", r.Name(), err)
	}
	fileSize := fi.Size()
	if fileSize%NeedleIndexSize != 0 {
		return fmt.Errorf("unexpected file %s size: %d", r.Name(), fileSize)
	}

	initFn(fileSize / NeedleIndexSize)

	entry := make([]byte, NeedleIndexSize)
	for readerOffset := fileSize - NeedleIndexSize; readerOffset >= 0; readerOffset -= NeedleIndexSize {
		count, e := r.ReadAt(entry, readerOffset)
		glog.V(3).Infoln("file", r.Name(), "readerOffset", readerOffset, "count", count, "e", e)
		// ReadAt reports a non-nil error whenever it returns fewer bytes than
		// requested. Decoding a partially filled buffer would hand stale data
		// from the previous iteration to fn, so stop instead.
		if count < len(entry) {
			return fmt.Errorf("file %s read error at offset %d: read %d of %d bytes: %v", r.Name(), readerOffset, count, len(entry), e)
		}
		key, offset, size := idxFileEntry(entry)
		if e = fn(key, offset, size); e != nil {
			return e
		}
	}
	return nil
}

View file

@ -0,0 +1,29 @@
package storage
import (
"testing"
"io/ioutil"
"math/rand"
"github.com/chrislusf/seaweedfs/weed/glog"
)
// TestFastLoadingNeedleMapMetrics fills a btree needle map backed by a
// temporary index file with 10000 puts and randomly interleaved deletes, then
// recomputes the metrics from the index file with
// newNeedleMapMetricFromIndexFile and logs both sets of values side by side.
//
// The values are only logged, not asserted against each other.
func TestFastLoadingNeedleMapMetrics(t *testing.T) {
	idxFile, err := ioutil.TempFile("", "tmp.idx")
	if err != nil {
		t.Fatalf("create temp index file: %v", err)
	}
	defer idxFile.Close()

	nm := NewBtreeNeedleMap(idxFile)

	for i := 0; i < 10000; i++ {
		nm.Put(uint64(i+1), uint32(0), uint32(1))
		// rand.Int63n panics when its argument is not positive, so a delete
		// can only be drawn once at least one earlier key (1..i) exists.
		if i > 0 && rand.Float32() < 0.2 {
			nm.Delete(uint64(rand.Int63n(int64(i))+1), uint32(0))
		}
	}

	mm, err := newNeedleMapMetricFromIndexFile(idxFile)
	if err != nil {
		t.Fatalf("load metrics from index file %s: %v", idxFile.Name(), err)
	}

	glog.V(0).Infof("FileCount expected %d actual %d", nm.FileCount(), mm.FileCount())
	glog.V(0).Infof("DeletedSize expected %d actual %d", nm.DeletedSize(), mm.DeletedSize())
	glog.V(0).Infof("ContentSize expected %d actual %d", nm.ContentSize(), mm.ContentSize())
	glog.V(0).Infof("DeletedCount expected %d actual %d", nm.DeletedCount(), mm.DeletedCount())
	glog.V(0).Infof("MaxFileKey expected %d actual %d", nm.MaxFileKey(), mm.MaxFileKey())
}