mirror of
https://github.com/seaweedfs/seaweedfs.git
synced 2024-01-19 02:48:24 +00:00
faster loading boltdb or leveldb needle map metrics by bloomfilter
avoid btree
This commit is contained in:
parent
aba1fe01b3
commit
5bfb72d058
|
@ -84,46 +84,3 @@ func (nm *baseNeedleMapper) IndexFileContent() ([]byte, error) {
|
||||||
defer nm.indexFileAccessLock.Unlock()
|
defer nm.indexFileAccessLock.Unlock()
|
||||||
return ioutil.ReadFile(nm.indexFile.Name())
|
return ioutil.ReadFile(nm.indexFile.Name())
|
||||||
}
|
}
|
||||||
|
|
||||||
// mapMetric accumulates counters describing the entries recorded in a
// needle map: how many puts and deletions were logged, how many bytes each
// group accounts for, and the largest file key seen so far.
type mapMetric struct {
	// indexFile is not referenced by any method in this block.
	indexFile *os.File

	DeletionCounter     int    `json:"DeletionCounter"`
	FileCounter         int    `json:"FileCounter"`
	DeletionByteCounter uint64 `json:"DeletionByteCounter"`
	FileByteCounter     uint64 `json:"FileByteCounter"`
	MaximumFileKey      uint64 `json:"MaxFileKey"`
}

// logDelete records one deletion accounting for deletedByteCount bytes.
func (mm *mapMetric) logDelete(deletedByteCount uint32) {
	mm.DeletionCounter++
	mm.DeletionByteCounter += uint64(deletedByteCount)
}

// logPut records one write of newSize bytes under key and raises the
// maximum file key when key exceeds it. A non-zero oldSize is additionally
// counted as one deletion of oldSize bytes.
func (mm *mapMetric) logPut(key uint64, oldSize uint32, newSize uint32) {
	mm.FileCounter++
	mm.FileByteCounter += uint64(newSize)
	if mm.MaximumFileKey < key {
		mm.MaximumFileKey = key
	}
	if oldSize == 0 {
		return
	}
	mm.DeletionByteCounter += uint64(oldSize)
	mm.DeletionCounter++
}

// ContentSize returns the accumulated byte count of logged puts.
func (mm mapMetric) ContentSize() uint64 { return mm.FileByteCounter }

// DeletedSize returns the accumulated byte count of logged deletions.
func (mm mapMetric) DeletedSize() uint64 { return mm.DeletionByteCounter }

// FileCount returns the number of logged puts.
func (mm mapMetric) FileCount() int { return mm.FileCounter }

// DeletedCount returns the number of logged deletions.
func (mm mapMetric) DeletedCount() int { return mm.DeletionCounter }

// MaxFileKey returns the largest key passed to logPut so far.
func (mm mapMetric) MaxFileKey() uint64 { return mm.MaximumFileKey }
|
|
||||||
|
|
|
@ -33,11 +33,11 @@ func NewBoltDbNeedleMap(dbFileName string, indexFile *os.File) (m *BoltDbNeedleM
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
glog.V(1).Infof("Loading %s...", indexFile.Name())
|
glog.V(1).Infof("Loading %s...", indexFile.Name())
|
||||||
nm, indexLoadError := LoadBtreeNeedleMap(indexFile)
|
mm, indexLoadError := newNeedleMapMetricFromIndexFile(indexFile)
|
||||||
if indexLoadError != nil {
|
if indexLoadError != nil {
|
||||||
return nil, indexLoadError
|
return nil, indexLoadError
|
||||||
}
|
}
|
||||||
m.mapMetric = nm.mapMetric
|
m.mapMetric = *mm
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -31,11 +31,11 @@ func NewLevelDbNeedleMap(dbFileName string, indexFile *os.File) (m *LevelDbNeedl
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
glog.V(1).Infof("Loading %s...", indexFile.Name())
|
glog.V(1).Infof("Loading %s...", indexFile.Name())
|
||||||
nm, indexLoadError := LoadBtreeNeedleMap(indexFile)
|
mm, indexLoadError := newNeedleMapMetricFromIndexFile(indexFile)
|
||||||
if indexLoadError != nil {
|
if indexLoadError != nil {
|
||||||
return nil, indexLoadError
|
return nil, indexLoadError
|
||||||
}
|
}
|
||||||
m.mapMetric = nm.mapMetric
|
m.mapMetric = *mm
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
107
weed/storage/needle_map_metric.go
Normal file
107
weed/storage/needle_map_metric.go
Normal file
|
@ -0,0 +1,107 @@
|
||||||
|
package storage
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"github.com/willf/bloom"
|
||||||
|
"github.com/chrislusf/seaweedfs/weed/glog"
|
||||||
|
"encoding/binary"
|
||||||
|
)
|
||||||
|
|
||||||
|
// mapMetric accumulates counters describing the entries recorded in a
// needle map: how many puts and deletions were logged, how many bytes each
// group accounts for, and the largest file key seen so far.
type mapMetric struct {
	DeletionCounter     int    `json:"DeletionCounter"`
	FileCounter         int    `json:"FileCounter"`
	DeletionByteCounter uint64 `json:"DeletionByteCounter"`
	FileByteCounter     uint64 `json:"FileByteCounter"`
	MaximumFileKey      uint64 `json:"MaxFileKey"`
}

// logDelete records one deletion accounting for deletedByteCount bytes.
func (mm *mapMetric) logDelete(deletedByteCount uint32) {
	mm.DeletionCounter++
	mm.DeletionByteCounter += uint64(deletedByteCount)
}

// logPut records one write of newSize bytes under key and raises the
// maximum file key when key exceeds it. A non-zero oldSize is additionally
// counted as one deletion of oldSize bytes.
func (mm *mapMetric) logPut(key uint64, oldSize uint32, newSize uint32) {
	mm.FileCounter++
	mm.FileByteCounter += uint64(newSize)
	if mm.MaximumFileKey < key {
		mm.MaximumFileKey = key
	}
	if oldSize == 0 {
		return
	}
	mm.DeletionByteCounter += uint64(oldSize)
	mm.DeletionCounter++
}

// ContentSize returns the accumulated byte count of logged puts.
func (mm mapMetric) ContentSize() uint64 { return mm.FileByteCounter }

// DeletedSize returns the accumulated byte count of logged deletions.
func (mm mapMetric) DeletedSize() uint64 { return mm.DeletionByteCounter }

// FileCount returns the number of logged puts.
func (mm mapMetric) FileCount() int { return mm.FileCounter }

// DeletedCount returns the number of logged deletions.
func (mm mapMetric) DeletedCount() int { return mm.DeletionCounter }

// MaxFileKey returns the largest key passed to logPut so far.
func (mm mapMetric) MaxFileKey() uint64 { return mm.MaximumFileKey }
|
||||||
|
|
||||||
|
func newNeedleMapMetricFromIndexFile(r *os.File) (mm *mapMetric, err error) {
|
||||||
|
mm = &mapMetric{}
|
||||||
|
var bf *bloom.BloomFilter
|
||||||
|
buf := make([]byte, 8)
|
||||||
|
err = reverseWalkIndexFile(r, func(entryCount int64) {
|
||||||
|
bf = bloom.NewWithEstimates(uint(entryCount), 0.001)
|
||||||
|
}, func(key uint64, offset, size uint32) error {
|
||||||
|
|
||||||
|
if key > mm.MaximumFileKey {
|
||||||
|
mm.MaximumFileKey = key
|
||||||
|
}
|
||||||
|
|
||||||
|
binary.BigEndian.PutUint64(buf, key)
|
||||||
|
if size != TombstoneFileSize {
|
||||||
|
mm.FileByteCounter += uint64(size)
|
||||||
|
}
|
||||||
|
|
||||||
|
if !bf.Test(buf) {
|
||||||
|
mm.FileCounter++
|
||||||
|
bf.Add(buf)
|
||||||
|
} else {
|
||||||
|
// deleted file
|
||||||
|
mm.DeletionCounter++
|
||||||
|
if size != TombstoneFileSize {
|
||||||
|
// previously already deleted file
|
||||||
|
mm.DeletionByteCounter += uint64(size)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
func reverseWalkIndexFile(r *os.File, initFn func(entryCount int64), fn func(key uint64, offset, size uint32) error) error {
|
||||||
|
fi, err := r.Stat()
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("file %s stat error: %v", r.Name(), err)
|
||||||
|
}
|
||||||
|
fileSize := fi.Size()
|
||||||
|
if fileSize%NeedleIndexSize != 0 {
|
||||||
|
return fmt.Errorf("unexpected file %s size: %d", r.Name(), fileSize)
|
||||||
|
}
|
||||||
|
|
||||||
|
initFn(fileSize / NeedleIndexSize)
|
||||||
|
|
||||||
|
bytes := make([]byte, NeedleIndexSize)
|
||||||
|
for readerOffset := fileSize - NeedleIndexSize; readerOffset >= 0; readerOffset -= NeedleIndexSize {
|
||||||
|
count, e := r.ReadAt(bytes, readerOffset)
|
||||||
|
glog.V(3).Infoln("file", r.Name(), "readerOffset", readerOffset, "count", count, "e", e)
|
||||||
|
key, offset, size := idxFileEntry(bytes)
|
||||||
|
if e = fn(key, offset, size); e != nil {
|
||||||
|
return e
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
29
weed/storage/needle_map_metric_test.go
Normal file
29
weed/storage/needle_map_metric_test.go
Normal file
|
@ -0,0 +1,29 @@
|
||||||
|
package storage
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
"io/ioutil"
|
||||||
|
"math/rand"
|
||||||
|
"github.com/chrislusf/seaweedfs/weed/glog"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestFastLoadingNeedleMapMetrics(t *testing.T) {
|
||||||
|
|
||||||
|
idxFile, _ := ioutil.TempFile("", "tmp.idx")
|
||||||
|
nm := NewBtreeNeedleMap(idxFile)
|
||||||
|
|
||||||
|
for i := 0; i < 10000; i++ {
|
||||||
|
nm.Put(uint64(i+1), uint32(0), uint32(1))
|
||||||
|
if rand.Float32() < 0.2 {
|
||||||
|
nm.Delete(uint64(rand.Int63n(int64(i))+1), uint32(0))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
mm, _ := newNeedleMapMetricFromIndexFile(idxFile)
|
||||||
|
|
||||||
|
glog.V(0).Infof("FileCount expected %d actual %d", nm.FileCount(), mm.FileCount())
|
||||||
|
glog.V(0).Infof("DeletedSize expected %d actual %d", nm.DeletedSize(), mm.DeletedSize())
|
||||||
|
glog.V(0).Infof("ContentSize expected %d actual %d", nm.ContentSize(), mm.ContentSize())
|
||||||
|
glog.V(0).Infof("DeletedCount expected %d actual %d", nm.DeletedCount(), mm.DeletedCount())
|
||||||
|
glog.V(0).Infof("MaxFileKey expected %d actual %d", nm.MaxFileKey(), mm.MaxFileKey())
|
||||||
|
}
|
Loading…
Reference in a new issue