2015-03-27 23:34:58 +00:00
package storage
import (
"fmt"
"os"
"path/filepath"
2022-07-19 01:20:45 +00:00
"strings"
2015-03-27 23:34:58 +00:00
2020-05-22 17:54:42 +00:00
"github.com/syndtr/goleveldb/leveldb/errors"
2019-05-22 05:41:20 +00:00
"github.com/syndtr/goleveldb/leveldb/opt"
2020-05-22 17:54:42 +00:00
"github.com/chrislusf/seaweedfs/weed/storage/idx"
2022-07-19 01:20:45 +00:00
"github.com/chrislusf/seaweedfs/weed/storage/types"
"github.com/chrislusf/seaweedfs/weed/util"
2020-05-22 17:54:42 +00:00
"github.com/syndtr/goleveldb/leveldb"
2016-06-03 01:09:14 +00:00
"github.com/chrislusf/seaweedfs/weed/glog"
2019-04-19 04:43:36 +00:00
"github.com/chrislusf/seaweedfs/weed/storage/needle_map"
2018-07-08 09:28:04 +00:00
. "github.com/chrislusf/seaweedfs/weed/storage/types"
2015-03-27 23:34:58 +00:00
)
2022-07-19 01:20:45 +00:00
//use "2 >> 16" to reduce cpu cost
const milestoneCnt = 40
const milestoneKey = 0xffffffffffffffff - 1
2015-03-27 23:34:58 +00:00
type LevelDbNeedleMap struct {
2019-05-05 04:33:05 +00:00
baseNeedleMapper
2015-03-27 23:34:58 +00:00
dbFileName string
db * leveldb . DB
2022-07-19 01:20:45 +00:00
recordNum uint64
2015-03-27 23:34:58 +00:00
}
2019-04-09 16:42:06 +00:00
func NewLevelDbNeedleMap ( dbFileName string , indexFile * os . File , opts * opt . Options ) ( m * LevelDbNeedleMap , err error ) {
2022-07-19 01:20:45 +00:00
glog . V ( 0 ) . Infof ( "NewLevelDbNeedleMap pocessing %s..." , indexFile . Name ( ) )
db , errd := leveldb . OpenFile ( dbFileName , opts )
glog . V ( 0 ) . Infof ( "begain %v %s %d" , errd , dbFileName , getMileStone ( db ) )
db . Close ( )
2015-05-26 07:58:41 +00:00
m = & LevelDbNeedleMap { dbFileName : dbFileName }
m . indexFile = indexFile
2015-03-27 23:34:58 +00:00
if ! isLevelDbFresh ( dbFileName , indexFile ) {
2022-07-19 01:20:45 +00:00
glog . V ( 0 ) . Infof ( "Start to Generate %s from %s" , dbFileName , indexFile . Name ( ) )
Add boltdb for volume needle map
boltdb is fairly slow to write, about 6 minutes for recreating index
for 1553934 files. Boltdb loads 1,553,934 x 16 = 24,862,944bytes from
disk, and generate the boltdb as large as 134,217,728 bytes in 6
minutes.
To compare, for leveldb, it recreates index in leveldb as large as
27,188,148 bytes in 8 seconds.
For in memory version, it loads the index in
To test the memory consumption, the leveldb or boltdb index are
created. And the server is restarted. Using the benchmark tool to read
lots of files. There are 7 volumes in benchmark collection, each with
about 1553K files.
For leveldb, the memory starts at 142,884KB, and stays at 179,340KB.
For boltdb, the memory starts at 73,756KB, and stays at 144,564KB.
For in-memory, the memory starts at 368,152KB, and stays at 448,032KB.
2015-03-29 18:04:32 +00:00
generateLevelDbFile ( dbFileName , indexFile )
2022-07-19 01:20:45 +00:00
glog . V ( 0 ) . Infof ( "Finished Generating %s from %s" , dbFileName , indexFile . Name ( ) )
2015-03-27 23:34:58 +00:00
}
2021-02-20 20:39:25 +00:00
if stat , err := indexFile . Stat ( ) ; err != nil {
glog . Fatalf ( "stat file %s: %v" , indexFile . Name ( ) , err )
} else {
m . indexFileOffset = stat . Size ( )
}
2022-07-19 01:20:45 +00:00
glog . V ( 0 ) . Infof ( "Opening %s..." , dbFileName )
2019-04-09 16:42:06 +00:00
if m . db , err = leveldb . OpenFile ( dbFileName , opts ) ; err != nil {
2020-05-22 17:54:42 +00:00
if errors . IsCorrupted ( err ) {
m . db , err = leveldb . RecoverFile ( dbFileName , opts )
}
2020-05-26 07:03:44 +00:00
if err != nil {
return
}
2015-03-27 23:34:58 +00:00
}
2022-07-19 01:20:45 +00:00
glog . V ( 0 ) . Infof ( "getMileStone %s : %d" , dbFileName , getMileStone ( m . db ) )
m . recordNum = uint64 ( m . indexFileOffset / types . NeedleMapEntrySize )
milestone := ( m . recordNum / milestoneCnt ) * milestoneCnt
err = setMileStone ( m . db , milestone )
if err != nil {
return
}
glog . V ( 0 ) . Infof ( "Loading %s... %d %d" , indexFile . Name ( ) , milestone , getMileStone ( m . db ) )
2018-07-07 07:51:17 +00:00
mm , indexLoadError := newNeedleMapMetricFromIndexFile ( indexFile )
2015-03-27 23:34:58 +00:00
if indexLoadError != nil {
return nil , indexLoadError
}
2022-07-19 01:20:45 +00:00
glog . V ( 0 ) . Infof ( "finish Loading %s..." , indexFile . Name ( ) )
2018-07-07 07:51:17 +00:00
m . mapMetric = * mm
2015-03-27 23:34:58 +00:00
return
}
// isLevelDbFresh reports whether the LevelDB directory was modified after the
// index file, i.e. whether it can be reused without regeneration. Any stat or
// open failure is treated as "not fresh".
func isLevelDbFresh(dbFileName string, indexFile *os.File) bool {
	// normally we always write to the index file first
	dbLogFile, openErr := os.Open(filepath.Join(dbFileName, "LOG"))
	if openErr != nil {
		return false
	}
	defer dbLogFile.Close()

	dbStat, dbStatErr := dbLogFile.Stat()
	indexStat, indexStatErr := indexFile.Stat()
	if dbStatErr != nil || indexStatErr != nil {
		glog.V(0).Infof("Can not stat file: %v and %v", dbStatErr, indexStatErr)
		return false
	}
	return dbStat.ModTime().After(indexStat.ModTime())
}
// NOTE(review): the following paragraph is a stray commit-message fragment
// (boltdb-vs-leveldb benchmark notes) that leaked into the source during an
// extraction; kept as a comment so the surrounding code stays parseable.
//
// Add boltdb for volume needle map
// boltdb is fairly slow to write, about 6 minutes for recreating index
// for 1553934 files. Boltdb loads 1,553,934 x 16 = 24,862,944 bytes from
// disk, and generates the boltdb as large as 134,217,728 bytes in 6
// minutes.
// To compare, for leveldb, it recreates the index in leveldb as large as
// 27,188,148 bytes in 8 seconds.
// To test the memory consumption, the leveldb or boltdb index is
// created, and the server is restarted. Using the benchmark tool to read
// lots of files: there are 7 volumes in the benchmark collection, each with
// about 1553K files.
// For leveldb, the memory starts at 142,884KB and stays at 179,340KB.
// For boltdb, the memory starts at 73,756KB and stays at 144,564KB.
// For in-memory, the memory starts at 368,152KB and stays at 448,032KB.
func generateLevelDbFile ( dbFileName string , indexFile * os . File ) error {
2015-03-27 23:34:58 +00:00
db , err := leveldb . OpenFile ( dbFileName , nil )
if err != nil {
return err
}
defer db . Close ( )
2022-07-19 01:20:45 +00:00
milestone := getMileStone ( db )
if stat , err := indexFile . Stat ( ) ; err != nil {
glog . Fatalf ( "stat file %s: %v" , indexFile . Name ( ) , err )
return err
} else {
if milestone * types . NeedleMapEntrySize > uint64 ( stat . Size ( ) ) {
glog . Warningf ( "wrong milestone %d for filesize %d, set milestone to 0" , milestone , stat . Size ( ) )
milestone = 0
}
glog . V ( 0 ) . Infof ( "generateLevelDbFile %s, milestone %d, num of entries:%d" , dbFileName , milestone , ( uint64 ( stat . Size ( ) ) - milestone * types . NeedleMapEntrySize ) / types . NeedleMapEntrySize )
}
return idx . WalkIndexFileIncrent ( indexFile , milestone , func ( key NeedleId , offset Offset , size Size ) error {
2020-09-12 19:42:36 +00:00
if ! offset . IsZero ( ) && size . IsValid ( ) {
2022-07-19 01:20:45 +00:00
levelDbWrite ( db , key , offset , size , 0 )
2015-03-27 23:34:58 +00:00
} else {
levelDbDelete ( db , key )
}
return nil
} )
}
2019-04-19 04:43:36 +00:00
func ( m * LevelDbNeedleMap ) Get ( key NeedleId ) ( element * needle_map . NeedleValue , ok bool ) {
2018-07-08 09:28:04 +00:00
bytes := make ( [ ] byte , NeedleIdSize )
NeedleIdToBytes ( bytes [ 0 : NeedleIdSize ] , key )
2015-03-27 23:34:58 +00:00
data , err := m . db . Get ( bytes , nil )
2018-07-08 09:28:04 +00:00
if err != nil || len ( data ) != OffsetSize + SizeSize {
2015-03-27 23:34:58 +00:00
return nil , false
}
2018-07-08 09:28:04 +00:00
offset := BytesToOffset ( data [ 0 : OffsetSize ] )
2020-08-19 00:04:28 +00:00
size := BytesToSize ( data [ OffsetSize : OffsetSize + SizeSize ] )
2019-06-21 08:14:10 +00:00
return & needle_map . NeedleValue { Key : key , Offset : offset , Size : size } , true
2015-03-27 23:34:58 +00:00
}
2020-08-19 00:04:28 +00:00
func ( m * LevelDbNeedleMap ) Put ( key NeedleId , offset Offset , size Size ) error {
var oldSize Size
2022-07-19 01:20:45 +00:00
var milestone uint64
2015-03-27 23:34:58 +00:00
if oldNeedle , ok := m . Get ( key ) ; ok {
oldSize = oldNeedle . Size
}
m . logPut ( key , oldSize , size )
// write to index file first
2015-05-26 07:58:41 +00:00
if err := m . appendToIndexFile ( key , offset , size ) ; err != nil {
2015-03-27 23:34:58 +00:00
return fmt . Errorf ( "cannot write to indexfile %s: %v" , m . indexFile . Name ( ) , err )
}
2022-07-19 01:20:45 +00:00
//atomic.AddUint64(&m.recordNum, 1)
//milestone = atomic.LoadUint64(&m.recordNum)
m . recordNum ++
if m . recordNum % milestoneCnt != 0 {
milestone = 0
} else {
milestone = ( m . recordNum / milestoneCnt ) * milestoneCnt
glog . V ( 0 ) . Infof ( "put cnt:%d milestone:%s %d" , m . recordNum , m . dbFileName , milestone )
}
return levelDbWrite ( m . db , key , offset , size , milestone )
2015-03-27 23:34:58 +00:00
}
2022-07-19 01:20:45 +00:00
// getMileStone reads the persisted record-count milestone from db, returning
// 0 when it is missing or unreadable. When the stored value is corrupt (or
// the read failed for a reason other than "not found"), the milestone is
// reset to 0 in the database.
//
// Defect fixed: err.Error() was called unconditionally, panicking with a nil
// pointer dereference when err was nil but len(data) != 8.
func getMileStone(db *leveldb.DB) uint64 {
	var mskBytes = make([]byte, 8)
	util.Uint64toBytes(mskBytes, milestoneKey)
	data, err := db.Get(mskBytes, nil)
	if err != nil || len(data) != 8 {
		glog.Warningf("get milestone from db error: %v, %d", err, len(data))
		// reset the milestone unless the key simply does not exist yet;
		// guard the err.Error() call — err may be nil when only the
		// length check failed
		if err == nil || !strings.Contains(strings.ToLower(err.Error()), "not found") {
			if setErr := setMileStone(db, 0); setErr != nil {
				glog.Errorf("failed to set milestone: %v", setErr)
			}
		}
		return 0
	}
	return util.BytesToUint64(data)
}
// setMileStone persists the record-count milestone into db under milestoneKey.
//
// Cleanup: removed the debug read-back (an extra db.Get via getMileStone on
// every call, logged with a typo'd message) left over from development.
func setMileStone(db *leveldb.DB, milestone uint64) error {
	glog.V(0).Infof("set milestone %d", milestone)
	var mskBytes = make([]byte, 8)
	util.Uint64toBytes(mskBytes, milestoneKey)
	var msBytes = make([]byte, 8)
	util.Uint64toBytes(msBytes, milestone)
	if err := db.Put(mskBytes, msBytes, nil); err != nil {
		return fmt.Errorf("failed to setMileStone: %v", err)
	}
	return nil
}
func levelDbWrite ( db * leveldb . DB , key NeedleId , offset Offset , size Size , milestone uint64 ) error {
2018-07-08 09:28:04 +00:00
2019-07-21 20:50:24 +00:00
bytes := needle_map . ToBytes ( key , offset , size )
2018-07-08 09:28:04 +00:00
if err := db . Put ( bytes [ 0 : NeedleIdSize ] , bytes [ NeedleIdSize : NeedleIdSize + OffsetSize + SizeSize ] , nil ) ; err != nil {
2015-03-27 23:34:58 +00:00
return fmt . Errorf ( "failed to write leveldb: %v" , err )
}
2022-07-19 01:20:45 +00:00
// set milestone
if milestone != 0 {
glog . V ( 0 ) . Infof ( "actually set milestone %d" , milestone )
return setMileStone ( db , milestone )
}
2015-03-27 23:34:58 +00:00
return nil
}
2018-07-08 09:28:04 +00:00
func levelDbDelete ( db * leveldb . DB , key NeedleId ) error {
bytes := make ( [ ] byte , NeedleIdSize )
NeedleIdToBytes ( bytes , key )
2015-03-27 23:34:58 +00:00
return db . Delete ( bytes , nil )
}
2020-09-12 19:42:36 +00:00
func ( m * LevelDbNeedleMap ) Delete ( key NeedleId , offset Offset ) error {
2022-07-19 01:20:45 +00:00
var milestone uint64
2020-08-19 02:22:16 +00:00
oldNeedle , found := m . Get ( key )
if ! found || oldNeedle . Size . IsDeleted ( ) {
return nil
2015-03-27 23:34:58 +00:00
}
2020-08-19 02:22:16 +00:00
m . logDelete ( oldNeedle . Size )
2015-03-27 23:34:58 +00:00
// write to index file first
2020-09-12 19:42:36 +00:00
if err := m . appendToIndexFile ( key , offset , TombstoneFileSize ) ; err != nil {
2015-03-27 23:34:58 +00:00
return err
}
2022-07-19 01:20:45 +00:00
//atomic.AddUint64(&m.recordNum, 1)
//milestone = atomic.LoadUint64(&m.recordNum)
m . recordNum ++
if m . recordNum % milestoneCnt != 0 {
milestone = 0
} else {
milestone = ( m . recordNum / milestoneCnt ) * milestoneCnt
}
glog . V ( 0 ) . Infof ( "delete cnt:%d milestone:%s %d" , m . recordNum , m . dbFileName , milestone )
return levelDbWrite ( m . db , key , oldNeedle . Offset , - oldNeedle . Size , milestone )
2015-03-27 23:34:58 +00:00
}
func ( m * LevelDbNeedleMap ) Close ( ) {
2020-02-04 18:37:14 +00:00
indexFileName := m . indexFile . Name ( )
if err := m . indexFile . Sync ( ) ; err != nil {
glog . Warningf ( "sync file %s failed: %v" , indexFileName , err )
}
if err := m . indexFile . Close ( ) ; err != nil {
glog . Warningf ( "close index file %s failed: %v" , indexFileName , err )
}
2021-03-07 09:49:06 +00:00
if m . db != nil {
if err := m . db . Close ( ) ; err != nil {
glog . Warningf ( "close levelDB failed: %v" , err )
}
2020-02-04 18:37:14 +00:00
}
2015-03-27 23:34:58 +00:00
}
func ( m * LevelDbNeedleMap ) Destroy ( ) error {
m . Close ( )
os . Remove ( m . indexFile . Name ( ) )
2017-08-30 14:55:03 +00:00
return os . RemoveAll ( m . dbFileName )
2015-03-27 23:34:58 +00:00
}