mirror of
https://github.com/seaweedfs/seaweedfs.git
synced 2024-01-19 02:48:24 +00:00
bptree does not work well for auto-increasing keys
This commit is contained in:
parent
51c8f2518f
commit
df1d6133a8
60
weed/util/bptree/README.md
Normal file
60
weed/util/bptree/README.md
Normal file
|
@ -0,0 +1,60 @@
|
|||
This adapts one b+ tree implementation
|
||||
https://sourcegraph.com/github.com/timtadh/data-structures@master/-/tree/tree/bptree
|
||||
to persist changes to disk.
|
||||
|
||||
# When does a node need to persist itself?
|
||||
|
||||
* A node changed its key or value
|
||||
* When an item is added.
|
||||
* When an item is updated.
|
||||
* When an item is deleted.
|
||||
|
||||
* When a node is split.
|
||||
* 2 new nodes are created (they should persist themselves).
|
||||
* The parent node needs to point to the new nodes.
|
||||
|
||||
* When a node is merged.
|
||||
* delete one node
|
||||
* persist the merged node
|
||||
|
||||
|
||||
In general, if one node is returned from a function, the node should have already been persisted.
|
||||
The parent node may need to delete the old node.
|
||||
|
||||
BpTree
|
||||
Add(key ItemKey, value ItemValue)
|
||||
new_root = self.getRoot().put(key,value)
|
||||
a, b, err := self.insert(key, value)
|
||||
self.internal_insert(key, value)
|
||||
self.internal_split(q.keys[0], q)
|
||||
persist(a,b)
|
||||
self.persist() // child add q node
|
||||
self.maybePersist(child != p)
|
||||
self.leaf_insert(key, value)
|
||||
self.persist() // if dedup
|
||||
self.leaf_split(key, value)
|
||||
self.pure_leaf_split(key, value)
|
||||
persist(a,b)
|
||||
a.persist()
|
||||
persist(a,b)
|
||||
self.put_kv(key, value)
|
||||
new_root.persist()
|
||||
self.setRoot(new_root)
|
||||
oldroot.destroy()
|
||||
// maybe persist BpTree new root
|
||||
|
||||
Replace(key ItemKey, where WhereFunc, value ItemValue)
|
||||
leaf.persist()
|
||||
RemoveWhere(key ItemKey, where WhereFunc)
|
||||
self.getRoot().remove(key, where)
|
||||
self.internal_remove(key, nil, where)
|
||||
child.leaf_remove(key, nil, where)
|
||||
child.leaf_remove(key, sibling.keys[0], where)
|
||||
l.destroy() // when the node is empty
|
||||
a.maybePersist(hasChange)
|
||||
self.destroy() // when no keys left
|
||||
self.persist() // when some keys are left
|
||||
self.leaf_remove(key, self.keys[len(self.keys)-1], where)
|
||||
new_root.persist() // when new root is added
|
||||
// maybe persist BpTree new root
|
||||
|
|
@ -47,7 +47,9 @@ func (self *BpMap) Remove(key ItemKey) (value ItemValue, err error) {
|
|||
return nil, err
|
||||
}
|
||||
if new_root == nil {
|
||||
self.setRoot(NewLeaf(ns, true))
|
||||
new_root = NewLeaf(ns, false)
|
||||
err = new_root.persist()
|
||||
self.setRoot(new_root)
|
||||
} else {
|
||||
self.setRoot(new_root)
|
||||
}
|
||||
|
|
|
@ -54,9 +54,13 @@ func (self *BpTree) Replace(key ItemKey, where WhereFunc, value ItemValue) (err
|
|||
for i, leaf, next := li(); next != nil; i, leaf, next = next() {
|
||||
if where(leaf.values[i]) {
|
||||
leaf.values[i] = value
|
||||
if persistErr := leaf.persist(); persistErr != nil && err == nil {
|
||||
err = persistErr
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
return err
|
||||
}
|
||||
|
||||
func (self *BpTree) Find(key ItemKey) (kvi KVIterator) {
|
||||
|
@ -89,11 +93,13 @@ func (self *BpTree) RemoveWhere(key ItemKey, where WhereFunc) (err error) {
|
|||
return err
|
||||
}
|
||||
if new_root == nil {
|
||||
self.setRoot(NewLeaf(ns, false))
|
||||
new_root = NewLeaf(ns, false)
|
||||
err = new_root.persist()
|
||||
self.setRoot(new_root)
|
||||
} else {
|
||||
self.setRoot(new_root)
|
||||
}
|
||||
return nil
|
||||
return err
|
||||
}
|
||||
|
||||
func (self *BpTree) Keys() (ki KIterator) {
|
||||
|
|
|
@ -2,15 +2,23 @@ package bptree
|
|||
|
||||
type ItemKey Hashable
|
||||
type ItemValue Equatable
|
||||
type PersistFunc func(node *BpNode) error
|
||||
type DestroyFunc func(node *BpNode) error
|
||||
|
||||
var (
|
||||
PersistFn PersistFunc
|
||||
DestroyFn DestroyFunc
|
||||
)
|
||||
|
||||
type BpNode struct {
|
||||
keys []ItemKey
|
||||
values []ItemValue
|
||||
pointers []*BpNode
|
||||
next *BpNode
|
||||
prev *BpNode
|
||||
no_dup bool
|
||||
protoNode *ProtoNode
|
||||
keys []ItemKey
|
||||
values []ItemValue
|
||||
pointers []*BpNode
|
||||
next *BpNode
|
||||
prev *BpNode
|
||||
no_dup bool
|
||||
protoNodeId int64
|
||||
protoNode *ProtoNode
|
||||
}
|
||||
|
||||
func NewInternal(size int) *BpNode {
|
||||
|
@ -18,8 +26,9 @@ func NewInternal(size int) *BpNode {
|
|||
panic(NegativeSize())
|
||||
}
|
||||
return &BpNode{
|
||||
keys: make([]ItemKey, 0, size),
|
||||
pointers: make([]*BpNode, 0, size),
|
||||
keys: make([]ItemKey, 0, size),
|
||||
pointers: make([]*BpNode, 0, size),
|
||||
protoNodeId: GetProtoNodeId(),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -28,9 +37,10 @@ func NewLeaf(size int, no_dup bool) *BpNode {
|
|||
panic(NegativeSize())
|
||||
}
|
||||
return &BpNode{
|
||||
keys: make([]ItemKey, 0, size),
|
||||
values: make([]ItemValue, 0, size),
|
||||
no_dup: no_dup,
|
||||
keys: make([]ItemKey, 0, size),
|
||||
values: make([]ItemValue, 0, size),
|
||||
no_dup: no_dup,
|
||||
protoNodeId: GetProtoNodeId(),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -191,7 +201,7 @@ func (self *BpNode) put(key ItemKey, value ItemValue) (root *BpNode, err error)
|
|||
root = NewInternal(self.NodeSize())
|
||||
root.put_kp(a.keys[0], a)
|
||||
root.put_kp(b.keys[0], b)
|
||||
return root, nil
|
||||
return root, root.persist()
|
||||
}
|
||||
|
||||
// right is only set on split
|
||||
|
@ -237,10 +247,10 @@ func (self *BpNode) internal_insert(key ItemKey, value ItemValue) (a, b *BpNode,
|
|||
if err := self.put_kp(q.keys[0], q); err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
return self, nil, nil
|
||||
return self, nil, self.persist()
|
||||
}
|
||||
}
|
||||
return self, nil, nil
|
||||
return self, nil, self.maybePersist(child != p)
|
||||
}
|
||||
|
||||
/* On split
|
||||
|
@ -268,7 +278,7 @@ func (self *BpNode) internal_split(key ItemKey, ptr *BpNode) (a, b *BpNode, err
|
|||
return nil, nil, err
|
||||
}
|
||||
}
|
||||
return a, b, nil
|
||||
return a, b, persist(a, b)
|
||||
}
|
||||
|
||||
/* if the leaf is full then it will defer to a leaf_split
|
||||
|
@ -284,7 +294,7 @@ func (self *BpNode) leaf_insert(key ItemKey, value ItemValue) (a, b *BpNode, err
|
|||
i, has := self.find(key)
|
||||
if has {
|
||||
self.values[i] = value
|
||||
return self, nil, nil
|
||||
return self, nil, self.persist()
|
||||
}
|
||||
}
|
||||
if self.Full() {
|
||||
|
@ -293,7 +303,7 @@ func (self *BpNode) leaf_insert(key ItemKey, value ItemValue) (a, b *BpNode, err
|
|||
if err := self.put_kv(key, value); err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
return self, nil, nil
|
||||
return self, nil, self.persist()
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -323,7 +333,7 @@ func (self *BpNode) leaf_split(key ItemKey, value ItemValue) (a, b *BpNode, err
|
|||
return nil, nil, err
|
||||
}
|
||||
}
|
||||
return a, b, nil
|
||||
return a, b, persist(a, b)
|
||||
}
|
||||
|
||||
/* a pure leaf split has two cases:
|
||||
|
@ -349,7 +359,7 @@ func (self *BpNode) pure_leaf_split(key ItemKey, value ItemValue) (a, b *BpNode,
|
|||
return nil, nil, err
|
||||
}
|
||||
insert_linked_list_node(a, b.getPrev(), b)
|
||||
return a, b, nil
|
||||
return a, b, persist(a, b)
|
||||
} else {
|
||||
a = self
|
||||
e := self.find_end_of_pure_run()
|
||||
|
@ -357,7 +367,7 @@ func (self *BpNode) pure_leaf_split(key ItemKey, value ItemValue) (a, b *BpNode,
|
|||
if err := e.put_kv(key, value); err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
return a, nil, nil
|
||||
return a, nil, a.persist()
|
||||
} else {
|
||||
b = NewLeaf(self.NodeSize(), self.no_dup)
|
||||
if err := b.put_kv(key, value); err != nil {
|
||||
|
@ -367,7 +377,7 @@ func (self *BpNode) pure_leaf_split(key ItemKey, value ItemValue) (a, b *BpNode,
|
|||
if e.keys[0].Equals(key) {
|
||||
return a, nil, nil
|
||||
}
|
||||
return a, b, nil
|
||||
return a, b, persist(a, b)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -484,6 +494,7 @@ func (self *BpNode) internal_remove(key ItemKey, sibling *BpNode, where WhereFun
|
|||
sibling = sibling.left_most_leaf()
|
||||
}
|
||||
child := self.pointers[i]
|
||||
oldChild := child
|
||||
if child.Internal() {
|
||||
child, err = child.internal_remove(key, sibling, where)
|
||||
} else {
|
||||
|
@ -508,9 +519,9 @@ func (self *BpNode) internal_remove(key ItemKey, sibling *BpNode, where WhereFun
|
|||
self.pointers[i] = child
|
||||
}
|
||||
if len(self.keys) == 0 {
|
||||
return nil, nil
|
||||
return nil, self.destroy()
|
||||
}
|
||||
return self, nil
|
||||
return self, self.maybePersist(oldChild != child)
|
||||
}
|
||||
|
||||
func (self *BpNode) leaf_remove(key, stop ItemKey, where WhereFunc) (a *BpNode, err error) {
|
||||
|
@ -518,8 +529,10 @@ func (self *BpNode) leaf_remove(key, stop ItemKey, where WhereFunc) (a *BpNode,
|
|||
return nil, BpTreeError("Expected a leaf node")
|
||||
}
|
||||
a = self
|
||||
hasChange := false
|
||||
for j, l, next := self.forward(key, key)(); next != nil; j, l, next = next() {
|
||||
if where(l.values[j]) {
|
||||
hasChange = true
|
||||
if err := l.remove_key_at(j); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@ -538,8 +551,14 @@ func (self *BpNode) leaf_remove(key, stop ItemKey, where WhereFunc) (a *BpNode,
|
|||
} else {
|
||||
a = nil
|
||||
}
|
||||
if err := l.destroy(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
}
|
||||
if a != nil {
|
||||
return a, a.maybePersist(hasChange)
|
||||
}
|
||||
return a, nil
|
||||
}
|
||||
|
||||
|
|
34
weed/util/bptree/bptree_store_test.go
Normal file
34
weed/util/bptree/bptree_store_test.go
Normal file
|
@ -0,0 +1,34 @@
|
|||
package bptree
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestAddRemove builds a tree with NewBpTree(32), installs PersistFn and
// DestroyFn hooks that only print the affected node's protoNodeId, and then
// adds 1024 entries whose key and value are both the loop index formatted
// with "%02d". After every Add it dumps the tree with printTree.
// The test makes no assertions; it only produces output for inspection.
// NOTE(review): PersistFn and DestroyFn are package-level variables and are
// not restored here — confirm that other tests do not depend on them.
func TestAddRemove(t *testing.T) {
|
||||
tree := NewBpTree(32)
|
||||
PersistFn = func(node *BpNode) error {
|
||||
println("saving", node.protoNodeId)
|
||||
return nil
|
||||
}
|
||||
DestroyFn = func(node *BpNode) error {
|
||||
println("delete", node.protoNodeId)
|
||||
return nil
|
||||
}
|
||||
for i:=0;i<1024;i++{
|
||||
println("++++++++++", i)
|
||||
tree.Add(String(fmt.Sprintf("%02d", i)), String(fmt.Sprintf("%02d", i)))
|
||||
printTree(tree.root, "")
|
||||
}
|
||||
}
|
||||
|
||||
// printTree writes a textual dump of node to standard output: a
// "Node <protoNodeId>" line, followed by one "key <key>" line per entry in
// node.keys. Every line is prefixed with prefix, which is extended before the
// keys are printed. For each key index that also has a non-nil entry in
// node.pointers, the function recurses into that child with a longer prefix.
func printTree(node *BpNode, prefix string) {
|
||||
fmt.Printf("%sNode %d\n", prefix, node.protoNodeId)
|
||||
prefix += " "
|
||||
for i:=0;i<len(node.keys);i++{
|
||||
fmt.Printf("%skey %s\n", prefix, node.keys[i])
|
||||
if i < len(node.pointers) && node.pointers[i] != nil {
|
||||
printTree(node.pointers[i], prefix+" ")
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,5 +1,13 @@
|
|||
package bptree
|
||||
|
||||
var (
|
||||
protoNodeId = int64(0)
|
||||
)
|
||||
// GetProtoNodeId increments the package-level protoNodeId counter and returns
// the new value.
// NOTE(review): the increment is a plain ++ on a package variable with no
// synchronization visible here — confirm nodes are never created concurrently.
func GetProtoNodeId() int64 {
|
||||
protoNodeId++
|
||||
return protoNodeId
|
||||
}
|
||||
|
||||
// getRoot returns the map's current root node (the root field).
func (self *BpMap) getRoot() *BpNode {
|
||||
return self.root
|
||||
}
|
||||
|
@ -26,3 +34,39 @@ func (self *BpNode) getPrev() *BpNode {
|
|||
// setPrev stores prev in the node's prev field.
func (self *BpNode) setPrev(prev *BpNode) {
|
||||
self.prev = prev
|
||||
}
|
||||
// getNode returns the child stored at index x of the node's pointers slice.
// The index is not range-checked here, so an out-of-range x panics.
func (self *BpNode) getNode(x int)(*BpNode) {
|
||||
return self.pointers[x]
|
||||
}
|
||||
|
||||
// maybePersist calls persist on the node only when shouldPersist is true and
// returns its error; when shouldPersist is false it does nothing and returns
// nil.
func (self *BpNode) maybePersist(shouldPersist bool) error {
|
||||
if !shouldPersist {
|
||||
return nil
|
||||
}
|
||||
return self.persist()
|
||||
}
|
||||
// persist passes the node to the package-level PersistFn hook and returns the
// hook's error. When no hook is installed (PersistFn is nil) it is a no-op
// that returns nil.
func (self *BpNode) persist() error {
|
||||
if PersistFn != nil {
|
||||
return PersistFn(self)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
// destroy passes the node to the package-level DestroyFn hook and returns the
// hook's error. When no hook is installed (DestroyFn is nil) it is a no-op
// that returns nil.
func (self *BpNode) destroy() error {
|
||||
if DestroyFn != nil {
|
||||
return DestroyFn(self)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// persist persists a and then b, skipping whichever of them is nil.
// It stops at the first error: if persisting a fails, b is not attempted and
// that error is returned. It returns nil when every non-nil node persisted.
func persist(a, b *BpNode) error {
|
||||
if a != nil {
|
||||
if err := a.persist(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if b != nil {
|
||||
if err := b.persist(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
Loading…
Reference in a new issue