mirror of
https://github.com/seaweedfs/seaweedfs.git
synced 2024-01-19 02:48:24 +00:00
bptree does not work well for auto-increasing keys
This commit is contained in:
parent
51c8f2518f
commit
df1d6133a8
60
weed/util/bptree/README.md
Normal file
60
weed/util/bptree/README.md
Normal file
|
@ -0,0 +1,60 @@
|
||||||
|
This adapts one b+ tree implementation
|
||||||
|
https://sourcegraph.com/github.com/timtadh/data-structures@master/-/tree/tree/bptree
|
||||||
|
to persist changes to disk.
|
||||||
|
|
||||||
|
# When does a node need to persist itself?
|
||||||
|
|
||||||
|
* A node changed its key or value
|
||||||
|
* When an item is added.
|
||||||
|
* When an item is updated.
|
||||||
|
* When an item is deleted.
|
||||||
|
|
||||||
|
* When a node is split.
|
||||||
|
* 2 new nodes are created (they should persist themselves).
|
||||||
|
* The parent node needs to point to the new nodes.
|
||||||
|
|
||||||
|
* When a node is merged.
|
||||||
|
* delete one node
|
||||||
|
* persist the merged node
|
||||||
|
|
||||||
|
|
||||||
|
In general, if one node is returned from a function, the node should have already been persisted.
|
||||||
|
The parent node may need to delete the old node.
|
||||||
|
|
||||||
|
BpTree
|
||||||
|
Add(key ItemKey, value ItemValue)
|
||||||
|
new_root = self.getRoot().put(key,value)
|
||||||
|
a, b, err := self.insert(key, value)
|
||||||
|
self.internal_insert(key, value)
|
||||||
|
self.internal_split(q.keys[0], q)
|
||||||
|
persist(a,b)
|
||||||
|
self.persist() // child add q node
|
||||||
|
self.maybePersist(child == p)
|
||||||
|
self.leaf_insert(key, value)
|
||||||
|
self.persist() // if dedup
|
||||||
|
self.leaf_split(key, value)
|
||||||
|
self.pure_leaf_split(key, value)
|
||||||
|
persist(a,b)
|
||||||
|
a.persist()
|
||||||
|
persist(a,b)
|
||||||
|
self.put_kv(key, value)
|
||||||
|
new_root.persist()
|
||||||
|
self.setRoot(new_root)
|
||||||
|
oldroot.destroy()
|
||||||
|
// maybe persist BpTree new root
|
||||||
|
|
||||||
|
Replace(key ItemKey, where WhereFunc, value ItemValue)
|
||||||
|
leaf.persist()
|
||||||
|
RemoveWhere(key ItemKey, where WhereFunc)
|
||||||
|
self.getRoot().remove(key, where)
|
||||||
|
self.internal_remove(key, nil, where)
|
||||||
|
child.leaf_remove(key, nil, where)
|
||||||
|
child.leaf_remove(key, sibling.keys[0], where)
|
||||||
|
l.destroy() // when the node is empty
|
||||||
|
a.maybePersist(hasChange)
|
||||||
|
self.destroy() // when no keys left
|
||||||
|
self.persist() // when some keys are left
|
||||||
|
self.leaf_remove(key, self.keys[len(self.keys)-1], where)
|
||||||
|
new_root.persist() // when new root is added
|
||||||
|
// maybe persist BpTree new root
|
||||||
|
|
|
@ -47,7 +47,9 @@ func (self *BpMap) Remove(key ItemKey) (value ItemValue, err error) {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
if new_root == nil {
|
if new_root == nil {
|
||||||
self.setRoot(NewLeaf(ns, true))
|
new_root = NewLeaf(ns, false)
|
||||||
|
err = new_root.persist()
|
||||||
|
self.setRoot(new_root)
|
||||||
} else {
|
} else {
|
||||||
self.setRoot(new_root)
|
self.setRoot(new_root)
|
||||||
}
|
}
|
||||||
|
|
|
@ -54,9 +54,13 @@ func (self *BpTree) Replace(key ItemKey, where WhereFunc, value ItemValue) (err
|
||||||
for i, leaf, next := li(); next != nil; i, leaf, next = next() {
|
for i, leaf, next := li(); next != nil; i, leaf, next = next() {
|
||||||
if where(leaf.values[i]) {
|
if where(leaf.values[i]) {
|
||||||
leaf.values[i] = value
|
leaf.values[i] = value
|
||||||
|
if persistErr := leaf.persist(); persistErr != nil && err == nil {
|
||||||
|
err = persistErr
|
||||||
|
break
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return nil
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
func (self *BpTree) Find(key ItemKey) (kvi KVIterator) {
|
func (self *BpTree) Find(key ItemKey) (kvi KVIterator) {
|
||||||
|
@ -89,11 +93,13 @@ func (self *BpTree) RemoveWhere(key ItemKey, where WhereFunc) (err error) {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if new_root == nil {
|
if new_root == nil {
|
||||||
self.setRoot(NewLeaf(ns, false))
|
new_root = NewLeaf(ns, false)
|
||||||
|
err = new_root.persist()
|
||||||
|
self.setRoot(new_root)
|
||||||
} else {
|
} else {
|
||||||
self.setRoot(new_root)
|
self.setRoot(new_root)
|
||||||
}
|
}
|
||||||
return nil
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
func (self *BpTree) Keys() (ki KIterator) {
|
func (self *BpTree) Keys() (ki KIterator) {
|
||||||
|
|
|
@ -2,15 +2,23 @@ package bptree
|
||||||
|
|
||||||
type ItemKey Hashable
|
type ItemKey Hashable
|
||||||
type ItemValue Equatable
|
type ItemValue Equatable
|
||||||
|
// PersistFunc is the signature of a callback that is handed a node and
// reports success or failure through its error result.
type PersistFunc func(node *BpNode) error
|
||||||
|
// DestroyFunc is the signature of a callback that is handed a node and
// reports success or failure through its error result.
type DestroyFunc func(node *BpNode) error
|
||||||
|
|
||||||
|
// Package-level hooks. Both start out nil; the test in this package assigns
// them directly.
// NOTE(review): these are mutable globals shared by every tree in the
// package — confirm that a single pair of hooks is intended.
var (
|
||||||
|
PersistFn PersistFunc
|
||||||
|
DestroyFn DestroyFunc
|
||||||
|
)
|
||||||
|
|
||||||
type BpNode struct {
|
type BpNode struct {
|
||||||
keys []ItemKey
|
keys []ItemKey
|
||||||
values []ItemValue
|
values []ItemValue
|
||||||
pointers []*BpNode
|
pointers []*BpNode
|
||||||
next *BpNode
|
next *BpNode
|
||||||
prev *BpNode
|
prev *BpNode
|
||||||
no_dup bool
|
no_dup bool
|
||||||
protoNode *ProtoNode
|
protoNodeId int64
|
||||||
|
protoNode *ProtoNode
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewInternal(size int) *BpNode {
|
func NewInternal(size int) *BpNode {
|
||||||
|
@ -18,8 +26,9 @@ func NewInternal(size int) *BpNode {
|
||||||
panic(NegativeSize())
|
panic(NegativeSize())
|
||||||
}
|
}
|
||||||
return &BpNode{
|
return &BpNode{
|
||||||
keys: make([]ItemKey, 0, size),
|
keys: make([]ItemKey, 0, size),
|
||||||
pointers: make([]*BpNode, 0, size),
|
pointers: make([]*BpNode, 0, size),
|
||||||
|
protoNodeId: GetProtoNodeId(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -28,9 +37,10 @@ func NewLeaf(size int, no_dup bool) *BpNode {
|
||||||
panic(NegativeSize())
|
panic(NegativeSize())
|
||||||
}
|
}
|
||||||
return &BpNode{
|
return &BpNode{
|
||||||
keys: make([]ItemKey, 0, size),
|
keys: make([]ItemKey, 0, size),
|
||||||
values: make([]ItemValue, 0, size),
|
values: make([]ItemValue, 0, size),
|
||||||
no_dup: no_dup,
|
no_dup: no_dup,
|
||||||
|
protoNodeId: GetProtoNodeId(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -191,7 +201,7 @@ func (self *BpNode) put(key ItemKey, value ItemValue) (root *BpNode, err error)
|
||||||
root = NewInternal(self.NodeSize())
|
root = NewInternal(self.NodeSize())
|
||||||
root.put_kp(a.keys[0], a)
|
root.put_kp(a.keys[0], a)
|
||||||
root.put_kp(b.keys[0], b)
|
root.put_kp(b.keys[0], b)
|
||||||
return root, nil
|
return root, root.persist()
|
||||||
}
|
}
|
||||||
|
|
||||||
// right is only set on split
|
// right is only set on split
|
||||||
|
@ -237,10 +247,10 @@ func (self *BpNode) internal_insert(key ItemKey, value ItemValue) (a, b *BpNode,
|
||||||
if err := self.put_kp(q.keys[0], q); err != nil {
|
if err := self.put_kp(q.keys[0], q); err != nil {
|
||||||
return nil, nil, err
|
return nil, nil, err
|
||||||
}
|
}
|
||||||
return self, nil, nil
|
return self, nil, self.persist()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return self, nil, nil
|
return self, nil, self.maybePersist(child != p)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* On split
|
/* On split
|
||||||
|
@ -268,7 +278,7 @@ func (self *BpNode) internal_split(key ItemKey, ptr *BpNode) (a, b *BpNode, err
|
||||||
return nil, nil, err
|
return nil, nil, err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return a, b, nil
|
return a, b, persist(a, b)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* if the leaf is full then it will defer to a leaf_split
|
/* if the leaf is full then it will defer to a leaf_split
|
||||||
|
@ -284,7 +294,7 @@ func (self *BpNode) leaf_insert(key ItemKey, value ItemValue) (a, b *BpNode, err
|
||||||
i, has := self.find(key)
|
i, has := self.find(key)
|
||||||
if has {
|
if has {
|
||||||
self.values[i] = value
|
self.values[i] = value
|
||||||
return self, nil, nil
|
return self, nil, self.persist()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if self.Full() {
|
if self.Full() {
|
||||||
|
@ -293,7 +303,7 @@ func (self *BpNode) leaf_insert(key ItemKey, value ItemValue) (a, b *BpNode, err
|
||||||
if err := self.put_kv(key, value); err != nil {
|
if err := self.put_kv(key, value); err != nil {
|
||||||
return nil, nil, err
|
return nil, nil, err
|
||||||
}
|
}
|
||||||
return self, nil, nil
|
return self, nil, self.persist()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -323,7 +333,7 @@ func (self *BpNode) leaf_split(key ItemKey, value ItemValue) (a, b *BpNode, err
|
||||||
return nil, nil, err
|
return nil, nil, err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return a, b, nil
|
return a, b, persist(a, b)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* a pure leaf split has two cases:
|
/* a pure leaf split has two cases:
|
||||||
|
@ -349,7 +359,7 @@ func (self *BpNode) pure_leaf_split(key ItemKey, value ItemValue) (a, b *BpNode,
|
||||||
return nil, nil, err
|
return nil, nil, err
|
||||||
}
|
}
|
||||||
insert_linked_list_node(a, b.getPrev(), b)
|
insert_linked_list_node(a, b.getPrev(), b)
|
||||||
return a, b, nil
|
return a, b, persist(a, b)
|
||||||
} else {
|
} else {
|
||||||
a = self
|
a = self
|
||||||
e := self.find_end_of_pure_run()
|
e := self.find_end_of_pure_run()
|
||||||
|
@ -357,7 +367,7 @@ func (self *BpNode) pure_leaf_split(key ItemKey, value ItemValue) (a, b *BpNode,
|
||||||
if err := e.put_kv(key, value); err != nil {
|
if err := e.put_kv(key, value); err != nil {
|
||||||
return nil, nil, err
|
return nil, nil, err
|
||||||
}
|
}
|
||||||
return a, nil, nil
|
return a, nil, a.persist()
|
||||||
} else {
|
} else {
|
||||||
b = NewLeaf(self.NodeSize(), self.no_dup)
|
b = NewLeaf(self.NodeSize(), self.no_dup)
|
||||||
if err := b.put_kv(key, value); err != nil {
|
if err := b.put_kv(key, value); err != nil {
|
||||||
|
@ -367,7 +377,7 @@ func (self *BpNode) pure_leaf_split(key ItemKey, value ItemValue) (a, b *BpNode,
|
||||||
if e.keys[0].Equals(key) {
|
if e.keys[0].Equals(key) {
|
||||||
return a, nil, nil
|
return a, nil, nil
|
||||||
}
|
}
|
||||||
return a, b, nil
|
return a, b, persist(a, b)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -484,6 +494,7 @@ func (self *BpNode) internal_remove(key ItemKey, sibling *BpNode, where WhereFun
|
||||||
sibling = sibling.left_most_leaf()
|
sibling = sibling.left_most_leaf()
|
||||||
}
|
}
|
||||||
child := self.pointers[i]
|
child := self.pointers[i]
|
||||||
|
oldChild := child
|
||||||
if child.Internal() {
|
if child.Internal() {
|
||||||
child, err = child.internal_remove(key, sibling, where)
|
child, err = child.internal_remove(key, sibling, where)
|
||||||
} else {
|
} else {
|
||||||
|
@ -508,9 +519,9 @@ func (self *BpNode) internal_remove(key ItemKey, sibling *BpNode, where WhereFun
|
||||||
self.pointers[i] = child
|
self.pointers[i] = child
|
||||||
}
|
}
|
||||||
if len(self.keys) == 0 {
|
if len(self.keys) == 0 {
|
||||||
return nil, nil
|
return nil, self.destroy()
|
||||||
}
|
}
|
||||||
return self, nil
|
return self, self.maybePersist(oldChild != child)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (self *BpNode) leaf_remove(key, stop ItemKey, where WhereFunc) (a *BpNode, err error) {
|
func (self *BpNode) leaf_remove(key, stop ItemKey, where WhereFunc) (a *BpNode, err error) {
|
||||||
|
@ -518,8 +529,10 @@ func (self *BpNode) leaf_remove(key, stop ItemKey, where WhereFunc) (a *BpNode,
|
||||||
return nil, BpTreeError("Expected a leaf node")
|
return nil, BpTreeError("Expected a leaf node")
|
||||||
}
|
}
|
||||||
a = self
|
a = self
|
||||||
|
hasChange := false
|
||||||
for j, l, next := self.forward(key, key)(); next != nil; j, l, next = next() {
|
for j, l, next := self.forward(key, key)(); next != nil; j, l, next = next() {
|
||||||
if where(l.values[j]) {
|
if where(l.values[j]) {
|
||||||
|
hasChange = true
|
||||||
if err := l.remove_key_at(j); err != nil {
|
if err := l.remove_key_at(j); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
@ -538,8 +551,14 @@ func (self *BpNode) leaf_remove(key, stop ItemKey, where WhereFunc) (a *BpNode,
|
||||||
} else {
|
} else {
|
||||||
a = nil
|
a = nil
|
||||||
}
|
}
|
||||||
|
if err := l.destroy(); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if a != nil {
|
||||||
|
return a, a.maybePersist(hasChange)
|
||||||
|
}
|
||||||
return a, nil
|
return a, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
34
weed/util/bptree/bptree_store_test.go
Normal file
34
weed/util/bptree/bptree_store_test.go
Normal file
|
@ -0,0 +1,34 @@
|
||||||
|
package bptree
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestAddRemove builds a tree with node size 32, installs PersistFn and
// DestroyFn hooks that only print the node id, then adds 1024 key/value
// pairs, dumping the tree after every insertion.
// The test contains no assertions; its output is meant to be read by a human.
// NOTE(review): despite the name, nothing is removed in this test.
// NOTE(review): PersistFn and DestroyFn are package-level variables and are
// not restored when the test finishes.
func TestAddRemove(t *testing.T) {
|
||||||
|
tree := NewBpTree(32)
|
||||||
|
PersistFn = func(node *BpNode) error {
|
||||||
|
println("saving", node.protoNodeId)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
DestroyFn = func(node *BpNode) error {
|
||||||
|
println("delete", node.protoNodeId)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
for i:=0;i<1024;i++{
|
||||||
|
println("++++++++++", i)
|
||||||
|
// The same formatted string is used as both key and value.
// "%02d" pads only to two digits, so values of 100 and above produce
// three- and four-character strings.
// NOTE(review): any result returned by tree.Add is discarded here —
// confirm whether an error should fail the test.
tree.Add(String(fmt.Sprintf("%02d", i)), String(fmt.Sprintf("%02d", i)))
|
||||||
|
printTree(tree.root, "")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// printTree writes a textual dump of node and its descendants to standard
// output: first the node's protoNodeId, then each key, and after each key the
// subtree reached through the pointer at the same index (when one exists).
// prefix is prepended to every line and grows with recursion depth.
func printTree(node *BpNode, prefix string) {
|
||||||
|
fmt.Printf("%sNode %d\n", prefix, node.protoNodeId)
|
||||||
|
prefix += " "
|
||||||
|
for i:=0;i<len(node.keys);i++{
|
||||||
|
fmt.Printf("%skey %s\n", prefix, node.keys[i])
|
||||||
|
// Recurse only when a non-nil pointer exists at this index.
if i < len(node.pointers) && node.pointers[i] != nil {
|
||||||
|
printTree(node.pointers[i], prefix+" ")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,5 +1,13 @@
|
||||||
package bptree
|
package bptree
|
||||||
|
|
||||||
|
var (
|
||||||
|
// protoNodeId is a package-level int64 counter that starts at 0.
protoNodeId = int64(0)
|
||||||
|
)
|
||||||
|
// GetProtoNodeId increments the package-level protoNodeId counter and returns
// the incremented value.
// NOTE(review): the increment is a plain ++ with no synchronization visible
// here — confirm that nodes are only ever created from a single goroutine.
func GetProtoNodeId() int64 {
|
||||||
|
protoNodeId++
|
||||||
|
return protoNodeId
|
||||||
|
}
|
||||||
|
|
||||||
func (self *BpMap) getRoot() *BpNode {
|
func (self *BpMap) getRoot() *BpNode {
|
||||||
return self.root
|
return self.root
|
||||||
}
|
}
|
||||||
|
@ -26,3 +34,39 @@ func (self *BpNode) getPrev() *BpNode {
|
||||||
func (self *BpNode) setPrev(prev *BpNode) {
|
func (self *BpNode) setPrev(prev *BpNode) {
|
||||||
self.prev = prev
|
self.prev = prev
|
||||||
}
|
}
|
||||||
|
// getNode returns the child pointer stored at index x of self.pointers.
// No bounds check is performed here, so x must be a valid index.
func (self *BpNode) getNode(x int)(*BpNode) {
|
||||||
|
return self.pointers[x]
|
||||||
|
}
|
||||||
|
|
||||||
|
// maybePersist calls self.persist() and returns its result when shouldPersist
// is true; otherwise it does nothing and returns nil.
func (self *BpNode) maybePersist(shouldPersist bool) error {
|
||||||
|
if !shouldPersist {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return self.persist()
|
||||||
|
}
|
||||||
|
// persist passes this node to the package-level PersistFn hook and returns
// the hook's error. When PersistFn is nil it is a no-op that returns nil.
func (self *BpNode) persist() error {
|
||||||
|
if PersistFn != nil {
|
||||||
|
return PersistFn(self)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
// destroy passes this node to the package-level DestroyFn hook and returns
// the hook's error. When DestroyFn is nil it is a no-op that returns nil.
func (self *BpNode) destroy() error {
|
||||||
|
if DestroyFn != nil {
|
||||||
|
return DestroyFn(self)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// persist persists a and then b, skipping whichever of them is nil.
// It stops at the first failure and returns that error, so b is not
// persisted when persisting a fails. It returns nil when every non-nil node
// was persisted without error.
func persist(a, b *BpNode) error {
|
||||||
|
if a != nil {
|
||||||
|
if err := a.persist(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if b != nil {
|
||||||
|
if err := b.persist(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
Loading…
Reference in a new issue