mirror of
https://github.com/seaweedfs/seaweedfs.git
synced 2024-01-19 02:48:24 +00:00
generate ec01~ec14, generate ecx file with sorted needle values
This commit is contained in:
parent
12dc6608f0
commit
87f63b9c08
|
@ -1,9 +1,11 @@
|
||||||
package erasure_coding
|
package erasure_coding
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"os"
|
"os"
|
||||||
|
|
||||||
|
"github.com/chrislusf/seaweedfs/weed/glog"
|
||||||
"github.com/klauspost/reedsolomon"
|
"github.com/klauspost/reedsolomon"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -14,7 +16,46 @@ const (
|
||||||
ErasureCodingSmallBlockSize = 1024 * 1024 // 1MB
|
ErasureCodingSmallBlockSize = 1024 * 1024 // 1MB
|
||||||
)
|
)
|
||||||
|
|
||||||
func encodeData(file *os.File, enc reedsolomon.Encoder, startOffset, blockSize int64, buffers [][]byte) error {
|
func encodeData(file *os.File, enc reedsolomon.Encoder, startOffset, blockSize int64, buffers [][]byte, outputs []*os.File) error {
|
||||||
|
|
||||||
|
bufferSize := int64(len(buffers[0]))
|
||||||
|
batchCount := blockSize/bufferSize
|
||||||
|
if blockSize%bufferSize!=0 {
|
||||||
|
glog.Fatalf("unexpected block size %d buffer size %d", blockSize, bufferSize)
|
||||||
|
}
|
||||||
|
|
||||||
|
for b := int64(0); b < batchCount; b++ {
|
||||||
|
err := encodeDataOneBatch(file, enc, startOffset+b*bufferSize, blockSize, buffers, outputs)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func openEcFiles(baseFileName string) (files []*os.File, err error){
|
||||||
|
for i := 0; i< DataShardsCount+ParityShardsCount; i++{
|
||||||
|
fname := fmt.Sprintf("%s.ec%02d", baseFileName, i+1)
|
||||||
|
f, err := os.OpenFile(fname, os.O_TRUNC|os.O_CREATE|os.O_WRONLY, 0644)
|
||||||
|
if err != nil {
|
||||||
|
return files, fmt.Errorf("failed to open file %s: %v", fname, err)
|
||||||
|
}
|
||||||
|
files = append(files, f)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// closeEcFiles closes every non-nil file in files. Nil entries are skipped
// and any error returned by Close is discarded.
func closeEcFiles(files []*os.File) {
	for idx := 0; idx < len(files); idx++ {
		handle := files[idx]
		if handle == nil {
			continue
		}
		handle.Close()
	}
}
|
||||||
|
|
||||||
|
|
||||||
|
func encodeDataOneBatch(file *os.File, enc reedsolomon.Encoder, startOffset, blockSize int64, buffers [][]byte, outputs []*os.File) error {
|
||||||
|
|
||||||
// read data into buffers
|
// read data into buffers
|
||||||
for i := 0; i < DataShardsCount; i++ {
|
for i := 0; i < DataShardsCount; i++ {
|
||||||
|
@ -36,5 +77,12 @@ func encodeData(file *os.File, enc reedsolomon.Encoder, startOffset, blockSize i
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for i := 0; i < DataShardsCount+ParityShardsCount; i++ {
|
||||||
|
_, err := outputs[i].Write(buffers[i])
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,9 +1,13 @@
|
||||||
package erasure_coding
|
package erasure_coding
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
"github.com/chrislusf/seaweedfs/weed/storage"
|
||||||
|
"github.com/chrislusf/seaweedfs/weed/storage/needle_map"
|
||||||
|
"github.com/chrislusf/seaweedfs/weed/storage/types"
|
||||||
"github.com/klauspost/reedsolomon"
|
"github.com/klauspost/reedsolomon"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -11,8 +15,9 @@ func TestEncodingDecoding(t *testing.T) {
|
||||||
largeBlockSize := int64(10000)
|
largeBlockSize := int64(10000)
|
||||||
smallBlockSize := int64(100)
|
smallBlockSize := int64(100)
|
||||||
bufferSize := 50
|
bufferSize := 50
|
||||||
|
baseFileName := "1"
|
||||||
|
|
||||||
file, err := os.OpenFile("1.dat", os.O_RDONLY, 0)
|
file, err := os.OpenFile(baseFileName+".dat", os.O_RDONLY, 0)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Logf("failed to open dat file: %v", err)
|
t.Logf("failed to open dat file: %v", err)
|
||||||
}
|
}
|
||||||
|
@ -22,31 +27,95 @@ func TestEncodingDecoding(t *testing.T) {
|
||||||
t.Logf("failed to stat dat file: %v", err)
|
t.Logf("failed to stat dat file: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
remainingSize := fi.Size()
|
err = encodeDatFile(fi.Size(), err, baseFileName, bufferSize, largeBlockSize, file, smallBlockSize)
|
||||||
var processedSize int64
|
|
||||||
|
|
||||||
enc, err := reedsolomon.New(DataShardsCount, ParityShardsCount)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Logf("failed to create encoder: %v", err)
|
t.Logf("failed to stat dat file: %v", err)
|
||||||
|
}
|
||||||
|
file.Close()
|
||||||
|
|
||||||
|
err = writeSortedEcxFiles(baseFileName)
|
||||||
|
if err != nil {
|
||||||
|
t.Logf("writeSortedEcxFiles: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
buffers := make([][]byte, DataShardsCount+ParityShardsCount)
|
err = validateFiles(baseFileName)
|
||||||
|
if err != nil {
|
||||||
for i, _ := range buffers {
|
t.Logf("writeSortedEcxFiles: %v", err)
|
||||||
buffers[i] = make([]byte, bufferSize)
|
|
||||||
}
|
|
||||||
|
|
||||||
for remainingSize > largeBlockSize*DataShardsCount {
|
|
||||||
encodeData(file, enc, processedSize, largeBlockSize, buffers)
|
|
||||||
remainingSize -= largeBlockSize * DataShardsCount
|
|
||||||
processedSize += largeBlockSize * DataShardsCount
|
|
||||||
}
|
|
||||||
|
|
||||||
for remainingSize > 0 {
|
|
||||||
encodeData(file, enc, processedSize, smallBlockSize, buffers)
|
|
||||||
remainingSize -= smallBlockSize * DataShardsCount
|
|
||||||
processedSize += smallBlockSize * DataShardsCount
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func encodeDatFile(remainingSize int64, err error, baseFileName string, bufferSize int, largeBlockSize int64, file *os.File, smallBlockSize int64) error {
|
||||||
|
var processedSize int64
|
||||||
|
enc, err := reedsolomon.New(DataShardsCount, ParityShardsCount)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to create encoder: %v", err)
|
||||||
|
}
|
||||||
|
buffers := make([][]byte, DataShardsCount+ParityShardsCount)
|
||||||
|
outputs, err := openEcFiles(baseFileName)
|
||||||
|
defer closeEcFiles(outputs)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to open dat file: %v", err)
|
||||||
|
}
|
||||||
|
for i, _ := range buffers {
|
||||||
|
buffers[i] = make([]byte, bufferSize)
|
||||||
|
}
|
||||||
|
for remainingSize > largeBlockSize*DataShardsCount {
|
||||||
|
err = encodeData(file, enc, processedSize, largeBlockSize, buffers, outputs)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to encode large chunk data: %v", err)
|
||||||
|
}
|
||||||
|
remainingSize -= largeBlockSize * DataShardsCount
|
||||||
|
processedSize += largeBlockSize * DataShardsCount
|
||||||
|
}
|
||||||
|
for remainingSize > 0 {
|
||||||
|
encodeData(file, enc, processedSize, smallBlockSize, buffers, outputs)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to encode small chunk data: %v", err)
|
||||||
|
}
|
||||||
|
remainingSize -= smallBlockSize * DataShardsCount
|
||||||
|
processedSize += smallBlockSize * DataShardsCount
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func writeSortedEcxFiles(baseFileName string) (e error) {
|
||||||
|
|
||||||
|
var indexFile *os.File
|
||||||
|
if indexFile, e = os.OpenFile(baseFileName+".idx", os.O_RDONLY, 0644); e != nil {
|
||||||
|
return fmt.Errorf("cannot read Volume Index %s.idx: %v", baseFileName, e)
|
||||||
|
}
|
||||||
|
|
||||||
|
cm := needle_map.NewCompactMap()
|
||||||
|
storage.WalkIndexFile(indexFile, func(key types.NeedleId, offset types.Offset, size uint32) error {
|
||||||
|
if !offset.IsZero() && size != types.TombstoneFileSize {
|
||||||
|
cm.Set(key, offset, size)
|
||||||
|
} else {
|
||||||
|
cm.Delete(key)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
|
||||||
|
ecxFile, err := os.OpenFile(baseFileName+".ecx", os.O_TRUNC|os.O_CREATE|os.O_WRONLY, 0644)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to open dat file: %v", err)
|
||||||
|
}
|
||||||
|
defer ecxFile.Close()
|
||||||
|
|
||||||
|
err = cm.AscendingVisit(func(value needle_map.NeedleValue) error {
|
||||||
|
bytes := value.ToBytes()
|
||||||
|
_, writeErr := ecxFile.Write(bytes)
|
||||||
|
return writeErr
|
||||||
|
})
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to open dat file: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// validateFiles is currently a placeholder: it ignores baseFileName, performs
// no checks and always returns nil.
func validateFiles(baseFileName string) error {
|
||||||
|
return nil
|
||||||
|
|
||||||
|
}
|
||||||
|
|
|
@ -62,10 +62,7 @@ func IdxFileEntry(bytes []byte) (key NeedleId, offset Offset, size uint32) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
func (nm *baseNeedleMapper) appendToIndexFile(key NeedleId, offset Offset, size uint32) error {
|
func (nm *baseNeedleMapper) appendToIndexFile(key NeedleId, offset Offset, size uint32) error {
|
||||||
bytes := make([]byte, NeedleIdSize+OffsetSize+SizeSize)
|
bytes := needle_map.ToBytes(key, offset, size)
|
||||||
NeedleIdToBytes(bytes[0:NeedleIdSize], key)
|
|
||||||
OffsetToBytes(bytes[NeedleIdSize:NeedleIdSize+OffsetSize], offset)
|
|
||||||
util.Uint32toBytes(bytes[NeedleIdSize+OffsetSize:NeedleIdSize+OffsetSize+SizeSize], size)
|
|
||||||
|
|
||||||
nm.indexFileAccessLock.Lock()
|
nm.indexFileAccessLock.Lock()
|
||||||
defer nm.indexFileAccessLock.Unlock()
|
defer nm.indexFileAccessLock.Unlock()
|
||||||
|
@ -76,6 +73,7 @@ func (nm *baseNeedleMapper) appendToIndexFile(key NeedleId, offset Offset, size
|
||||||
_, err := nm.indexFile.Write(bytes)
|
_, err := nm.indexFile.Write(bytes)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
func (nm *baseNeedleMapper) IndexFileContent() ([]byte, error) {
|
func (nm *baseNeedleMapper) IndexFileContent() ([]byte, error) {
|
||||||
nm.indexFileAccessLock.Lock()
|
nm.indexFileAccessLock.Lock()
|
||||||
defer nm.indexFileAccessLock.Unlock()
|
defer nm.indexFileAccessLock.Unlock()
|
||||||
|
|
|
@ -43,7 +43,7 @@ func (cm *BtreeMap) Get(key NeedleId) (*NeedleValue, bool) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Visit visits all entries or stop if any error when visiting
|
// AscendingVisit visits all entries in ascending order, stopping at the first error returned by visit
|
||||||
func (cm *BtreeMap) Visit(visit func(NeedleValue) error) (ret error) {
|
func (cm *BtreeMap) AscendingVisit(visit func(NeedleValue) error) (ret error) {
|
||||||
cm.tree.Ascend(func(item btree.Item) bool {
|
cm.tree.Ascend(func(item btree.Item) bool {
|
||||||
needle := item.(NeedleValue)
|
needle := item.(NeedleValue)
|
||||||
ret = visit(needle)
|
ret = visit(needle)
|
||||||
|
|
|
@ -244,24 +244,37 @@ func (cm *CompactMap) binarySearchCompactSection(key NeedleId) int {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Visit visits all entries or stop if any error when visiting
|
// AscendingVisit visits all entries, stopping at the first error returned by visit
|
||||||
func (cm *CompactMap) Visit(visit func(NeedleValue) error) error {
|
func (cm *CompactMap) AscendingVisit(visit func(NeedleValue) error) error {
|
||||||
for _, cs := range cm.list {
|
for _, cs := range cm.list {
|
||||||
cs.RLock()
|
cs.RLock()
|
||||||
for i, v := range cs.overflow {
|
var i, j int
|
||||||
if err := visit(toNeedleValue(cs.overflowExtra[i], v, cs)); err != nil {
|
for i, j = 0, 0; i < len(cs.overflow) && j < len(cs.values) && j<cs.counter; {
|
||||||
|
if cs.overflow[i].Key < cs.values[j].Key {
|
||||||
|
if err := visit(toNeedleValue(cs.overflowExtra[i], cs.overflow[i], cs)); err != nil {
|
||||||
|
cs.RUnlock()
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
i++
|
||||||
|
}else if cs.overflow[i].Key == cs.values[j].Key {
|
||||||
|
j++
|
||||||
|
}else{
|
||||||
|
if err := visit(toNeedleValue(cs.valuesExtra[j], cs.values[j], cs)); err != nil {
|
||||||
|
cs.RUnlock()
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
j++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for ;i < len(cs.overflow);i++{
|
||||||
|
if err := visit(toNeedleValue(cs.overflowExtra[i], cs.overflow[i], cs)); err != nil {
|
||||||
cs.RUnlock()
|
cs.RUnlock()
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for i, v := range cs.values {
|
for ; j < len(cs.values)&& j<cs.counter;j++{
|
||||||
if i >= cs.counter {
|
if err := visit(toNeedleValue(cs.valuesExtra[j], cs.values[j], cs)); err != nil {
|
||||||
break
|
cs.RUnlock()
|
||||||
}
|
return err
|
||||||
if _, _, found := cs.findOverflowEntry(v.Key); !found {
|
|
||||||
if err := visit(toNeedleValue(cs.valuesExtra[i], v, cs)); err != nil {
|
|
||||||
cs.RUnlock()
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
cs.RUnlock()
|
cs.RUnlock()
|
||||||
|
@ -279,10 +292,10 @@ func toNeedleValue(snve SectionalNeedleValueExtra, snv SectionalNeedleValue, cs
|
||||||
|
|
||||||
func (nv NeedleValue) toSectionalNeedleValue(cs *CompactSection) (SectionalNeedleValue, SectionalNeedleValueExtra) {
|
func (nv NeedleValue) toSectionalNeedleValue(cs *CompactSection) (SectionalNeedleValue, SectionalNeedleValueExtra) {
|
||||||
return SectionalNeedleValue{
|
return SectionalNeedleValue{
|
||||||
SectionalNeedleId(nv.Key - cs.start),
|
SectionalNeedleId(nv.Key - cs.start),
|
||||||
nv.Offset.OffsetLower,
|
nv.Offset.OffsetLower,
|
||||||
nv.Size,
|
nv.Size,
|
||||||
}, SectionalNeedleValueExtra{
|
}, SectionalNeedleValueExtra{
|
||||||
nv.Offset.OffsetHigher,
|
nv.Offset.OffsetHigher,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,7 +19,7 @@ func TestOverflow2(t *testing.T) {
|
||||||
m.Set(NeedleId(150158), ToOffset(8), 3000073)
|
m.Set(NeedleId(150158), ToOffset(8), 3000073)
|
||||||
m.Set(NeedleId(150162), ToOffset(8), 3000073)
|
m.Set(NeedleId(150162), ToOffset(8), 3000073)
|
||||||
|
|
||||||
m.Visit(func(value NeedleValue) error {
|
m.AscendingVisit(func(value NeedleValue) error {
|
||||||
println("needle key:", value.Key)
|
println("needle key:", value.Key)
|
||||||
return nil
|
return nil
|
||||||
})
|
})
|
||||||
|
|
|
@ -2,6 +2,7 @@ package needle_map
|
||||||
|
|
||||||
import (
|
import (
|
||||||
. "github.com/chrislusf/seaweedfs/weed/storage/types"
|
. "github.com/chrislusf/seaweedfs/weed/storage/types"
|
||||||
|
"github.com/chrislusf/seaweedfs/weed/util"
|
||||||
"github.com/google/btree"
|
"github.com/google/btree"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -15,3 +16,15 @@ func (this NeedleValue) Less(than btree.Item) bool {
|
||||||
that := than.(NeedleValue)
|
that := than.(NeedleValue)
|
||||||
return this.Key < that.Key
|
return this.Key < that.Key
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (nv NeedleValue) ToBytes() []byte {
|
||||||
|
return ToBytes(nv.Key, nv.Offset, nv.Size)
|
||||||
|
}
|
||||||
|
|
||||||
|
func ToBytes(key NeedleId, offset Offset, size uint32) []byte {
|
||||||
|
bytes := make([]byte, NeedleIdSize+OffsetSize+SizeSize)
|
||||||
|
NeedleIdToBytes(bytes[0:NeedleIdSize], key)
|
||||||
|
OffsetToBytes(bytes[NeedleIdSize:NeedleIdSize+OffsetSize], offset)
|
||||||
|
util.Uint32toBytes(bytes[NeedleIdSize+OffsetSize:NeedleIdSize+OffsetSize+SizeSize], size)
|
||||||
|
return bytes
|
||||||
|
}
|
||||||
|
|
|
@ -8,5 +8,5 @@ type NeedleValueMap interface {
|
||||||
Set(key NeedleId, offset Offset, size uint32) (oldOffset Offset, oldSize uint32)
|
Set(key NeedleId, offset Offset, size uint32) (oldOffset Offset, oldSize uint32)
|
||||||
Delete(key NeedleId) uint32
|
Delete(key NeedleId) uint32
|
||||||
Get(key NeedleId) (*NeedleValue, bool)
|
Get(key NeedleId) (*NeedleValue, bool)
|
||||||
Visit(visit func(NeedleValue) error) error
|
AscendingVisit(visit func(NeedleValue) error) error
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue