mount: better combines connected intervals to write to volume servers

This commit is contained in:
Chris Lu 2020-01-22 23:00:04 -08:00
parent 6a5c037099
commit c2e589f202
4 changed files with 265 additions and 119 deletions

View file

@ -4,8 +4,8 @@ import (
"bytes" "bytes"
"context" "context"
"fmt" "fmt"
"io"
"sync" "sync"
"sync/atomic"
"time" "time"
"github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/glog"
@ -15,28 +15,19 @@ import (
) )
type ContinuousDirtyPages struct { type ContinuousDirtyPages struct {
hasData bool intervals *ContinuousIntervals
Offset int64
Size int64
Data []byte
f *File f *File
lock sync.Mutex lock sync.Mutex
} }
func newDirtyPages(file *File) *ContinuousDirtyPages { func newDirtyPages(file *File) *ContinuousDirtyPages {
return &ContinuousDirtyPages{ return &ContinuousDirtyPages{
Data: nil, intervals: &ContinuousIntervals{},
f: file, f: file,
} }
} }
func (pages *ContinuousDirtyPages) releaseResource() { func (pages *ContinuousDirtyPages) releaseResource() {
if pages.Data != nil {
pages.f.wfs.bufPool.Put(pages.Data)
pages.Data = nil
atomic.AddInt32(&counter, -1)
glog.V(3).Infof("%s/%s releasing resource %d", pages.f.dir.Path, pages.f.Name, counter)
}
} }
var counter = int32(0) var counter = int32(0)
@ -46,61 +37,29 @@ func (pages *ContinuousDirtyPages) AddPage(ctx context.Context, offset int64, da
pages.lock.Lock() pages.lock.Lock()
defer pages.lock.Unlock() defer pages.lock.Unlock()
var chunk *filer_pb.FileChunk glog.V(3).Infof("%s AddPage [%d,%d)", pages.f.fullpath(), offset, offset+int64(len(data)))
if len(data) > int(pages.f.wfs.option.ChunkSizeLimit) { if len(data) > int(pages.f.wfs.option.ChunkSizeLimit) {
// this is more than what buffer can hold. // this is more than what buffer can hold.
return pages.flushAndSave(ctx, offset, data) return pages.flushAndSave(ctx, offset, data)
} }
if pages.Data == nil { hasOverlap := pages.intervals.AddInterval(data, offset)
pages.Data = pages.f.wfs.bufPool.Get().([]byte) if hasOverlap {
atomic.AddInt32(&counter, 1) chunks, err = pages.saveExistingPagesToStorage(ctx)
glog.V(3).Infof("%s/%s acquire resource %d", pages.f.dir.Path, pages.f.Name, counter) pages.intervals.AddInterval(data, offset)
return
} }
if offset < pages.Offset || offset >= pages.Offset+int64(len(pages.Data)) || var chunk *filer_pb.FileChunk
pages.Offset+int64(len(pages.Data)) < offset+int64(len(data)) { var hasSavedData bool
// if the data is out of range,
// or buffer is full if adding new data,
// flush current buffer and add new data
glog.V(4).Infof("offset=%d, size=%d, existing pages offset=%d, pages size=%d, data=%d", offset, len(data), pages.Offset, pages.Size, len(pages.Data)) if pages.intervals.TotalSize() > pages.f.wfs.option.ChunkSizeLimit {
chunk, hasSavedData, err = pages.saveExistingLargestPageToStorage(ctx)
if chunk, err = pages.saveExistingPagesToStorage(ctx); err == nil { if hasSavedData {
if chunk != nil {
glog.V(4).Infof("%s/%s add save [%d,%d)", pages.f.dir.Path, pages.f.Name, chunk.Offset, chunk.Offset+int64(chunk.Size))
chunks = append(chunks, chunk) chunks = append(chunks, chunk)
} }
} else {
glog.V(0).Infof("%s/%s add save [%d,%d): %v", pages.f.dir.Path, pages.f.Name, chunk.Offset, chunk.Offset+int64(chunk.Size), err)
return
} }
pages.Offset = offset
glog.V(4).Infof("copy data0: offset=%d, size=%d, existing pages offset=%d, pages size=%d, data=%d", offset, len(data), pages.Offset, pages.Size, len(pages.Data))
copy(pages.Data, data)
pages.Size = int64(len(data))
return
}
if offset != pages.Offset+pages.Size {
// when this happens, debug shows the data overlapping with existing data is empty
// the data is not just append
if offset == pages.Offset && int(pages.Size) < len(data) {
glog.V(4).Infof("copy data1: offset=%d, size=%d, existing pages offset=%d, pages size=%d, data=%d", offset, len(data), pages.Offset, pages.Size, len(pages.Data))
copy(pages.Data[pages.Size:], data[pages.Size:])
} else {
if pages.Size != 0 {
glog.V(1).Infof("%s/%s add page: pages [%d, %d) write [%d, %d)", pages.f.dir.Path, pages.f.Name, pages.Offset, pages.Offset+pages.Size, offset, offset+int64(len(data)))
}
return pages.flushAndSave(ctx, offset, data)
}
} else {
glog.V(4).Infof("copy data2: offset=%d, size=%d, existing pages offset=%d, pages size=%d, data=%d", offset, len(data), pages.Offset, pages.Size, len(pages.Data))
copy(pages.Data[offset-pages.Offset:], data)
}
pages.Size = max(pages.Size, offset+int64(len(data))-pages.Offset)
return return
} }
@ -108,22 +67,19 @@ func (pages *ContinuousDirtyPages) AddPage(ctx context.Context, offset int64, da
func (pages *ContinuousDirtyPages) flushAndSave(ctx context.Context, offset int64, data []byte) (chunks []*filer_pb.FileChunk, err error) { func (pages *ContinuousDirtyPages) flushAndSave(ctx context.Context, offset int64, data []byte) (chunks []*filer_pb.FileChunk, err error) {
var chunk *filer_pb.FileChunk var chunk *filer_pb.FileChunk
var newChunks []*filer_pb.FileChunk
// flush existing // flush existing
if chunk, err = pages.saveExistingPagesToStorage(ctx); err == nil { if newChunks, err = pages.saveExistingPagesToStorage(ctx); err == nil {
if chunk != nil { if newChunks != nil {
glog.V(4).Infof("%s/%s flush existing [%d,%d) to %s", pages.f.dir.Path, pages.f.Name, chunk.Offset, chunk.Offset+int64(chunk.Size), chunk.FileId) chunks = append(chunks, newChunks...)
chunks = append(chunks, chunk)
} }
} else { } else {
glog.V(0).Infof("%s/%s failed to flush1 [%d,%d): %v", pages.f.dir.Path, pages.f.Name, chunk.Offset, chunk.Offset+int64(chunk.Size), err)
return return
} }
pages.Size = 0
pages.Offset = 0
// flush the new page // flush the new page
if chunk, err = pages.saveToStorage(ctx, data, offset); err == nil { if chunk, err = pages.saveToStorage(ctx, bytes.NewReader(data), offset, int64(len(data))); err == nil {
if chunk != nil { if chunk != nil {
glog.V(4).Infof("%s/%s flush big request [%d,%d) to %s", pages.f.dir.Path, pages.f.Name, chunk.Offset, chunk.Offset+int64(chunk.Size), chunk.FileId) glog.V(4).Infof("%s/%s flush big request [%d,%d) to %s", pages.f.dir.Path, pages.f.Name, chunk.Offset, chunk.Offset+int64(chunk.Size), chunk.FileId)
chunks = append(chunks, chunk) chunks = append(chunks, chunk)
@ -136,37 +92,55 @@ func (pages *ContinuousDirtyPages) flushAndSave(ctx context.Context, offset int6
return return
} }
func (pages *ContinuousDirtyPages) FlushToStorage(ctx context.Context) (chunk *filer_pb.FileChunk, err error) { func (pages *ContinuousDirtyPages) FlushToStorage(ctx context.Context) (chunks []*filer_pb.FileChunk, err error) {
pages.lock.Lock() pages.lock.Lock()
defer pages.lock.Unlock() defer pages.lock.Unlock()
if pages.Size == 0 { return pages.saveExistingPagesToStorage(ctx)
return nil, nil
} }
if chunk, err = pages.saveExistingPagesToStorage(ctx); err == nil { func (pages *ContinuousDirtyPages) saveExistingPagesToStorage(ctx context.Context) (chunks []*filer_pb.FileChunk, err error) {
pages.Size = 0
pages.Offset = 0 var hasSavedData bool
if chunk != nil { var chunk *filer_pb.FileChunk
glog.V(4).Infof("%s/%s flush [%d,%d)", pages.f.dir.Path, pages.f.Name, chunk.Offset, chunk.Offset+int64(chunk.Size))
for {
chunk, hasSavedData, err = pages.saveExistingLargestPageToStorage(ctx)
if !hasSavedData {
return chunks, err
}
if err == nil {
chunks = append(chunks, chunk)
} else {
return
} }
} }
}
func (pages *ContinuousDirtyPages) saveExistingLargestPageToStorage(ctx context.Context) (chunk *filer_pb.FileChunk, hasSavedData bool, err error) {
maxList := pages.intervals.RemoveLargestIntervalLinkedList()
if maxList == nil {
return nil, false, nil
}
chunk, err = pages.saveToStorage(ctx, maxList.ToReader(), maxList.Offset(), maxList.Size())
if err == nil {
hasSavedData = true
glog.V(3).Infof("%s saveToStorage [%d,%d) %s", pages.f.fullpath(), maxList.Offset(), maxList.Offset()+maxList.Size(), chunk.FileId)
} else {
glog.V(0).Infof("%s saveToStorage [%d,%d): %v", pages.f.fullpath(), maxList.Offset(), maxList.Offset()+maxList.Size(), err)
return return
} }
func (pages *ContinuousDirtyPages) saveExistingPagesToStorage(ctx context.Context) (*filer_pb.FileChunk, error) { return
if pages.Size == 0 {
return nil, nil
} }
glog.V(0).Infof("%s/%s saveExistingPagesToStorage [%d,%d): Data len=%d", pages.f.dir.Path, pages.f.Name, pages.Offset, pages.Size, len(pages.Data)) func (pages *ContinuousDirtyPages) saveToStorage(ctx context.Context, reader io.Reader, offset int64, size int64) (*filer_pb.FileChunk, error) {
return pages.saveToStorage(ctx, pages.Data[:pages.Size], pages.Offset)
}
func (pages *ContinuousDirtyPages) saveToStorage(ctx context.Context, buf []byte, offset int64) (*filer_pb.FileChunk, error) {
var fileId, host string var fileId, host string
var auth security.EncodedJwt var auth security.EncodedJwt
@ -195,8 +169,7 @@ func (pages *ContinuousDirtyPages) saveToStorage(ctx context.Context, buf []byte
} }
fileUrl := fmt.Sprintf("http://%s/%s", host, fileId) fileUrl := fmt.Sprintf("http://%s/%s", host, fileId)
bufReader := bytes.NewReader(buf) uploadResult, err := operation.Upload(fileUrl, pages.f.Name, reader, false, "", nil, auth)
uploadResult, err := operation.Upload(fileUrl, pages.f.Name, bufReader, false, "", nil, auth)
if err != nil { if err != nil {
glog.V(0).Infof("upload data %v to %s: %v", pages.f.Name, fileUrl, err) glog.V(0).Infof("upload data %v to %s: %v", pages.f.Name, fileUrl, err)
return nil, fmt.Errorf("upload data: %v", err) return nil, fmt.Errorf("upload data: %v", err)
@ -209,7 +182,7 @@ func (pages *ContinuousDirtyPages) saveToStorage(ctx context.Context, buf []byte
return &filer_pb.FileChunk{ return &filer_pb.FileChunk{
FileId: fileId, FileId: fileId,
Offset: offset, Offset: offset,
Size: uint64(len(buf)), Size: uint64(size),
Mtime: time.Now().UnixNano(), Mtime: time.Now().UnixNano(),
ETag: uploadResult.ETag, ETag: uploadResult.ETag,
}, nil }, nil
@ -229,23 +202,11 @@ func min(x, y int64) int64 {
return y return y
} }
func (pages *ContinuousDirtyPages) ReadDirtyData(ctx context.Context, data []byte, startOffset int64) (offset int64, size int, err error) { func (pages *ContinuousDirtyPages) ReadDirtyData(ctx context.Context, data []byte, startOffset int64) (offset int64, size int) {
bufSize := int64(len(data))
pages.lock.Lock() pages.lock.Lock()
defer pages.lock.Unlock() defer pages.lock.Unlock()
if startOffset+bufSize < pages.Offset { return pages.intervals.ReadData(data, startOffset)
return
}
if startOffset >= pages.Offset+pages.Size {
return
}
offset = max(pages.Offset, startOffset)
stopOffset := min(pages.Offset+pages.Size, startOffset+bufSize)
size = int(stopOffset - offset)
copy(data[offset-startOffset:], pages.Data[offset-pages.Offset:stopOffset-pages.Offset])
return
} }

View file

@ -0,0 +1,190 @@
package filesys
import (
"bytes"
"io"
"math"
"github.com/chrislusf/seaweedfs/weed/glog"
)
// IntervalNode is a single buffered piece of data: Size bytes, held in Data,
// that start at position Offset. Nodes are chained through Next to form an
// IntervalLinkedList; the last node of a chain has a nil Next.
type IntervalNode struct {
	Data         []byte
	Offset, Size int64
	Next         *IntervalNode
}
// IntervalLinkedList is a chain of IntervalNode values. Head is the first
// node and Tail the last; the list's methods dereference both without a nil
// check, so a usable list always has at least one node.
type IntervalLinkedList struct {
	Head, Tail *IntervalNode
}
// ContinuousIntervals holds buffered data as a collection of interval lists.
// AddInterval attaches new data to an existing list when the byte ranges are
// adjacent and otherwise starts a new list.
type ContinuousIntervals struct {
	// lists is kept in insertion order (new lists are appended); it is not
	// sorted by offset.
	lists []*IntervalLinkedList
}
// Offset reports where the list begins, which is the offset of its head node.
// The list must not be empty: a nil Head panics.
func (list *IntervalLinkedList) Offset() int64 {
	first := list.Head
	return first.Offset
}
// Size reports how many bytes the list spans, measured from the start of the
// head node to the end of the tail node. The list must not be empty.
func (list *IntervalLinkedList) Size() int64 {
	last := list.Tail
	stop := last.Offset + last.Size
	return stop - list.Head.Offset
}
// addNodeToTail links node after the current tail and makes it the new tail.
// It does not check that node actually continues the list's byte range; that
// is the caller's responsibility.
func (list *IntervalLinkedList) addNodeToTail(node *IntervalNode) {
	previousTail := list.Tail
	previousTail.Next = node
	list.Tail = node
}
// addNodeToHead links node in front of the current head and makes it the new
// head. It does not check that node ends where the list begins; that is the
// caller's responsibility.
func (list *IntervalLinkedList) addNodeToHead(node *IntervalNode) {
	// The right-hand side is evaluated before either assignment happens, so
	// node.Next receives the old head.
	node.Next, list.Head = list.Head, node
}
// ReadData copies the part of the list that falls inside [start, stop) into
// buf. buf[0] corresponds to offset start, so a byte stored at offset o lands
// in buf[o-start]. Regions of [start, stop) that no node covers are left
// untouched in buf. An empty list copies nothing.
func (list *IntervalLinkedList) ReadData(buf []byte, start, stop int64) {
	for t := list.Head; t != nil; t = t.Next {
		// Clip the node to the requested window.
		nodeStart, nodeStop := max(start, t.Offset), min(stop, t.Offset+t.Size)
		if nodeStart >= nodeStop {
			continue
		}
		// This runs once per node on every read, so it is logged at debug
		// verbosity rather than at level 0.
		glog.V(4).Infof("copying start=%d stop=%d t=[%d,%d) t.data=%d => bufSize=%d nodeStart=%d, nodeStop=%d",
			start, stop, t.Offset, t.Offset+t.Size, len(t.Data),
			len(buf), nodeStart, nodeStop)
		copy(buf[nodeStart-start:], t.Data[nodeStart-t.Offset:nodeStop-t.Offset])
	}
}
// TotalSize adds up the Size of every list currently held. It returns 0 when
// there are no lists.
func (c *ContinuousIntervals) TotalSize() (total int64) {
	for i := range c.lists {
		total += c.lists[i].Size()
	}
	return total
}
// AddInterval buffers data as the byte range [offset, offset+len(data)).
//
// The range is attached to an existing list when it continues that list's
// tail or ends exactly where that list's head begins; when it does both, the
// two lists are joined into one. Otherwise it becomes a new single-node list.
//
// A write that begins inside the tail node of a list and runs past the end of
// that list is accepted as a tail extension: the leading bytes that are
// already buffered are dropped from the new data (the buffered bytes are
// kept) and only the remainder is appended.
//
// hasOverlap is true when the range collides with buffered data in any other
// way, including a write that is completely covered by buffered data and a
// write that starts before a list and runs into it. In that case nothing is
// modified and the data is NOT stored; callers such as AddPage flush the
// buffered pages and add the interval again.
//
// data is retained, not copied, so the caller must not modify it afterwards.
// Empty data is ignored and reports no overlap.
func (c *ContinuousIntervals) AddInterval(data []byte, offset int64) (hasOverlap bool) {
	if len(data) == 0 {
		// Nothing to buffer; a zero-length node would only create a list that
		// can never be picked for flushing.
		return false
	}

	interval := &IntervalNode{Data: data, Offset: offset, Size: int64(len(data))}
	stop := offset + interval.Size

	// First pass: classify every list without modifying anything, so that an
	// overlap can be reported with the buffered state left intact.
	var prevList, nextList *IntervalLinkedList
	var covered int64 // leading bytes of data already present in prevList
	for _, list := range c.lists {
		listStart := list.Offset()
		listStop := listStart + list.Size()

		switch {
		case listStart == stop:
			// The new range ends exactly where this list begins.
			if nextList == nil {
				nextList = list
			}
		case listStop == offset:
			// The new range begins exactly where this list ends.
			if prevList == nil {
				prevList, covered = list, 0
			}
		case listStop < offset || stop < listStart:
			// Disjoint and not touching: irrelevant for this write.
		case prevList == nil && list.Tail.Offset <= offset && listStop < stop:
			// Starts inside the tail node and extends beyond it: keep the
			// buffered bytes and append only what lies past the list's end.
			prevList, covered = list, listStop-offset
		default:
			// Every other intersection would either slice data out of range
			// (fully covered write) or leave two lists holding different
			// bytes for the same offsets.
			glog.V(4).Infof("overlapped! interval is [%d,%d) dataSize=%d", interval.Offset, interval.Offset+interval.Size, len(interval.Data))
			return true
		}
	}

	if covered > 0 {
		interval.Data = interval.Data[covered:]
		interval.Size -= covered
		interval.Offset += covered
	}

	// Second pass: apply the change.
	switch {
	case prevList != nil && nextList != nil:
		// The new node bridges two lists: chain them into prevList.
		prevList.addNodeToTail(interval)
		prevList.Tail.Next = nextList.Head
		prevList.Tail = nextList.Tail
		c.removeList(nextList)
	case prevList != nil:
		prevList.addNodeToTail(interval)
	case nextList != nil:
		nextList.addNodeToHead(interval)
	default:
		c.lists = append(c.lists, &IntervalLinkedList{
			Head: interval,
			Tail: interval,
		})
	}
	return false
}
// RemoveLargestIntervalLinkedList detaches and returns the list that spans
// the most bytes. When several lists share the largest size, the one that
// appears last is chosen. It returns nil when there are no lists or when no
// list has a positive size.
func (c *ContinuousIntervals) RemoveLargestIntervalLinkedList() *IntervalLinkedList {
	pick, pickSize := -1, int64(0)
	for i := range c.lists {
		if size := c.lists[i].Size(); size >= pickSize {
			pick, pickSize = i, size
		}
	}
	if pickSize <= 0 {
		return nil
	}

	largest := c.lists[pick]
	c.lists = append(c.lists[:pick], c.lists[pick+1:]...)
	return largest
}
// removeList drops target from the collection, keeping the order of the
// remaining lists. target is matched by pointer identity, so only that exact
// list is removed even if another list happens to start at the same offset.
// It is a no-op when target is not part of the collection.
func (c *ContinuousIntervals) removeList(target *IntervalLinkedList) {
	for i, list := range c.lists {
		if list != target {
			continue
		}
		last := len(c.lists) - 1
		copy(c.lists[i:], c.lists[i+1:])
		// Clear the vacated slot so the backing array does not keep the
		// removed list (and its data) reachable.
		c.lists[last] = nil
		c.lists = c.lists[:last]
		return
	}
}
// ReadData copies buffered bytes that fall inside the window
// [startOffset, startOffset+len(data)) into data, where data[0] corresponds
// to startOffset.
//
// The returned offset and size describe the span from the lowest to the
// highest position touched by any list that meets the window; the span may
// include gaps between lists, which are left untouched in data. A list that
// merely touches the edge of the window counts as meeting it, although it
// contributes no bytes. When no list meets the window, it returns (0, 0).
func (c *ContinuousIntervals) ReadData(data []byte, startOffset int64) (offset int64, size int) {
	windowStop := startOffset + int64(len(data))
	lowest, highest := int64(math.MaxInt64), int64(0)

	for _, list := range c.lists {
		from := max(startOffset, list.Offset())
		to := min(windowStop, list.Offset()+list.Size())
		if from > to {
			continue
		}
		list.ReadData(data[from-startOffset:], from, to)
		lowest = min(lowest, from)
		highest = max(highest, to)
	}

	// lowest still holding its sentinel means no list met the window.
	if lowest == math.MaxInt64 {
		return 0, 0
	}
	return lowest, int(highest - lowest)
}
// ToReader returns a reader that yields the Data of every node in order, from
// head to tail, as one continuous stream. The node data is read in place, not
// copied. The list must not be empty: a nil Head panics.
func (list *IntervalLinkedList) ToReader() io.Reader {
	node := list.Head
	parts := []io.Reader{bytes.NewReader(node.Data)}
	for node = node.Next; node != nil; node = node.Next {
		parts = append(parts, bytes.NewReader(node.Data))
	}
	return io.MultiReader(parts...)
}

View file

@ -230,12 +230,6 @@ func (file *File) maybeLoadEntry(ctx context.Context) error {
return nil return nil
} }
func (file *File) addChunk(chunk *filer_pb.FileChunk) {
if chunk != nil {
file.addChunks([]*filer_pb.FileChunk{chunk})
}
}
func (file *File) addChunks(chunks []*filer_pb.FileChunk) { func (file *File) addChunks(chunks []*filer_pb.FileChunk) {
sort.Slice(chunks, func(i, j int) bool { sort.Slice(chunks, func(i, j int) bool {

View file

@ -55,8 +55,8 @@ func (fh *FileHandle) Read(ctx context.Context, req *fuse.ReadRequest, resp *fus
totalRead, err := fh.readFromChunks(ctx, buff, req.Offset) totalRead, err := fh.readFromChunks(ctx, buff, req.Offset)
if err == nil { if err == nil {
dirtyOffset, dirtySize, dirtyReadErr := fh.readFromDirtyPages(ctx, buff, req.Offset) dirtyOffset, dirtySize := fh.readFromDirtyPages(ctx, buff, req.Offset)
if dirtyReadErr == nil && totalRead+req.Offset < dirtyOffset+int64(dirtySize) { if totalRead+req.Offset < dirtyOffset+int64(dirtySize) {
totalRead = dirtyOffset + int64(dirtySize) - req.Offset totalRead = dirtyOffset + int64(dirtySize) - req.Offset
} }
} }
@ -70,7 +70,7 @@ func (fh *FileHandle) Read(ctx context.Context, req *fuse.ReadRequest, resp *fus
return err return err
} }
func (fh *FileHandle) readFromDirtyPages(ctx context.Context, buff []byte, startOffset int64) (offset int64, size int, err error) { func (fh *FileHandle) readFromDirtyPages(ctx context.Context, buff []byte, startOffset int64) (offset int64, size int) {
return fh.dirtyPages.ReadDirtyData(ctx, buff, startOffset) return fh.dirtyPages.ReadDirtyData(ctx, buff, startOffset)
} }
@ -102,8 +102,6 @@ func (fh *FileHandle) Write(ctx context.Context, req *fuse.WriteRequest, resp *f
// write the request to volume servers // write the request to volume servers
glog.V(4).Infof("%+v/%v write fh %d: [%d,%d)", fh.f.dir.Path, fh.f.Name, fh.handle, req.Offset, req.Offset+int64(len(req.Data)))
chunks, err := fh.dirtyPages.AddPage(ctx, req.Offset, req.Data) chunks, err := fh.dirtyPages.AddPage(ctx, req.Offset, req.Data)
if err != nil { if err != nil {
glog.Errorf("%+v/%v write fh %d: [%d,%d): %v", fh.f.dir.Path, fh.f.Name, fh.handle, req.Offset, req.Offset+int64(len(req.Data)), err) glog.Errorf("%+v/%v write fh %d: [%d,%d): %v", fh.f.dir.Path, fh.f.Name, fh.handle, req.Offset, req.Offset+int64(len(req.Data)), err)
@ -152,13 +150,16 @@ func (fh *FileHandle) Flush(ctx context.Context, req *fuse.FlushRequest) error {
// send the data to the OS // send the data to the OS
glog.V(4).Infof("%s fh %d flush %v", fh.f.fullpath(), fh.handle, req) glog.V(4).Infof("%s fh %d flush %v", fh.f.fullpath(), fh.handle, req)
chunk, err := fh.dirtyPages.FlushToStorage(ctx) chunks, err := fh.dirtyPages.FlushToStorage(ctx)
if err != nil { if err != nil {
glog.Errorf("flush %s/%s: %v", fh.f.dir.Path, fh.f.Name, err) glog.Errorf("flush %s/%s: %v", fh.f.dir.Path, fh.f.Name, err)
return fmt.Errorf("flush %s/%s: %v", fh.f.dir.Path, fh.f.Name, err) return fmt.Errorf("flush %s/%s: %v", fh.f.dir.Path, fh.f.Name, err)
} }
fh.f.addChunk(chunk) fh.f.addChunks(chunks)
if len(chunks) > 0 {
fh.dirtyMetadata = true
}
if !fh.dirtyMetadata { if !fh.dirtyMetadata {
return nil return nil