caching visible intervals for read

speeds up 4x in single thread mode
speeds up 30% in 32 threads mode
This commit is contained in:
Chris Lu 2018-12-30 00:51:44 -08:00
parent 55f7d8f801
commit 0388d421d2
5 changed files with 54 additions and 37 deletions

View file

@ -33,7 +33,7 @@ func ETag(chunks []*filer_pb.FileChunk) (etag string) {
func CompactFileChunks(chunks []*filer_pb.FileChunk) (compacted, garbage []*filer_pb.FileChunk) { func CompactFileChunks(chunks []*filer_pb.FileChunk) (compacted, garbage []*filer_pb.FileChunk) {
visibles := nonOverlappingVisibleIntervals(chunks) visibles := NonOverlappingVisibleIntervals(chunks)
fileIds := make(map[string]bool) fileIds := make(map[string]bool)
for _, interval := range visibles { for _, interval := range visibles {
@ -75,7 +75,13 @@ type ChunkView struct {
func ViewFromChunks(chunks []*filer_pb.FileChunk, offset int64, size int) (views []*ChunkView) { func ViewFromChunks(chunks []*filer_pb.FileChunk, offset int64, size int) (views []*ChunkView) {
visibles := nonOverlappingVisibleIntervals(chunks) visibles := NonOverlappingVisibleIntervals(chunks)
return ViewFromVisibleIntervals(visibles, offset, size)
}
func ViewFromVisibleIntervals(visibles []*VisibleInterval, offset int64, size int) (views []*ChunkView) {
stop := offset + int64(size) stop := offset + int64(size)
@ -97,7 +103,7 @@ func ViewFromChunks(chunks []*filer_pb.FileChunk, offset int64, size int) (views
} }
func logPrintf(name string, visibles []*visibleInterval) { func logPrintf(name string, visibles []*VisibleInterval) {
/* /*
log.Printf("%s len %d", name, len(visibles)) log.Printf("%s len %d", name, len(visibles))
for _, v := range visibles { for _, v := range visibles {
@ -108,11 +114,11 @@ func logPrintf(name string, visibles []*visibleInterval) {
var bufPool = sync.Pool{ var bufPool = sync.Pool{
New: func() interface{} { New: func() interface{} {
return new(visibleInterval) return new(VisibleInterval)
}, },
} }
func mergeIntoVisibles(visibles, newVisibles []*visibleInterval, chunk *filer_pb.FileChunk) []*visibleInterval { func mergeIntoVisibles(visibles, newVisibles []*VisibleInterval, chunk *filer_pb.FileChunk) []*VisibleInterval {
newV := newVisibleInterval( newV := newVisibleInterval(
chunk.Offset, chunk.Offset,
@ -173,13 +179,13 @@ func mergeIntoVisibles(visibles, newVisibles []*visibleInterval, chunk *filer_pb
return newVisibles return newVisibles
} }
func nonOverlappingVisibleIntervals(chunks []*filer_pb.FileChunk) (visibles []*visibleInterval) { func NonOverlappingVisibleIntervals(chunks []*filer_pb.FileChunk) (visibles []*VisibleInterval) {
sort.Slice(chunks, func(i, j int) bool { sort.Slice(chunks, func(i, j int) bool {
return chunks[i].Mtime < chunks[j].Mtime return chunks[i].Mtime < chunks[j].Mtime
}) })
var newVislbles []*visibleInterval var newVislbles []*VisibleInterval
for _, chunk := range chunks { for _, chunk := range chunks {
newVislbles = mergeIntoVisibles(visibles, newVislbles, chunk) newVislbles = mergeIntoVisibles(visibles, newVislbles, chunk)
t := visibles[:0] t := visibles[:0]
@ -196,7 +202,7 @@ func nonOverlappingVisibleIntervals(chunks []*filer_pb.FileChunk) (visibles []*v
// find non-overlapping visible intervals // find non-overlapping visible intervals
// visible interval map to one file chunk // visible interval map to one file chunk
type visibleInterval struct { type VisibleInterval struct {
start int64 start int64
stop int64 stop int64
modifiedTime int64 modifiedTime int64
@ -204,8 +210,8 @@ type visibleInterval struct {
isFullChunk bool isFullChunk bool
} }
func newVisibleInterval(start, stop int64, fileId string, modifiedTime int64, isFullChunk bool) *visibleInterval { func newVisibleInterval(start, stop int64, fileId string, modifiedTime int64, isFullChunk bool) *VisibleInterval {
return &visibleInterval{ return &VisibleInterval{
start: start, start: start,
stop: stop, stop: stop,
fileId: fileId, fileId: fileId,

View file

@ -63,7 +63,7 @@ func TestIntervalMerging(t *testing.T) {
testcases := []struct { testcases := []struct {
Chunks []*filer_pb.FileChunk Chunks []*filer_pb.FileChunk
Expected []*visibleInterval Expected []*VisibleInterval
}{ }{
// case 0: normal // case 0: normal
{ {
@ -72,7 +72,7 @@ func TestIntervalMerging(t *testing.T) {
{Offset: 100, Size: 100, FileId: "asdf", Mtime: 134}, {Offset: 100, Size: 100, FileId: "asdf", Mtime: 134},
{Offset: 200, Size: 100, FileId: "fsad", Mtime: 353}, {Offset: 200, Size: 100, FileId: "fsad", Mtime: 353},
}, },
Expected: []*visibleInterval{ Expected: []*VisibleInterval{
{start: 0, stop: 100, fileId: "abc"}, {start: 0, stop: 100, fileId: "abc"},
{start: 100, stop: 200, fileId: "asdf"}, {start: 100, stop: 200, fileId: "asdf"},
{start: 200, stop: 300, fileId: "fsad"}, {start: 200, stop: 300, fileId: "fsad"},
@ -84,7 +84,7 @@ func TestIntervalMerging(t *testing.T) {
{Offset: 0, Size: 100, FileId: "abc", Mtime: 123}, {Offset: 0, Size: 100, FileId: "abc", Mtime: 123},
{Offset: 0, Size: 200, FileId: "asdf", Mtime: 134}, {Offset: 0, Size: 200, FileId: "asdf", Mtime: 134},
}, },
Expected: []*visibleInterval{ Expected: []*VisibleInterval{
{start: 0, stop: 200, fileId: "asdf"}, {start: 0, stop: 200, fileId: "asdf"},
}, },
}, },
@ -94,7 +94,7 @@ func TestIntervalMerging(t *testing.T) {
{Offset: 0, Size: 100, FileId: "abc", Mtime: 123}, {Offset: 0, Size: 100, FileId: "abc", Mtime: 123},
{Offset: 0, Size: 50, FileId: "asdf", Mtime: 134}, {Offset: 0, Size: 50, FileId: "asdf", Mtime: 134},
}, },
Expected: []*visibleInterval{ Expected: []*VisibleInterval{
{start: 0, stop: 50, fileId: "asdf"}, {start: 0, stop: 50, fileId: "asdf"},
{start: 50, stop: 100, fileId: "abc"}, {start: 50, stop: 100, fileId: "abc"},
}, },
@ -106,7 +106,7 @@ func TestIntervalMerging(t *testing.T) {
{Offset: 0, Size: 200, FileId: "asdf", Mtime: 134}, {Offset: 0, Size: 200, FileId: "asdf", Mtime: 134},
{Offset: 50, Size: 250, FileId: "xxxx", Mtime: 154}, {Offset: 50, Size: 250, FileId: "xxxx", Mtime: 154},
}, },
Expected: []*visibleInterval{ Expected: []*VisibleInterval{
{start: 0, stop: 50, fileId: "asdf"}, {start: 0, stop: 50, fileId: "asdf"},
{start: 50, stop: 300, fileId: "xxxx"}, {start: 50, stop: 300, fileId: "xxxx"},
}, },
@ -118,7 +118,7 @@ func TestIntervalMerging(t *testing.T) {
{Offset: 0, Size: 200, FileId: "asdf", Mtime: 134}, {Offset: 0, Size: 200, FileId: "asdf", Mtime: 134},
{Offset: 250, Size: 250, FileId: "xxxx", Mtime: 154}, {Offset: 250, Size: 250, FileId: "xxxx", Mtime: 154},
}, },
Expected: []*visibleInterval{ Expected: []*VisibleInterval{
{start: 0, stop: 200, fileId: "asdf"}, {start: 0, stop: 200, fileId: "asdf"},
{start: 250, stop: 500, fileId: "xxxx"}, {start: 250, stop: 500, fileId: "xxxx"},
}, },
@ -131,7 +131,7 @@ func TestIntervalMerging(t *testing.T) {
{Offset: 70, Size: 150, FileId: "abc", Mtime: 143}, {Offset: 70, Size: 150, FileId: "abc", Mtime: 143},
{Offset: 80, Size: 100, FileId: "xxxx", Mtime: 134}, {Offset: 80, Size: 100, FileId: "xxxx", Mtime: 134},
}, },
Expected: []*visibleInterval{ Expected: []*VisibleInterval{
{start: 0, stop: 200, fileId: "asdf"}, {start: 0, stop: 200, fileId: "asdf"},
{start: 200, stop: 220, fileId: "abc"}, {start: 200, stop: 220, fileId: "abc"},
}, },
@ -143,7 +143,7 @@ func TestIntervalMerging(t *testing.T) {
{Offset: 0, Size: 100, FileId: "abc", Mtime: 123}, {Offset: 0, Size: 100, FileId: "abc", Mtime: 123},
{Offset: 0, Size: 100, FileId: "abc", Mtime: 123}, {Offset: 0, Size: 100, FileId: "abc", Mtime: 123},
}, },
Expected: []*visibleInterval{ Expected: []*VisibleInterval{
{start: 0, stop: 100, fileId: "abc"}, {start: 0, stop: 100, fileId: "abc"},
}, },
}, },
@ -157,7 +157,7 @@ func TestIntervalMerging(t *testing.T) {
{Offset: 8388608, Size: 3145728, FileId: "5,02982f80de50", Mtime: 160}, {Offset: 8388608, Size: 3145728, FileId: "5,02982f80de50", Mtime: 160},
{Offset: 11534336, Size: 2842193, FileId: "7,0299ad723803", Mtime: 170}, {Offset: 11534336, Size: 2842193, FileId: "7,0299ad723803", Mtime: 170},
}, },
Expected: []*visibleInterval{ Expected: []*VisibleInterval{
{start: 0, stop: 2097152, fileId: "3,029565bf3092"}, {start: 0, stop: 2097152, fileId: "3,029565bf3092"},
{start: 2097152, stop: 5242880, fileId: "6,029632f47ae2"}, {start: 2097152, stop: 5242880, fileId: "6,029632f47ae2"},
{start: 5242880, stop: 8388608, fileId: "2,029734c5aa10"}, {start: 5242880, stop: 8388608, fileId: "2,029734c5aa10"},
@ -174,7 +174,7 @@ func TestIntervalMerging(t *testing.T) {
{Offset: 208896, Size: 339968 - 208896, FileId: "2,0b4031a72689", Mtime: 150}, {Offset: 208896, Size: 339968 - 208896, FileId: "2,0b4031a72689", Mtime: 150},
{Offset: 339968, Size: 471040 - 339968, FileId: "3,0b416a557362", Mtime: 160}, {Offset: 339968, Size: 471040 - 339968, FileId: "3,0b416a557362", Mtime: 160},
}, },
Expected: []*visibleInterval{ Expected: []*VisibleInterval{
{start: 0, stop: 77824, fileId: "4,0b3df938e301"}, {start: 0, stop: 77824, fileId: "4,0b3df938e301"},
{start: 77824, stop: 208896, fileId: "4,0b3f0c7202f0"}, {start: 77824, stop: 208896, fileId: "4,0b3f0c7202f0"},
{start: 208896, stop: 339968, fileId: "2,0b4031a72689"}, {start: 208896, stop: 339968, fileId: "2,0b4031a72689"},
@ -186,7 +186,7 @@ func TestIntervalMerging(t *testing.T) {
for i, testcase := range testcases { for i, testcase := range testcases {
log.Printf("++++++++++ merged test case %d ++++++++++++++++++++", i) log.Printf("++++++++++ merged test case %d ++++++++++++++++++++", i)
intervals := nonOverlappingVisibleIntervals(testcase.Chunks) intervals := NonOverlappingVisibleIntervals(testcase.Chunks)
for x, interval := range intervals { for x, interval := range intervals {
log.Printf("test case %d, interval %d, start=%d, stop=%d, fileId=%s", log.Printf("test case %d, interval %d, start=%d, stop=%d, fileId=%s",
i, x, interval.start, interval.stop, interval.fileId) i, x, interval.start, interval.stop, interval.fileId)

View file

@ -7,11 +7,11 @@ import (
"path/filepath" "path/filepath"
"time" "time"
"github.com/seaweedfs/fuse"
"github.com/seaweedfs/fuse/fs"
"github.com/chrislusf/seaweedfs/weed/filer2" "github.com/chrislusf/seaweedfs/weed/filer2"
"github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/glog"
"github.com/chrislusf/seaweedfs/weed/pb/filer_pb" "github.com/chrislusf/seaweedfs/weed/pb/filer_pb"
"github.com/seaweedfs/fuse"
"github.com/seaweedfs/fuse/fs"
) )
type Dir struct { type Dir struct {
@ -101,10 +101,11 @@ func (dir *Dir) Attr(context context.Context, attr *fuse.Attr) error {
func (dir *Dir) newFile(name string, entry *filer_pb.Entry) *File { func (dir *Dir) newFile(name string, entry *filer_pb.Entry) *File {
return &File{ return &File{
Name: name, Name: name,
dir: dir, dir: dir,
wfs: dir.wfs, wfs: dir.wfs,
entry: entry, entry: entry,
entryViewCache: nil,
} }
} }

View file

@ -1,12 +1,12 @@
package filesys package filesys
import ( import (
"github.com/seaweedfs/fuse"
"github.com/seaweedfs/fuse/fs"
"context" "context"
"github.com/chrislusf/seaweedfs/weed/filer2" "github.com/chrislusf/seaweedfs/weed/filer2"
"github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/glog"
"github.com/chrislusf/seaweedfs/weed/pb/filer_pb" "github.com/chrislusf/seaweedfs/weed/pb/filer_pb"
"github.com/seaweedfs/fuse"
"github.com/seaweedfs/fuse/fs"
"os" "os"
"path/filepath" "path/filepath"
"time" "time"
@ -20,11 +20,12 @@ var _ = fs.NodeFsyncer(&File{})
var _ = fs.NodeSetattrer(&File{}) var _ = fs.NodeSetattrer(&File{})
type File struct { type File struct {
Name string Name string
dir *Dir dir *Dir
wfs *WFS wfs *WFS
entry *filer_pb.Entry entry *filer_pb.Entry
isOpen bool entryViewCache []*filer2.VisibleInterval
isOpen bool
} }
func (file *File) fullpath() string { func (file *File) fullpath() string {
@ -82,6 +83,7 @@ func (file *File) Setattr(ctx context.Context, req *fuse.SetattrRequest, resp *f
if req.Size == 0 { if req.Size == 0 {
// fmt.Printf("truncate %v \n", fullPath) // fmt.Printf("truncate %v \n", fullPath)
file.entry.Chunks = nil file.entry.Chunks = nil
file.entryViewCache = nil
} }
file.entry.Attributes.FileSize = req.Size file.entry.Attributes.FileSize = req.Size
} }
@ -138,6 +140,7 @@ func (file *File) maybeLoadAttributes(ctx context.Context) error {
if item != nil && !item.Expired() { if item != nil && !item.Expired() {
entry := item.Value().(*filer_pb.Entry) entry := item.Value().(*filer_pb.Entry)
file.entry = entry file.entry = entry
file.entryViewCache = nil
// glog.V(1).Infof("file attr read cached %v attributes", file.Name) // glog.V(1).Infof("file attr read cached %v attributes", file.Name)
} else { } else {
err := file.wfs.withFilerClient(func(client filer_pb.SeaweedFilerClient) error { err := file.wfs.withFilerClient(func(client filer_pb.SeaweedFilerClient) error {
@ -154,6 +157,7 @@ func (file *File) maybeLoadAttributes(ctx context.Context) error {
} }
file.entry = resp.Entry file.entry = resp.Entry
file.entryViewCache = nil
glog.V(3).Infof("file attr %v %+v: %d", file.fullpath(), file.entry.Attributes, filer2.TotalSize(file.entry.Chunks)) glog.V(3).Infof("file attr %v %+v: %d", file.fullpath(), file.entry.Attributes, filer2.TotalSize(file.entry.Chunks))

View file

@ -1,14 +1,14 @@
package filesys package filesys
import ( import (
"github.com/seaweedfs/fuse"
"github.com/seaweedfs/fuse/fs"
"context" "context"
"fmt" "fmt"
"github.com/chrislusf/seaweedfs/weed/filer2" "github.com/chrislusf/seaweedfs/weed/filer2"
"github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/glog"
"github.com/chrislusf/seaweedfs/weed/pb/filer_pb" "github.com/chrislusf/seaweedfs/weed/pb/filer_pb"
"github.com/chrislusf/seaweedfs/weed/util" "github.com/chrislusf/seaweedfs/weed/util"
"github.com/seaweedfs/fuse"
"github.com/seaweedfs/fuse/fs"
"net/http" "net/http"
"strings" "strings"
"sync" "sync"
@ -58,7 +58,11 @@ func (fh *FileHandle) Read(ctx context.Context, req *fuse.ReadRequest, resp *fus
buff := make([]byte, req.Size) buff := make([]byte, req.Size)
chunkViews := filer2.ViewFromChunks(fh.f.entry.Chunks, req.Offset, req.Size) if fh.f.entryViewCache == nil {
fh.f.entryViewCache = filer2.NonOverlappingVisibleIntervals(fh.f.entry.Chunks)
}
chunkViews := filer2.ViewFromVisibleIntervals(fh.f.entryViewCache, req.Offset, req.Size)
var vids []string var vids []string
for _, chunkView := range chunkViews { for _, chunkView := range chunkViews {
@ -154,6 +158,7 @@ func (fh *FileHandle) Write(ctx context.Context, req *fuse.WriteRequest, resp *f
for _, chunk := range chunks { for _, chunk := range chunks {
fh.f.entry.Chunks = append(fh.f.entry.Chunks, chunk) fh.f.entry.Chunks = append(fh.f.entry.Chunks, chunk)
fh.f.entryViewCache = nil
glog.V(1).Infof("uploaded %s/%s to %s [%d,%d)", fh.f.dir.Path, fh.f.Name, chunk.FileId, chunk.Offset, chunk.Offset+int64(chunk.Size)) glog.V(1).Infof("uploaded %s/%s to %s [%d,%d)", fh.f.dir.Path, fh.f.Name, chunk.FileId, chunk.Offset, chunk.Offset+int64(chunk.Size))
fh.dirtyMetadata = true fh.dirtyMetadata = true
} }
@ -188,6 +193,7 @@ func (fh *FileHandle) Flush(ctx context.Context, req *fuse.FlushRequest) error {
} }
if chunk != nil { if chunk != nil {
fh.f.entry.Chunks = append(fh.f.entry.Chunks, chunk) fh.f.entry.Chunks = append(fh.f.entry.Chunks, chunk)
fh.f.entryViewCache = nil
} }
if !fh.dirtyMetadata { if !fh.dirtyMetadata {