Mirror of https://github.com/seaweedfs/seaweedfs.git (last synced 2024-01-19 02:48:24 +00:00)
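optimization: improve random range query for large files — ResolveChunkManifest and NonOverlappingVisibleIntervals now take a [startOffset, stopOffset) range and skip every chunk that does not overlap it, so ViewFromChunks (which passes offset and offset+size) no longer resolves chunk manifests outside the requested range; all other callers pass 0 and math.MaxInt64 to keep covering the whole file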
This commit is contained in:
parent
b938df97a2
commit
7ab389e7ec
|
@ -39,9 +39,14 @@ func SeparateManifestChunks(chunks []*filer_pb.FileChunk) (manifestChunks, nonMa
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
func ResolveChunkManifest(lookupFileIdFn wdclient.LookupFileIdFunctionType, chunks []*filer_pb.FileChunk) (dataChunks, manifestChunks []*filer_pb.FileChunk, manifestResolveErr error) {
|
func ResolveChunkManifest(lookupFileIdFn wdclient.LookupFileIdFunctionType, chunks []*filer_pb.FileChunk, startOffset, stopOffset int64) (dataChunks, manifestChunks []*filer_pb.FileChunk, manifestResolveErr error) {
|
||||||
// TODO maybe parallel this
|
// TODO maybe parallel this
|
||||||
for _, chunk := range chunks {
|
for _, chunk := range chunks {
|
||||||
|
|
||||||
|
if max(chunk.Offset, startOffset) >= min(chunk.Offset+int64(chunk.Size), stopOffset) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
if !chunk.IsChunkManifest {
|
if !chunk.IsChunkManifest {
|
||||||
dataChunks = append(dataChunks, chunk)
|
dataChunks = append(dataChunks, chunk)
|
||||||
continue
|
continue
|
||||||
|
@ -54,7 +59,7 @@ func ResolveChunkManifest(lookupFileIdFn wdclient.LookupFileIdFunctionType, chun
|
||||||
|
|
||||||
manifestChunks = append(manifestChunks, chunk)
|
manifestChunks = append(manifestChunks, chunk)
|
||||||
// recursive
|
// recursive
|
||||||
dchunks, mchunks, subErr := ResolveChunkManifest(lookupFileIdFn, resolvedChunks)
|
dchunks, mchunks, subErr := ResolveChunkManifest(lookupFileIdFn, resolvedChunks, startOffset, stopOffset)
|
||||||
if subErr != nil {
|
if subErr != nil {
|
||||||
return chunks, nil, subErr
|
return chunks, nil, subErr
|
||||||
}
|
}
|
||||||
|
|
|
@ -53,7 +53,7 @@ func ETagChunks(chunks []*filer_pb.FileChunk) (etag string) {
|
||||||
|
|
||||||
func CompactFileChunks(lookupFileIdFn wdclient.LookupFileIdFunctionType, chunks []*filer_pb.FileChunk) (compacted, garbage []*filer_pb.FileChunk) {
|
func CompactFileChunks(lookupFileIdFn wdclient.LookupFileIdFunctionType, chunks []*filer_pb.FileChunk) (compacted, garbage []*filer_pb.FileChunk) {
|
||||||
|
|
||||||
visibles, _ := NonOverlappingVisibleIntervals(lookupFileIdFn, chunks)
|
visibles, _ := NonOverlappingVisibleIntervals(lookupFileIdFn, chunks, 0, math.MaxInt64)
|
||||||
|
|
||||||
fileIds := make(map[string]bool)
|
fileIds := make(map[string]bool)
|
||||||
for _, interval := range visibles {
|
for _, interval := range visibles {
|
||||||
|
@ -72,11 +72,11 @@ func CompactFileChunks(lookupFileIdFn wdclient.LookupFileIdFunctionType, chunks
|
||||||
|
|
||||||
func MinusChunks(lookupFileIdFn wdclient.LookupFileIdFunctionType, as, bs []*filer_pb.FileChunk) (delta []*filer_pb.FileChunk, err error) {
|
func MinusChunks(lookupFileIdFn wdclient.LookupFileIdFunctionType, as, bs []*filer_pb.FileChunk) (delta []*filer_pb.FileChunk, err error) {
|
||||||
|
|
||||||
aData, aMeta, aErr := ResolveChunkManifest(lookupFileIdFn, as)
|
aData, aMeta, aErr := ResolveChunkManifest(lookupFileIdFn, as, 0, math.MaxInt64)
|
||||||
if aErr != nil {
|
if aErr != nil {
|
||||||
return nil, aErr
|
return nil, aErr
|
||||||
}
|
}
|
||||||
bData, bMeta, bErr := ResolveChunkManifest(lookupFileIdFn, bs)
|
bData, bMeta, bErr := ResolveChunkManifest(lookupFileIdFn, bs, 0, math.MaxInt64)
|
||||||
if bErr != nil {
|
if bErr != nil {
|
||||||
return nil, bErr
|
return nil, bErr
|
||||||
}
|
}
|
||||||
|
@ -117,7 +117,7 @@ func (cv *ChunkView) IsFullChunk() bool {
|
||||||
|
|
||||||
func ViewFromChunks(lookupFileIdFn wdclient.LookupFileIdFunctionType, chunks []*filer_pb.FileChunk, offset int64, size int64) (views []*ChunkView) {
|
func ViewFromChunks(lookupFileIdFn wdclient.LookupFileIdFunctionType, chunks []*filer_pb.FileChunk, offset int64, size int64) (views []*ChunkView) {
|
||||||
|
|
||||||
visibles, _ := NonOverlappingVisibleIntervals(lookupFileIdFn, chunks)
|
visibles, _ := NonOverlappingVisibleIntervals(lookupFileIdFn, chunks, offset, offset+size)
|
||||||
|
|
||||||
return ViewFromVisibleIntervals(visibles, offset, size)
|
return ViewFromVisibleIntervals(visibles, offset, size)
|
||||||
|
|
||||||
|
@ -221,9 +221,9 @@ func MergeIntoVisibles(visibles []VisibleInterval, chunk *filer_pb.FileChunk) (n
|
||||||
|
|
||||||
// NonOverlappingVisibleIntervals translates the file chunk into VisibleInterval in memory
|
// NonOverlappingVisibleIntervals translates the file chunk into VisibleInterval in memory
|
||||||
// If the file chunk content is a chunk manifest
|
// If the file chunk content is a chunk manifest
|
||||||
func NonOverlappingVisibleIntervals(lookupFileIdFn wdclient.LookupFileIdFunctionType, chunks []*filer_pb.FileChunk) (visibles []VisibleInterval, err error) {
|
func NonOverlappingVisibleIntervals(lookupFileIdFn wdclient.LookupFileIdFunctionType, chunks []*filer_pb.FileChunk, startOffset int64, stopOffset int64) (visibles []VisibleInterval, err error) {
|
||||||
|
|
||||||
chunks, _, err = ResolveChunkManifest(lookupFileIdFn, chunks)
|
chunks, _, err = ResolveChunkManifest(lookupFileIdFn, chunks, startOffset, stopOffset)
|
||||||
|
|
||||||
sort.Slice(chunks, func(i, j int) bool {
|
sort.Slice(chunks, func(i, j int) bool {
|
||||||
if chunks[i].Mtime == chunks[j].Mtime {
|
if chunks[i].Mtime == chunks[j].Mtime {
|
||||||
|
|
|
@ -90,7 +90,7 @@ func TestRandomFileChunksCompact(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
visibles, _ := NonOverlappingVisibleIntervals(nil, chunks)
|
visibles, _ := NonOverlappingVisibleIntervals(nil, chunks, 0, math.MaxInt64)
|
||||||
|
|
||||||
for _, v := range visibles {
|
for _, v := range visibles {
|
||||||
for x := v.start; x < v.stop; x++ {
|
for x := v.start; x < v.stop; x++ {
|
||||||
|
@ -227,7 +227,7 @@ func TestIntervalMerging(t *testing.T) {
|
||||||
|
|
||||||
for i, testcase := range testcases {
|
for i, testcase := range testcases {
|
||||||
log.Printf("++++++++++ merged test case %d ++++++++++++++++++++", i)
|
log.Printf("++++++++++ merged test case %d ++++++++++++++++++++", i)
|
||||||
intervals, _ := NonOverlappingVisibleIntervals(nil, testcase.Chunks)
|
intervals, _ := NonOverlappingVisibleIntervals(nil, testcase.Chunks, 0, math.MaxInt64)
|
||||||
for x, interval := range intervals {
|
for x, interval := range intervals {
|
||||||
log.Printf("test case %d, interval %d, start=%d, stop=%d, fileId=%s",
|
log.Printf("test case %d, interval %d, start=%d, stop=%d, fileId=%s",
|
||||||
i, x, interval.start, interval.stop, interval.fileId)
|
i, x, interval.start, interval.stop, interval.fileId)
|
||||||
|
|
|
@ -130,7 +130,7 @@ func (fh *FileHandle) readFromChunks(buff []byte, offset int64) (int64, error) {
|
||||||
|
|
||||||
var chunkResolveErr error
|
var chunkResolveErr error
|
||||||
if fh.entryViewCache == nil {
|
if fh.entryViewCache == nil {
|
||||||
fh.entryViewCache, chunkResolveErr = filer.NonOverlappingVisibleIntervals(fh.f.wfs.LookupFn(), entry.Chunks)
|
fh.entryViewCache, chunkResolveErr = filer.NonOverlappingVisibleIntervals(fh.f.wfs.LookupFn(), entry.Chunks, 0, math.MaxInt64)
|
||||||
if chunkResolveErr != nil {
|
if chunkResolveErr != nil {
|
||||||
return 0, fmt.Errorf("fail to resolve chunk manifest: %v", chunkResolveErr)
|
return 0, fmt.Errorf("fail to resolve chunk manifest: %v", chunkResolveErr)
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,6 +5,7 @@ import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"github.com/chrislusf/seaweedfs/weed/pb"
|
"github.com/chrislusf/seaweedfs/weed/pb"
|
||||||
"github.com/chrislusf/seaweedfs/weed/wdclient"
|
"github.com/chrislusf/seaweedfs/weed/wdclient"
|
||||||
|
"math"
|
||||||
|
|
||||||
"google.golang.org/grpc"
|
"google.golang.org/grpc"
|
||||||
|
|
||||||
|
@ -228,11 +229,11 @@ func (fs *FilerSink) UpdateEntry(key string, oldEntry *filer_pb.Entry, newParent
|
||||||
|
|
||||||
}
|
}
|
||||||
func compareChunks(lookupFileIdFn wdclient.LookupFileIdFunctionType, oldEntry, newEntry *filer_pb.Entry) (deletedChunks, newChunks []*filer_pb.FileChunk, err error) {
|
func compareChunks(lookupFileIdFn wdclient.LookupFileIdFunctionType, oldEntry, newEntry *filer_pb.Entry) (deletedChunks, newChunks []*filer_pb.FileChunk, err error) {
|
||||||
aData, aMeta, aErr := filer.ResolveChunkManifest(lookupFileIdFn, oldEntry.Chunks)
|
aData, aMeta, aErr := filer.ResolveChunkManifest(lookupFileIdFn, oldEntry.Chunks, 0, math.MaxInt64)
|
||||||
if aErr != nil {
|
if aErr != nil {
|
||||||
return nil, nil, aErr
|
return nil, nil, aErr
|
||||||
}
|
}
|
||||||
bData, bMeta, bErr := filer.ResolveChunkManifest(lookupFileIdFn, newEntry.Chunks)
|
bData, bMeta, bErr := filer.ResolveChunkManifest(lookupFileIdFn, newEntry.Chunks, 0, math.MaxInt64)
|
||||||
if bErr != nil {
|
if bErr != nil {
|
||||||
return nil, nil, bErr
|
return nil, nil, bErr
|
||||||
}
|
}
|
||||||
|
|
|
@ -532,7 +532,7 @@ func (f *WebDavFile) Read(p []byte) (readSize int, err error) {
|
||||||
return 0, io.EOF
|
return 0, io.EOF
|
||||||
}
|
}
|
||||||
if f.entryViewCache == nil {
|
if f.entryViewCache == nil {
|
||||||
f.entryViewCache, _ = filer.NonOverlappingVisibleIntervals(filer.LookupFn(f.fs), f.entry.Chunks)
|
f.entryViewCache, _ = filer.NonOverlappingVisibleIntervals(filer.LookupFn(f.fs), f.entry.Chunks, 0, math.MaxInt64)
|
||||||
f.reader = nil
|
f.reader = nil
|
||||||
}
|
}
|
||||||
if f.reader == nil {
|
if f.reader == nil {
|
||||||
|
|
|
@ -164,7 +164,7 @@ func (c *commandVolumeFsck) collectFilerFileIdAndPaths(volumeIdToServer map[uint
|
||||||
if verbose && entry.Entry.IsDirectory {
|
if verbose && entry.Entry.IsDirectory {
|
||||||
fmt.Fprintf(writer, "checking directory %s\n", util.NewFullPath(entry.Dir, entry.Entry.Name))
|
fmt.Fprintf(writer, "checking directory %s\n", util.NewFullPath(entry.Dir, entry.Entry.Name))
|
||||||
}
|
}
|
||||||
dChunks, mChunks, resolveErr := filer.ResolveChunkManifest(filer.LookupFn(c.env), entry.Entry.Chunks)
|
dChunks, mChunks, resolveErr := filer.ResolveChunkManifest(filer.LookupFn(c.env), entry.Entry.Chunks, 0, math.MaxInt64)
|
||||||
if resolveErr != nil {
|
if resolveErr != nil {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
@ -311,7 +311,7 @@ func (c *commandVolumeFsck) collectFilerFileIds(tempFolder string, volumeIdToSer
|
||||||
files[i.vid].Write(buffer)
|
files[i.vid].Write(buffer)
|
||||||
}
|
}
|
||||||
}, func(entry *filer_pb.FullEntry, outputChan chan interface{}) (err error) {
|
}, func(entry *filer_pb.FullEntry, outputChan chan interface{}) (err error) {
|
||||||
dChunks, mChunks, resolveErr := filer.ResolveChunkManifest(filer.LookupFn(c.env), entry.Entry.Chunks)
|
dChunks, mChunks, resolveErr := filer.ResolveChunkManifest(filer.LookupFn(c.env), entry.Entry.Chunks, 0, math.MaxInt64)
|
||||||
if resolveErr != nil {
|
if resolveErr != nil {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue