diff --git a/weed/filer2/filechunks.go b/weed/filer2/filechunks.go index b248a8c59..7fd4af5df 100644 --- a/weed/filer2/filechunks.go +++ b/weed/filer2/filechunks.go @@ -3,7 +3,6 @@ package filer2 import ( "fmt" "hash/fnv" - "math" "sort" "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" @@ -103,137 +102,54 @@ func logPrintf(name string, visibles []*visibleInterval) { */ } -func nonOverlappingVisibleIntervals(chunks []*filer_pb.FileChunk) (visibles []*visibleInterval) { - - sort.Slice(chunks, func(i, j int) bool { - if chunks[i].Offset < chunks[j].Offset { - return true - } - if chunks[i].Offset == chunks[j].Offset { - return chunks[i].Mtime < chunks[j].Mtime - } - return false - }) - - if len(chunks) == 0 { - return - } - - var parallelIntervals, intervals []*visibleInterval - var minStopInterval, upToDateInterval *visibleInterval - watermarkStart := chunks[0].Offset - for _, chunk := range chunks { - // log.Printf("checking chunk: [%d,%d)", chunk.Offset, chunk.Offset+int64(chunk.Size)) - logPrintf("parallelIntervals", parallelIntervals) - for len(parallelIntervals) > 0 && watermarkStart < chunk.Offset { - logPrintf("parallelIntervals loop 1", parallelIntervals) - logPrintf("parallelIntervals loop 1 intervals", intervals) - minStopInterval, upToDateInterval = findMinStopInterval(parallelIntervals) - nextStop := min(minStopInterval.stop, chunk.Offset) - intervals = append(intervals, newVisibleInterval( - max(watermarkStart, minStopInterval.start), - nextStop, - upToDateInterval.fileId, - upToDateInterval.modifiedTime, +func mergeIntoVisibles(visibles []*visibleInterval, chunk *filer_pb.FileChunk) (newVisibles []*visibleInterval) { + for _, v := range visibles { + if v.start < chunk.Offset && chunk.Offset < v.stop { + newVisibles = append(newVisibles, newVisibleInterval( + v.start, + chunk.Offset, + v.fileId, + v.modifiedTime, )) - watermarkStart = nextStop - logPrintf("parallelIntervals loop intervals =>", intervals) - - // remove processed intervals, possibly multiple - var remaining []*visibleInterval - for _, interval := range parallelIntervals { - if interval.stop != watermarkStart { - remaining = append(remaining, interval) - } - } - parallelIntervals = remaining - logPrintf("parallelIntervals loop 2", parallelIntervals) - logPrintf("parallelIntervals loop 2 intervals", intervals) } - parallelIntervals = append(parallelIntervals, newVisibleInterval( - chunk.Offset, - chunk.Offset+int64(chunk.Size), - chunk.FileId, - chunk.Mtime, - )) - } - - logPrintf("parallelIntervals loop 3", parallelIntervals) - logPrintf("parallelIntervals loop 3 intervals", intervals) - for len(parallelIntervals) > 0 { - minStopInterval, upToDateInterval = findMinStopInterval(parallelIntervals) - intervals = append(intervals, newVisibleInterval( - max(watermarkStart, minStopInterval.start), - minStopInterval.stop, - upToDateInterval.fileId, - upToDateInterval.modifiedTime, - )) - watermarkStart = minStopInterval.stop - - // remove processed intervals, possibly multiple - var remaining []*visibleInterval - for _, interval := range parallelIntervals { - if interval.stop != watermarkStart { - remaining = append(remaining, interval) - } - } - parallelIntervals = remaining - } - logPrintf("parallelIntervals loop 4", parallelIntervals) - logPrintf("intervals", intervals) - - // merge connected intervals, now the intervals are non-intersecting - var lastIntervalIndex int - var prevIntervalIndex int - for i, interval := range intervals { - if i == 0 { - prevIntervalIndex = i - lastIntervalIndex = i - continue - } - if intervals[i-1].fileId != interval.fileId || - intervals[i-1].stop < intervals[i].start { - visibles = append(visibles, newVisibleInterval( - intervals[prevIntervalIndex].start, - intervals[i-1].stop, - intervals[prevIntervalIndex].fileId, - intervals[prevIntervalIndex].modifiedTime, + chunkStop := chunk.Offset + int64(chunk.Size) + if v.start < chunkStop && chunkStop < v.stop { + newVisibles = append(newVisibles, newVisibleInterval( + chunkStop, + v.stop, + v.fileId, + v.modifiedTime, )) - prevIntervalIndex = i } - lastIntervalIndex = i - logPrintf("intervals loop 1 visibles", visibles) + if chunkStop < v.start || v.stop <= chunk.Offset { + newVisibles = append(newVisibles, v) + } } - - visibles = append(visibles, newVisibleInterval( - intervals[prevIntervalIndex].start, - intervals[lastIntervalIndex].stop, - intervals[prevIntervalIndex].fileId, - intervals[prevIntervalIndex].modifiedTime, + newVisibles = append(newVisibles, newVisibleInterval( + chunk.Offset, + chunk.Offset+int64(chunk.Size), + chunk.FileId, + chunk.Mtime, )) - - logPrintf("visibles", visibles) - return } -func findMinStopInterval(intervals []*visibleInterval) (minStopInterval, upToDateInterval *visibleInterval) { - var latestMtime int64 - latestIntervalIndex := 0 - minStop := int64(math.MaxInt64) - minIntervalIndex := 0 - for i, interval := range intervals { - if minStop > interval.stop { - minIntervalIndex = i - minStop = interval.stop - } - if latestMtime < interval.modifiedTime { - latestMtime = interval.modifiedTime - latestIntervalIndex = i - } +func nonOverlappingVisibleIntervals(chunks []*filer_pb.FileChunk) (visibles []*visibleInterval) { + + sort.Slice(chunks, func(i, j int) bool { + return chunks[i].Mtime < chunks[j].Mtime + }) + + for _, chunk := range chunks { + visibles = mergeIntoVisibles(visibles, chunk) } - minStopInterval = intervals[minIntervalIndex] - upToDateInterval = intervals[latestIntervalIndex] + + sort.Slice(visibles, func(i, j int) bool { + return visibles[i].start < visibles[j].start + }) + + logPrintf("visibles", visibles) + return } @@ -257,10 +173,3 @@ func min(x, y int64) int64 { } return y } - -func max(x, y int64) int64 { - if x > y { - return x - } - return y -}