package filer2 import ( "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" "sort" "log" "math" ) func TotalSize(chunks []*filer_pb.FileChunk) (size uint64) { for _, c := range chunks { t := uint64(c.Offset + int64(c.Size)) if size < t { size = t } } return } func CompactFileChunks(chunks []*filer_pb.FileChunk) (compacted, garbage []*filer_pb.FileChunk) { return } func mergeToVisibleIntervals(visibles []*visibleInterval, chunk *filer_pb.FileChunk) (merged []*visibleInterval) { if len(visibles) == 0 { return []*visibleInterval{newVisibleInterval(chunk.Offset, chunk.Offset+int64(chunk.Size), chunk.FileId, chunk.Mtime)} } log.Printf("merge chunk %+v => %d", chunk, len(visibles)) for _, v := range visibles { log.Printf("=> %+v", v) } var nonOverlappingStop int // find merge candidates var mergeCandidates []int for t := len(visibles) - 1; t >= 0; t-- { if visibles[t].stop > chunk.Offset { mergeCandidates = append(mergeCandidates, t) } else { nonOverlappingStop = t break } } log.Printf("merged candidates: %+v, starting from %d", mergeCandidates, nonOverlappingStop) if len(mergeCandidates) == 0 { merged = append(visibles, newVisibleInterval( chunk.Offset, chunk.Offset+int64(chunk.Size), chunk.FileId, chunk.Mtime, )) return } // reverse merge candidates i, j := 0, len(mergeCandidates)-1 for i < j { mergeCandidates[i], mergeCandidates[j] = mergeCandidates[j], mergeCandidates[i] i++ j-- } log.Printf("reversed merged candidates: %+v", mergeCandidates) // add chunk into a possibly connected intervals var overlappingIntevals []*visibleInterval for i = 0; i < len(mergeCandidates); i++ { interval := visibles[mergeCandidates[i]] if interval.modifiedTime >= chunk.Mtime { log.Printf("overlappingIntevals add existing interval: [%d,%d)", interval.start, interval.stop) overlappingIntevals = append(overlappingIntevals, interval) } else { start := max(interval.start, chunk.Offset) stop := min(interval.stop, chunk.Offset+int64(chunk.Size)) if interval.start <= chunk.Offset { if interval.start < start { log.Printf("overlappingIntevals add 1: [%d,%d)", interval.start, start) overlappingIntevals = append(overlappingIntevals, newVisibleInterval( interval.start, start, interval.fileId, interval.modifiedTime, )) } log.Printf("overlappingIntevals add 2: [%d,%d)", start, stop) overlappingIntevals = append(overlappingIntevals, newVisibleInterval( start, stop, chunk.FileId, chunk.Mtime, )) if interval.stop < stop { log.Printf("overlappingIntevals add 3: [%d,%d)", interval.stop, stop) overlappingIntevals = append(overlappingIntevals, newVisibleInterval( interval.stop, stop, interval.fileId, interval.modifiedTime, )) } } } } logPrintf("overlappingIntevals", overlappingIntevals) // merge connected intervals merged = visibles[:nonOverlappingStop] var lastInterval *visibleInterval var prevIntervalIndex int for i, interval := range overlappingIntevals { if i == 0 { prevIntervalIndex = i continue } if overlappingIntevals[prevIntervalIndex].fileId != interval.fileId { merged = append(merged, newVisibleInterval( overlappingIntevals[prevIntervalIndex].start, interval.start, overlappingIntevals[prevIntervalIndex].fileId, overlappingIntevals[prevIntervalIndex].modifiedTime, )) prevIntervalIndex = i } } if lastInterval != nil { merged = append(merged, newVisibleInterval( overlappingIntevals[prevIntervalIndex].start, lastInterval.start, overlappingIntevals[prevIntervalIndex].fileId, overlappingIntevals[prevIntervalIndex].modifiedTime, )) } logPrintf("merged", merged) return } func logPrintf(name string, visibles []*visibleInterval) { log.Printf("%s len %d", name, len(visibles)) for _, v := range visibles { log.Printf("%s: => %+v", name, v) } } func nonOverlappingVisibleIntervals(chunks []*filer_pb.FileChunk) (visibles []*visibleInterval) { sort.Slice(chunks, func(i, j int) bool { if chunks[i].Offset < chunks[j].Offset { return true } if chunks[i].Offset == chunks[j].Offset { return chunks[i].Mtime < chunks[j].Mtime } return false }) if len(chunks) == 0 { return } var parallelIntervals, intervals []*visibleInterval var minStopInterval, upToDateInterval *visibleInterval watermarkStart := chunks[0].Offset for _, chunk := range chunks { log.Printf("checking chunk: [%d,%d)", chunk.Offset, chunk.Offset+int64(chunk.Size)) logPrintf("parallelIntervals", parallelIntervals) for len(parallelIntervals) > 0 && watermarkStart < chunk.Offset { logPrintf("parallelIntervals loop 1", parallelIntervals) logPrintf("parallelIntervals loop 1 intervals", intervals) minStopInterval, upToDateInterval = findMinStopInterval(parallelIntervals) nextStop := min(minStopInterval.stop, chunk.Offset) intervals = append(intervals, newVisibleInterval( max(watermarkStart, minStopInterval.start), nextStop, upToDateInterval.fileId, upToDateInterval.modifiedTime, )) watermarkStart = nextStop logPrintf("parallelIntervals loop intervals =>", intervals) // remove processed intervals, possibly multiple var remaining []*visibleInterval for _, interval := range parallelIntervals { if interval.stop != watermarkStart { remaining = append(remaining, newVisibleInterval( interval.start, interval.stop, interval.fileId, interval.modifiedTime, )) } } parallelIntervals = remaining logPrintf("parallelIntervals loop 2", parallelIntervals) logPrintf("parallelIntervals loop 2 intervals", intervals) } parallelIntervals = append(parallelIntervals, newVisibleInterval( chunk.Offset, chunk.Offset+int64(chunk.Size), chunk.FileId, chunk.Mtime, )) } logPrintf("parallelIntervals loop 3", parallelIntervals) logPrintf("parallelIntervals loop 3 intervals", intervals) for len(parallelIntervals) > 0 { minStopInterval, upToDateInterval = findMinStopInterval(parallelIntervals) intervals = append(intervals, newVisibleInterval( max(watermarkStart, minStopInterval.start), minStopInterval.stop, upToDateInterval.fileId, upToDateInterval.modifiedTime, )) watermarkStart = minStopInterval.stop // remove processed intervals, possibly multiple var remaining []*visibleInterval for _, interval := range parallelIntervals { if interval.stop != watermarkStart { remaining = append(remaining, newVisibleInterval( interval.start, interval.stop, interval.fileId, interval.modifiedTime, )) } } parallelIntervals = remaining } logPrintf("parallelIntervals loop 4", parallelIntervals) logPrintf("intervals", intervals) // merge connected intervals, now the intervals are non-intersecting var lastInterval *visibleInterval var prevIntervalIndex int for i, interval := range intervals { if i == 0 { prevIntervalIndex = i continue } if intervals[i-1].fileId != interval.fileId || intervals[i-1].stop < intervals[i].start { visibles = append(visibles, newVisibleInterval( intervals[prevIntervalIndex].start, intervals[i-1].stop, intervals[prevIntervalIndex].fileId, intervals[prevIntervalIndex].modifiedTime, )) prevIntervalIndex = i } lastInterval = intervals[i] logPrintf("intervals loop 1 visibles", visibles) } if lastInterval != nil { visibles = append(visibles, newVisibleInterval( intervals[prevIntervalIndex].start, lastInterval.stop, intervals[prevIntervalIndex].fileId, intervals[prevIntervalIndex].modifiedTime, )) } logPrintf("visibles", visibles) return } func findMinStopInterval(intervals []*visibleInterval) (minStopInterval, upToDateInterval *visibleInterval) { var latestMtime int64 latestIntervalIndex := 0 minStop := int64(math.MaxInt64) minIntervalIndex := 0 for i, interval := range intervals { if minStop > interval.stop { minIntervalIndex = i minStop = interval.stop } if latestMtime < interval.modifiedTime { latestMtime = interval.modifiedTime latestIntervalIndex = i } } minStopInterval = intervals[minIntervalIndex] upToDateInterval = intervals[latestIntervalIndex] return } func nonOverlappingVisibleIntervals0(chunks []*filer_pb.FileChunk) (visibles []*visibleInterval) { sort.Slice(chunks, func(i, j int) bool { if chunks[i].Offset < chunks[j].Offset { return true } if chunks[i].Offset == chunks[j].Offset { return chunks[i].Mtime < chunks[j].Mtime } return false }) for _, c := range chunks { visibles = mergeToVisibleIntervals(visibles, c) } return } // find non-overlapping visible intervals // visible interval map to one file chunk type visibleInterval struct { start int64 stop int64 modifiedTime int64 fileId string } func newVisibleInterval(start, stop int64, fileId string, modifiedTime int64) *visibleInterval { return &visibleInterval{start: start, stop: stop, fileId: fileId, modifiedTime: modifiedTime} } type stackOfChunkIds struct { ids []int } func (s *stackOfChunkIds) isEmpty() bool { return len(s.ids) == 0 } func (s *stackOfChunkIds) pop() int { t := s.ids[len(s.ids)-1] s.ids = s.ids[:len(s.ids)-1] return t } func (s *stackOfChunkIds) push(x int) { s.ids = append(s.ids, x) } func (s *stackOfChunkIds) peek() int { return s.ids[len(s.ids)-1] } func min(x, y int64) int64 { if x <= y { return x } return y } func max(x, y int64) int64 { if x > y { return x } return y }