seaweedfs/weed/mount/page_writer/upload_pipeline.go

package page_writer

import (
    "fmt"
    "sync"
    "sync/atomic"

    "github.com/seaweedfs/seaweedfs/weed/glog"
    "github.com/seaweedfs/seaweedfs/weed/util"
)
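
// LogicChunkIndex is the zero-based index of a fixed-size chunk within a
// file: the write offset divided by ChunkSize.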
type LogicChunkIndex int
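
// UploadPipeline buffers file writes as fixed-size chunks. A chunk is
// writable while it accumulates data, is sealed once complete or evicted,
// and is then handed to a shared pool of uploaders that persist it through
// saveToStorageFn.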
type UploadPipeline struct {
    uploaderCount      int32
    uploaderCountCond  *sync.Cond
    filepath           util.FullPath
    ChunkSize          int64
    uploaders          *util.LimitedConcurrentExecutor
    saveToStorageFn    SaveToStorageFunc
    writableChunkLimit int
    swapFile           *SwapFile
    chunksLock         sync.Mutex
    writableChunks     map[LogicChunkIndex]PageChunk
    sealedChunks       map[LogicChunkIndex]*SealedChunk
    activeReadChunks   map[LogicChunkIndex]int
    readerCountCond    *sync.Cond
}
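
// SealedChunk is a chunk that no longer accepts writes. It stays alive until
// every uploading or reading process has released its reference.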
type SealedChunk struct {
    chunk            PageChunk
    referenceCounter int // track uploading or reading processes
}
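
// FreeReference drops one reference. When the counter reaches zero, the
// chunk's underlying resource (memory or swap-file space) is released.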
func (sc *SealedChunk) FreeReference(messageOnFree string) {
    sc.referenceCounter--
    if sc.referenceCounter == 0 {
        glog.V(4).Infof("Free sealed chunk: %s", messageOnFree)
        sc.chunk.FreeResource()
    }
}
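
// NewUploadPipeline wires up a pipeline: writers bounds concurrent uploads,
// bufferChunkLimit caps the number of writable chunks per file, and
// swapFileDir hosts the swap file that backs chunk data on disk.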
func NewUploadPipeline(writers *util.LimitedConcurrentExecutor, chunkSize int64, saveToStorageFn SaveToStorageFunc, bufferChunkLimit int, swapFileDir string) *UploadPipeline {
    t := &UploadPipeline{
        ChunkSize:          chunkSize,
        writableChunks:     make(map[LogicChunkIndex]PageChunk),
        sealedChunks:       make(map[LogicChunkIndex]*SealedChunk),
        uploaders:          writers,
        uploaderCountCond:  sync.NewCond(&sync.Mutex{}),
        saveToStorageFn:    saveToStorageFn,
        activeReadChunks:   make(map[LogicChunkIndex]int),
        writableChunkLimit: bufferChunkLimit,
        swapFile:           NewSwapFile(swapFileDir, chunkSize),
    }
    t.readerCountCond = sync.NewCond(&t.chunksLock)
    return t
}
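
// SaveDataAt writes p into the writable chunk covering off, creating that
// chunk first if necessary, and returns the number of bytes written.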
func (up *UploadPipeline) SaveDataAt(p []byte, off int64, isSequential bool, tsNs int64) (n int) {
    up.chunksLock.Lock()
    defer up.chunksLock.Unlock()

    logicChunkIndex := LogicChunkIndex(off / up.ChunkSize)

    pageChunk, found := up.writableChunks[logicChunkIndex]
    if !found {
        if len(up.writableChunks) > up.writableChunkLimit {
            // this file is over its per-file buffer count limit:
            // seal the least recently modified chunk to make room
            oldestChunkIndex, oldestTs := LogicChunkIndex(-1), int64(0)
            for lci, mc := range up.writableChunks {
                chunkModifiedTsNs := mc.LastModifiedTsNs()
                if oldestChunkIndex < 0 || chunkModifiedTsNs < oldestTs {
                    oldestChunkIndex = lci
                    oldestTs = chunkModifiedTsNs
                }
            }
            up.moveToSealed(up.writableChunks[oldestChunkIndex], oldestChunkIndex)
        }
        if false && isSequential &&
            len(up.writableChunks) < up.writableChunkLimit &&
            atomic.LoadInt64(&memChunkCounter) < 4*int64(up.writableChunkLimit) {
            // the `false &&` guard deliberately disables memory-backed chunks,
            // so every new chunk is currently backed by the swap file
            pageChunk = NewMemChunk(logicChunkIndex, up.ChunkSize)
        } else {
            pageChunk = up.swapFile.NewTempFileChunk(logicChunkIndex)
        }
        up.writableChunks[logicChunkIndex] = pageChunk
    }

    n = pageChunk.WriteDataAt(p, off, tsNs)
    up.maybeMoveToSealed(pageChunk, logicChunkIndex)

    return
}
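
// MaybeReadDataAt fills p from the chunk covering off, reading the sealed
// chunk first and then the writable chunk, which may hold newer data. It
// returns the exclusive end offset of the data read, or 0 if this chunk is
// not buffered in the pipeline.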
func (up *UploadPipeline) MaybeReadDataAt(p []byte, off int64, tsNs int64) (maxStop int64) {
    logicChunkIndex := LogicChunkIndex(off / up.ChunkSize)

    up.chunksLock.Lock()
    defer func() {
        up.readerCountCond.Signal()
        up.chunksLock.Unlock()
    }()

    // read from sealed chunks first
    if sealedChunk, found := up.sealedChunks[logicChunkIndex]; found {
        sealedChunk.referenceCounter++
        maxStop = sealedChunk.chunk.ReadDataAt(p, off, tsNs)
        glog.V(4).Infof("%s read sealed memchunk [%d,%d)", up.filepath, off, maxStop)
        sealedChunk.FreeReference(fmt.Sprintf("%s finish reading chunk %d", up.filepath, logicChunkIndex))
    }

    // read from writable chunks last, since they may hold newer data
    writableChunk, found := up.writableChunks[logicChunkIndex]
    if !found {
        return
    }
    writableMaxStop := writableChunk.ReadDataAt(p, off, tsNs)
    glog.V(4).Infof("%s read writable memchunk [%d,%d)", up.filepath, off, writableMaxStop)
    maxStop = max(maxStop, writableMaxStop)

    return
}
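
// FlushAll seals every writable chunk and blocks until all in-flight uploads
// have completed.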
func (up *UploadPipeline) FlushAll() {
    up.chunksLock.Lock()
    defer up.chunksLock.Unlock()

    for logicChunkIndex, memChunk := range up.writableChunks {
        up.moveToSealed(memChunk, logicChunkIndex)
    }
    up.waitForCurrentWritersToComplete()
}
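
// maybeMoveToSealed seals a chunk as soon as it is fully written, so complete
// chunks begin uploading without waiting for an explicit flush.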
func (up *UploadPipeline) maybeMoveToSealed(memChunk PageChunk, logicChunkIndex LogicChunkIndex) {
    if memChunk.IsComplete() {
        up.moveToSealed(memChunk, logicChunkIndex)
    }
}
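
// moveToSealed moves a writable chunk into sealedChunks and schedules its
// upload. The caller must hold chunksLock; the lock is released while the
// upload job is submitted and re-acquired before returning.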
func (up *UploadPipeline) moveToSealed(memChunk PageChunk, logicChunkIndex LogicChunkIndex) {
    atomic.AddInt32(&up.uploaderCount, 1)
    glog.V(4).Infof("%s uploaderCount %d ++> %d", up.filepath, up.uploaderCount-1, up.uploaderCount)

    if oldMemChunk, found := up.sealedChunks[logicChunkIndex]; found {
        oldMemChunk.FreeReference(fmt.Sprintf("%s replace chunk %d", up.filepath, logicChunkIndex))
    }
    sealedChunk := &SealedChunk{
        chunk:            memChunk,
        referenceCounter: 1, // default 1 is for uploading process
    }
    up.sealedChunks[logicChunkIndex] = sealedChunk
    delete(up.writableChunks, logicChunkIndex)

    // unlock before submitting the uploading jobs
    up.chunksLock.Unlock()
    up.uploaders.Execute(func() {
        // first add to the file chunks
        sealedChunk.chunk.SaveContent(up.saveToStorageFn)

        // notify waiting processes
        atomic.AddInt32(&up.uploaderCount, -1)
        glog.V(4).Infof("%s uploaderCount %d --> %d", up.filepath, up.uploaderCount+1, up.uploaderCount)
        // Holding the lock around Broadcast is not strictly required by
        // sync.Cond, but without it the broadcast could fire between a
        // waiter's check and its Wait, and the wakeup would be missed.
        up.uploaderCountCond.L.Lock()
        up.uploaderCountCond.Broadcast()
        up.uploaderCountCond.L.Unlock()

        // wait for readers
        up.chunksLock.Lock()
        defer up.chunksLock.Unlock()
        for up.IsLocked(logicChunkIndex) {
            up.readerCountCond.Wait()
        }

        // then remove from sealed chunks
        delete(up.sealedChunks, logicChunkIndex)
        sealedChunk.FreeReference(fmt.Sprintf("%s finished uploading chunk %d", up.filepath, logicChunkIndex))
    })
    up.chunksLock.Lock()
}
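
// Shutdown releases the swap file and drops the pipeline's own reference on
// every remaining sealed chunk.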
func (up *UploadPipeline) Shutdown() {
    up.swapFile.FreeResource()

    up.chunksLock.Lock()
    defer up.chunksLock.Unlock()
    for logicChunkIndex, sealedChunk := range up.sealedChunks {
        sealedChunk.FreeReference(fmt.Sprintf("%s uploadpipeline shutdown chunk %d", up.filepath, logicChunkIndex))
    }
}
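
// Usage sketch (not part of the original file; a minimal illustration, not
// the library's documented workflow). The executor size, chunk size, buffer
// limit, swap directory, and the saveToStorage callback are all assumptions:
//
//    uploaders := util.NewLimitedConcurrentExecutor(8)
//    pipeline := NewUploadPipeline(uploaders, 2<<20, saveToStorage, 4, "/tmp")
//    n := pipeline.SaveDataAt(data, 0, false, time.Now().UnixNano())
//    pipeline.FlushAll()
//    pipeline.Shutdown()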