mirror of
https://github.com/seaweedfs/seaweedfs.git
synced 2024-01-19 02:48:24 +00:00
add stream writer
This should improve the performance of streaming writes, which are common in many cases, e.g., when copying large files.
This is in addition to the improved random read/write operations: 3e69d19380...19084d8791
This commit is contained in:
parent
255a1c7dcd
commit
083d8e9ece
107
weed/filesys/dirty_pages_stream.go
Normal file
107
weed/filesys/dirty_pages_stream.go
Normal file
|
@ -0,0 +1,107 @@
|
||||||
|
package filesys
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"github.com/chrislusf/seaweedfs/weed/filesys/page_writer"
|
||||||
|
"github.com/chrislusf/seaweedfs/weed/glog"
|
||||||
|
"github.com/chrislusf/seaweedfs/weed/pb/filer_pb"
|
||||||
|
"io"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
type StreamDirtyPages struct {
|
||||||
|
f *File
|
||||||
|
writeWaitGroup sync.WaitGroup
|
||||||
|
pageAddLock sync.Mutex
|
||||||
|
chunkAddLock sync.Mutex
|
||||||
|
lastErr error
|
||||||
|
collection string
|
||||||
|
replication string
|
||||||
|
chunkedStream *page_writer.ChunkedStreamWriter
|
||||||
|
}
|
||||||
|
|
||||||
|
func newStreamDirtyPages(file *File, chunkSize int64) *StreamDirtyPages {
|
||||||
|
|
||||||
|
dirtyPages := &StreamDirtyPages{
|
||||||
|
f: file,
|
||||||
|
chunkedStream: page_writer.NewChunkedStreamWriter(chunkSize),
|
||||||
|
}
|
||||||
|
|
||||||
|
dirtyPages.chunkedStream.SetSaveToStorageFunction(dirtyPages.saveChunkedFileIntevalToStorage)
|
||||||
|
|
||||||
|
return dirtyPages
|
||||||
|
}
|
||||||
|
|
||||||
|
func (pages *StreamDirtyPages) AddPage(offset int64, data []byte) {
|
||||||
|
|
||||||
|
pages.pageAddLock.Lock()
|
||||||
|
defer pages.pageAddLock.Unlock()
|
||||||
|
|
||||||
|
glog.V(4).Infof("%v stream AddPage [%d, %d)", pages.f.fullpath(), offset, offset+int64(len(data)))
|
||||||
|
if _, err := pages.chunkedStream.WriteAt(data, offset); err != nil {
|
||||||
|
pages.lastErr = err
|
||||||
|
}
|
||||||
|
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
func (pages *StreamDirtyPages) FlushData() error {
|
||||||
|
pages.saveChunkedFileToStorage()
|
||||||
|
pages.writeWaitGroup.Wait()
|
||||||
|
if pages.lastErr != nil {
|
||||||
|
return fmt.Errorf("flush data: %v", pages.lastErr)
|
||||||
|
}
|
||||||
|
pages.chunkedStream.Reset()
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (pages *StreamDirtyPages) ReadDirtyDataAt(data []byte, startOffset int64) (maxStop int64) {
|
||||||
|
return pages.chunkedStream.ReadDataAt(data, startOffset)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (pages *StreamDirtyPages) GetStorageOptions() (collection, replication string) {
|
||||||
|
return pages.collection, pages.replication
|
||||||
|
}
|
||||||
|
|
||||||
|
func (pages *StreamDirtyPages) saveChunkedFileToStorage() {
|
||||||
|
|
||||||
|
pages.chunkedStream.FlushAll()
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
func (pages *StreamDirtyPages) saveChunkedFileIntevalToStorage(reader io.Reader, offset int64, size int64, cleanupFn func()) {
|
||||||
|
|
||||||
|
mtime := time.Now().UnixNano()
|
||||||
|
pages.writeWaitGroup.Add(1)
|
||||||
|
writer := func() {
|
||||||
|
defer pages.writeWaitGroup.Done()
|
||||||
|
defer cleanupFn()
|
||||||
|
|
||||||
|
chunk, collection, replication, err := pages.f.wfs.saveDataAsChunk(pages.f.fullpath())(reader, pages.f.Name, offset)
|
||||||
|
if err != nil {
|
||||||
|
glog.V(0).Infof("%s saveToStorage [%d,%d): %v", pages.f.fullpath(), offset, offset+size, err)
|
||||||
|
pages.lastErr = err
|
||||||
|
return
|
||||||
|
}
|
||||||
|
chunk.Mtime = mtime
|
||||||
|
pages.collection, pages.replication = collection, replication
|
||||||
|
pages.chunkAddLock.Lock()
|
||||||
|
pages.f.addChunks([]*filer_pb.FileChunk{chunk})
|
||||||
|
glog.V(3).Infof("%s saveToStorage %s [%d,%d)", pages.f.fullpath(), chunk.FileId, offset, offset+size)
|
||||||
|
pages.chunkAddLock.Unlock()
|
||||||
|
|
||||||
|
cleanupFn()
|
||||||
|
}
|
||||||
|
|
||||||
|
if pages.f.wfs.concurrentWriters != nil {
|
||||||
|
pages.f.wfs.concurrentWriters.Execute(writer)
|
||||||
|
} else {
|
||||||
|
go writer()
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
func (pages StreamDirtyPages) Destroy() {
|
||||||
|
pages.chunkedStream.Reset()
|
||||||
|
}
|
|
@ -37,6 +37,7 @@ func (pages *TempFileDirtyPages) AddPage(offset int64, data []byte) {
|
||||||
pages.pageAddLock.Lock()
|
pages.pageAddLock.Lock()
|
||||||
defer pages.pageAddLock.Unlock()
|
defer pages.pageAddLock.Unlock()
|
||||||
|
|
||||||
|
glog.V(4).Infof("%v tempfile AddPage [%d, %d)", pages.f.fullpath(), offset, offset+int64(len(data)))
|
||||||
if _, err := pages.chunkedFile.WriteAt(data, offset); err != nil {
|
if _, err := pages.chunkedFile.WriteAt(data, offset); err != nil {
|
||||||
pages.lastErr = err
|
pages.lastErr = err
|
||||||
}
|
}
|
||||||
|
@ -50,6 +51,7 @@ func (pages *TempFileDirtyPages) FlushData() error {
|
||||||
if pages.lastErr != nil {
|
if pages.lastErr != nil {
|
||||||
return fmt.Errorf("flush data: %v", pages.lastErr)
|
return fmt.Errorf("flush data: %v", pages.lastErr)
|
||||||
}
|
}
|
||||||
|
pages.chunkedFile.Reset()
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -65,7 +67,7 @@ func (pages *TempFileDirtyPages) saveChunkedFileToStorage() {
|
||||||
|
|
||||||
pages.chunkedFile.ProcessEachInterval(func(file *os.File, logicChunkIndex page_writer.LogicChunkIndex, interval *page_writer.ChunkWrittenInterval) {
|
pages.chunkedFile.ProcessEachInterval(func(file *os.File, logicChunkIndex page_writer.LogicChunkIndex, interval *page_writer.ChunkWrittenInterval) {
|
||||||
reader := page_writer.NewFileIntervalReader(pages.chunkedFile, logicChunkIndex, interval)
|
reader := page_writer.NewFileIntervalReader(pages.chunkedFile, logicChunkIndex, interval)
|
||||||
pages.saveChunkedFileIntevalToStorage(reader, int64(logicChunkIndex)*pages.chunkedFile.ChunkSize, interval.Size())
|
pages.saveChunkedFileIntevalToStorage(reader, int64(logicChunkIndex)*pages.chunkedFile.ChunkSize+interval.StartOffset, interval.Size())
|
||||||
})
|
})
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -100,5 +102,5 @@ func (pages *TempFileDirtyPages) saveChunkedFileIntevalToStorage(reader io.Reade
|
||||||
}
|
}
|
||||||
|
|
||||||
func (pages TempFileDirtyPages) Destroy() {
|
func (pages TempFileDirtyPages) Destroy() {
|
||||||
pages.chunkedFile.Destroy()
|
pages.chunkedFile.Reset()
|
||||||
}
|
}
|
||||||
|
|
|
@ -27,6 +27,7 @@ type FileHandle struct {
|
||||||
contentType string
|
contentType string
|
||||||
handle uint64
|
handle uint64
|
||||||
sync.Mutex
|
sync.Mutex
|
||||||
|
sync.WaitGroup
|
||||||
|
|
||||||
f *File
|
f *File
|
||||||
RequestId fuse.RequestID // unique ID for request
|
RequestId fuse.RequestID // unique ID for request
|
||||||
|
@ -41,7 +42,7 @@ func newFileHandle(file *File, uid, gid uint32) *FileHandle {
|
||||||
fh := &FileHandle{
|
fh := &FileHandle{
|
||||||
f: file,
|
f: file,
|
||||||
// dirtyPages: newContinuousDirtyPages(file, writeOnly),
|
// dirtyPages: newContinuousDirtyPages(file, writeOnly),
|
||||||
dirtyPages: newPageWriter(file, 2*1024*1024),
|
dirtyPages: newPageWriter(file, file.wfs.option.CacheSizeMB*1024*1024),
|
||||||
Uid: uid,
|
Uid: uid,
|
||||||
Gid: gid,
|
Gid: gid,
|
||||||
}
|
}
|
||||||
|
@ -63,6 +64,9 @@ var _ = fs.HandleReleaser(&FileHandle{})
|
||||||
|
|
||||||
func (fh *FileHandle) Read(ctx context.Context, req *fuse.ReadRequest, resp *fuse.ReadResponse) error {
|
func (fh *FileHandle) Read(ctx context.Context, req *fuse.ReadRequest, resp *fuse.ReadResponse) error {
|
||||||
|
|
||||||
|
fh.Add(1)
|
||||||
|
defer fh.Done()
|
||||||
|
|
||||||
fh.Lock()
|
fh.Lock()
|
||||||
defer fh.Unlock()
|
defer fh.Unlock()
|
||||||
|
|
||||||
|
@ -170,6 +174,9 @@ func (fh *FileHandle) readFromChunks(buff []byte, offset int64) (int64, error) {
|
||||||
// Write to the file handle
|
// Write to the file handle
|
||||||
func (fh *FileHandle) Write(ctx context.Context, req *fuse.WriteRequest, resp *fuse.WriteResponse) error {
|
func (fh *FileHandle) Write(ctx context.Context, req *fuse.WriteRequest, resp *fuse.WriteResponse) error {
|
||||||
|
|
||||||
|
fh.Add(1)
|
||||||
|
defer fh.Done()
|
||||||
|
|
||||||
fh.Lock()
|
fh.Lock()
|
||||||
defer fh.Unlock()
|
defer fh.Unlock()
|
||||||
|
|
||||||
|
@ -209,8 +216,7 @@ func (fh *FileHandle) Release(ctx context.Context, req *fuse.ReleaseRequest) err
|
||||||
|
|
||||||
glog.V(4).Infof("Release %v fh %d open=%d", fh.f.fullpath(), fh.handle, fh.f.isOpen)
|
glog.V(4).Infof("Release %v fh %d open=%d", fh.f.fullpath(), fh.handle, fh.f.isOpen)
|
||||||
|
|
||||||
fh.Lock()
|
fh.Wait()
|
||||||
defer fh.Unlock()
|
|
||||||
|
|
||||||
fh.f.wfs.handlesLock.Lock()
|
fh.f.wfs.handlesLock.Lock()
|
||||||
fh.f.isOpen--
|
fh.f.isOpen--
|
||||||
|
@ -243,6 +249,9 @@ func (fh *FileHandle) Flush(ctx context.Context, req *fuse.FlushRequest) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fh.Add(1)
|
||||||
|
defer fh.Done()
|
||||||
|
|
||||||
fh.Lock()
|
fh.Lock()
|
||||||
defer fh.Unlock()
|
defer fh.Unlock()
|
||||||
|
|
||||||
|
@ -251,7 +260,6 @@ func (fh *FileHandle) Flush(ctx context.Context, req *fuse.FlushRequest) error {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
glog.V(4).Infof("Flush %v fh %d success", fh.f.fullpath(), fh.handle)
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -24,19 +24,21 @@ func newPageWriter(file *File, chunkSize int64) *PageWriter {
|
||||||
pw := &PageWriter{
|
pw := &PageWriter{
|
||||||
f: file,
|
f: file,
|
||||||
chunkSize: chunkSize,
|
chunkSize: chunkSize,
|
||||||
|
writerPattern: NewWriterPattern(chunkSize),
|
||||||
randomWriter: newTempFileDirtyPages(file, chunkSize),
|
randomWriter: newTempFileDirtyPages(file, chunkSize),
|
||||||
streamWriter: newContinuousDirtyPages(file),
|
streamWriter: newStreamDirtyPages(file, chunkSize),
|
||||||
writerPattern: NewWriterPattern(file.Name, chunkSize),
|
//streamWriter: newContinuousDirtyPages(file),
|
||||||
|
//streamWriter: nil,
|
||||||
}
|
}
|
||||||
return pw
|
return pw
|
||||||
}
|
}
|
||||||
|
|
||||||
func (pw *PageWriter) AddPage(offset int64, data []byte) {
|
func (pw *PageWriter) AddPage(offset int64, data []byte) {
|
||||||
|
|
||||||
glog.V(4).Infof("AddPage %v [%d, %d) streaming:%v", pw.f.fullpath(), offset, offset+int64(len(data)), pw.writerPattern.IsStreamingMode())
|
|
||||||
|
|
||||||
pw.writerPattern.MonitorWriteAt(offset, len(data))
|
pw.writerPattern.MonitorWriteAt(offset, len(data))
|
||||||
|
|
||||||
|
glog.V(4).Infof("%v AddPage [%d, %d) streaming:%v", pw.f.fullpath(), offset, offset+int64(len(data)), pw.writerPattern.IsStreamingMode())
|
||||||
|
|
||||||
chunkIndex := offset / pw.chunkSize
|
chunkIndex := offset / pw.chunkSize
|
||||||
for i := chunkIndex; len(data) > 0; i++ {
|
for i := chunkIndex; len(data) > 0; i++ {
|
||||||
writeSize := min(int64(len(data)), (i+1)*pw.chunkSize-offset)
|
writeSize := min(int64(len(data)), (i+1)*pw.chunkSize-offset)
|
||||||
|
@ -48,7 +50,7 @@ func (pw *PageWriter) AddPage(offset int64, data []byte) {
|
||||||
|
|
||||||
func (pw *PageWriter) addToOneChunk(chunkIndex, offset int64, data []byte) {
|
func (pw *PageWriter) addToOneChunk(chunkIndex, offset int64, data []byte) {
|
||||||
if chunkIndex > 0 {
|
if chunkIndex > 0 {
|
||||||
if pw.writerPattern.IsStreamingMode() {
|
if pw.writerPattern.IsStreamingMode() && pw.streamWriter != nil {
|
||||||
pw.streamWriter.AddPage(offset, data)
|
pw.streamWriter.AddPage(offset, data)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
@ -57,9 +59,12 @@ func (pw *PageWriter) addToOneChunk(chunkIndex, offset int64, data []byte) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (pw *PageWriter) FlushData() error {
|
func (pw *PageWriter) FlushData() error {
|
||||||
|
pw.writerPattern.Reset()
|
||||||
|
if pw.streamWriter != nil {
|
||||||
if err := pw.streamWriter.FlushData(); err != nil {
|
if err := pw.streamWriter.FlushData(); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
}
|
||||||
return pw.randomWriter.FlushData()
|
return pw.randomWriter.FlushData()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -70,10 +75,12 @@ func (pw *PageWriter) ReadDirtyDataAt(data []byte, offset int64) (maxStop int64)
|
||||||
for i := chunkIndex; len(data) > 0; i++ {
|
for i := chunkIndex; len(data) > 0; i++ {
|
||||||
readSize := min(int64(len(data)), (i+1)*pw.chunkSize-offset)
|
readSize := min(int64(len(data)), (i+1)*pw.chunkSize-offset)
|
||||||
|
|
||||||
|
if pw.streamWriter != nil {
|
||||||
m1 := pw.streamWriter.ReadDirtyDataAt(data[:readSize], offset)
|
m1 := pw.streamWriter.ReadDirtyDataAt(data[:readSize], offset)
|
||||||
|
maxStop = max(maxStop, m1)
|
||||||
|
}
|
||||||
m2 := pw.randomWriter.ReadDirtyDataAt(data[:readSize], offset)
|
m2 := pw.randomWriter.ReadDirtyDataAt(data[:readSize], offset)
|
||||||
|
maxStop = max(maxStop, m2)
|
||||||
maxStop = max(maxStop, max(m1, m2))
|
|
||||||
|
|
||||||
offset += readSize
|
offset += readSize
|
||||||
data = data[readSize:]
|
data = data[readSize:]
|
||||||
|
@ -83,13 +90,16 @@ func (pw *PageWriter) ReadDirtyDataAt(data []byte, offset int64) (maxStop int64)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (pw *PageWriter) GetStorageOptions() (collection, replication string) {
|
func (pw *PageWriter) GetStorageOptions() (collection, replication string) {
|
||||||
if pw.writerPattern.IsStreamingMode() {
|
if pw.writerPattern.IsStreamingMode() && pw.streamWriter != nil {
|
||||||
return pw.streamWriter.GetStorageOptions()
|
return pw.streamWriter.GetStorageOptions()
|
||||||
}
|
}
|
||||||
return pw.randomWriter.GetStorageOptions()
|
return pw.randomWriter.GetStorageOptions()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (pw *PageWriter) Destroy() {
|
func (pw *PageWriter) Destroy() {
|
||||||
|
if pw.streamWriter != nil {
|
||||||
|
pw.streamWriter.Destroy()
|
||||||
|
}
|
||||||
pw.randomWriter.Destroy()
|
pw.randomWriter.Destroy()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -4,14 +4,18 @@ import "math"
|
||||||
|
|
||||||
// ChunkWrittenInterval mark one written interval within one page chunk
|
// ChunkWrittenInterval mark one written interval within one page chunk
|
||||||
type ChunkWrittenInterval struct {
|
type ChunkWrittenInterval struct {
|
||||||
startOffset int64
|
StartOffset int64
|
||||||
stopOffset int64
|
stopOffset int64
|
||||||
prev *ChunkWrittenInterval
|
prev *ChunkWrittenInterval
|
||||||
next *ChunkWrittenInterval
|
next *ChunkWrittenInterval
|
||||||
}
|
}
|
||||||
|
|
||||||
func (interval *ChunkWrittenInterval) Size() int64 {
|
func (interval *ChunkWrittenInterval) Size() int64 {
|
||||||
return interval.stopOffset - interval.startOffset
|
return interval.stopOffset - interval.StartOffset
|
||||||
|
}
|
||||||
|
|
||||||
|
func (interval *ChunkWrittenInterval) isComplete(chunkSize int64) bool {
|
||||||
|
return interval.stopOffset-interval.StartOffset == chunkSize
|
||||||
}
|
}
|
||||||
|
|
||||||
// ChunkWrittenIntervalList mark written intervals within one page chunk
|
// ChunkWrittenIntervalList mark written intervals within one page chunk
|
||||||
|
@ -23,11 +27,11 @@ type ChunkWrittenIntervalList struct {
|
||||||
func newChunkWrittenIntervalList() *ChunkWrittenIntervalList {
|
func newChunkWrittenIntervalList() *ChunkWrittenIntervalList {
|
||||||
list := &ChunkWrittenIntervalList{
|
list := &ChunkWrittenIntervalList{
|
||||||
head: &ChunkWrittenInterval{
|
head: &ChunkWrittenInterval{
|
||||||
startOffset: -1,
|
StartOffset: -1,
|
||||||
stopOffset: -1,
|
stopOffset: -1,
|
||||||
},
|
},
|
||||||
tail: &ChunkWrittenInterval{
|
tail: &ChunkWrittenInterval{
|
||||||
startOffset: math.MaxInt64,
|
StartOffset: math.MaxInt64,
|
||||||
stopOffset: math.MaxInt64,
|
stopOffset: math.MaxInt64,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
@ -38,35 +42,40 @@ func newChunkWrittenIntervalList() *ChunkWrittenIntervalList {
|
||||||
|
|
||||||
func (list *ChunkWrittenIntervalList) MarkWritten(startOffset, stopOffset int64) {
|
func (list *ChunkWrittenIntervalList) MarkWritten(startOffset, stopOffset int64) {
|
||||||
interval := &ChunkWrittenInterval{
|
interval := &ChunkWrittenInterval{
|
||||||
startOffset: startOffset,
|
StartOffset: startOffset,
|
||||||
stopOffset: stopOffset,
|
stopOffset: stopOffset,
|
||||||
}
|
}
|
||||||
list.addInterval(interval)
|
list.addInterval(interval)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (list *ChunkWrittenIntervalList) IsComplete(chunkSize int64) bool {
|
||||||
|
return list.size() == 1 && list.head.next.isComplete(chunkSize)
|
||||||
|
}
|
||||||
|
|
||||||
func (list *ChunkWrittenIntervalList) addInterval(interval *ChunkWrittenInterval) {
|
func (list *ChunkWrittenIntervalList) addInterval(interval *ChunkWrittenInterval) {
|
||||||
|
|
||||||
p := list.head
|
p := list.head
|
||||||
for ; p.next != nil && p.next.startOffset <= interval.startOffset; p = p.next {
|
for ; p.next != nil && p.next.StartOffset <= interval.StartOffset; p = p.next {
|
||||||
}
|
}
|
||||||
q := list.tail
|
q := list.tail
|
||||||
for ; q.prev != nil && q.prev.stopOffset >= interval.stopOffset; q = q.prev {
|
for ; q.prev != nil && q.prev.stopOffset >= interval.stopOffset; q = q.prev {
|
||||||
}
|
}
|
||||||
|
|
||||||
if interval.startOffset <= p.stopOffset && q.startOffset <= interval.stopOffset {
|
if interval.StartOffset <= p.stopOffset && q.StartOffset <= interval.stopOffset {
|
||||||
// merge p and q together
|
// merge p and q together
|
||||||
p.stopOffset = q.stopOffset
|
p.stopOffset = q.stopOffset
|
||||||
unlinkNodesBetween(p, q.next)
|
unlinkNodesBetween(p, q.next)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if interval.startOffset <= p.stopOffset {
|
if interval.StartOffset <= p.stopOffset {
|
||||||
// merge new interval into p
|
// merge new interval into p
|
||||||
p.stopOffset = interval.stopOffset
|
p.stopOffset = interval.stopOffset
|
||||||
unlinkNodesBetween(p, q)
|
unlinkNodesBetween(p, q)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if q.startOffset <= interval.stopOffset {
|
if q.StartOffset <= interval.stopOffset {
|
||||||
// merge new interval into q
|
// merge new interval into q
|
||||||
q.startOffset = interval.startOffset
|
q.StartOffset = interval.StartOffset
|
||||||
unlinkNodesBetween(p, q)
|
unlinkNodesBetween(p, q)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
|
@ -64,7 +64,7 @@ func (cw *ChunkedFileWriter) ReadDataAt(p []byte, off int64) (maxStop int64) {
|
||||||
actualChunkIndex, chunkUsage := cw.toActualReadOffset(off)
|
actualChunkIndex, chunkUsage := cw.toActualReadOffset(off)
|
||||||
if chunkUsage != nil {
|
if chunkUsage != nil {
|
||||||
for t := chunkUsage.head.next; t != chunkUsage.tail; t = t.next {
|
for t := chunkUsage.head.next; t != chunkUsage.tail; t = t.next {
|
||||||
logicStart := max(off, logicChunkIndex*cw.ChunkSize+t.startOffset)
|
logicStart := max(off, logicChunkIndex*cw.ChunkSize+t.StartOffset)
|
||||||
logicStop := min(off+int64(len(p)), logicChunkIndex*cw.ChunkSize+t.stopOffset)
|
logicStop := min(off+int64(len(p)), logicChunkIndex*cw.ChunkSize+t.stopOffset)
|
||||||
if logicStart < logicStop {
|
if logicStart < logicStop {
|
||||||
actualStart := logicStart - logicChunkIndex*cw.ChunkSize + int64(actualChunkIndex)*cw.ChunkSize
|
actualStart := logicStart - logicChunkIndex*cw.ChunkSize + int64(actualChunkIndex)*cw.ChunkSize
|
||||||
|
@ -110,11 +110,16 @@ func (cw *ChunkedFileWriter) ProcessEachInterval(process func(file *os.File, log
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
func (cw *ChunkedFileWriter) Destroy() {
|
|
||||||
|
// Reset releases used resources
|
||||||
|
func (cw *ChunkedFileWriter) Reset() {
|
||||||
if cw.file != nil {
|
if cw.file != nil {
|
||||||
cw.file.Close()
|
cw.file.Close()
|
||||||
os.Remove(cw.file.Name())
|
os.Remove(cw.file.Name())
|
||||||
|
cw.file = nil
|
||||||
}
|
}
|
||||||
|
cw.logicToActualChunkIndex = make(map[LogicChunkIndex]ActualChunkIndex)
|
||||||
|
cw.chunkUsages = cw.chunkUsages[:0]
|
||||||
}
|
}
|
||||||
|
|
||||||
type FileIntervalReader struct {
|
type FileIntervalReader struct {
|
||||||
|
@ -134,7 +139,7 @@ func NewFileIntervalReader(cw *ChunkedFileWriter, logicChunkIndex LogicChunkInde
|
||||||
}
|
}
|
||||||
return &FileIntervalReader{
|
return &FileIntervalReader{
|
||||||
f: cw.file,
|
f: cw.file,
|
||||||
startOffset: int64(actualChunkIndex)*cw.ChunkSize + interval.startOffset,
|
startOffset: int64(actualChunkIndex)*cw.ChunkSize + interval.StartOffset,
|
||||||
stopOffset: int64(actualChunkIndex)*cw.ChunkSize + interval.stopOffset,
|
stopOffset: int64(actualChunkIndex)*cw.ChunkSize + interval.stopOffset,
|
||||||
position: 0,
|
position: 0,
|
||||||
}
|
}
|
||||||
|
|
|
@ -35,9 +35,9 @@ func writeToFile(cw *ChunkedFileWriter, startOffset int64, stopOffset int64) {
|
||||||
|
|
||||||
func TestWriteChunkedFile(t *testing.T) {
|
func TestWriteChunkedFile(t *testing.T) {
|
||||||
x := NewChunkedFileWriter(os.TempDir(), 20)
|
x := NewChunkedFileWriter(os.TempDir(), 20)
|
||||||
defer x.Destroy()
|
defer x.Reset()
|
||||||
y := NewChunkedFileWriter(os.TempDir(), 12)
|
y := NewChunkedFileWriter(os.TempDir(), 12)
|
||||||
defer y.Destroy()
|
defer y.Reset()
|
||||||
|
|
||||||
batchSize := 4
|
batchSize := 4
|
||||||
buf := make([]byte, batchSize)
|
buf := make([]byte, batchSize)
|
||||||
|
|
119
weed/filesys/page_writer/chunked_stream_writer.go
Normal file
119
weed/filesys/page_writer/chunked_stream_writer.go
Normal file
|
@ -0,0 +1,119 @@
|
||||||
|
package page_writer
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/chrislusf/seaweedfs/weed/util"
|
||||||
|
"github.com/chrislusf/seaweedfs/weed/util/mem"
|
||||||
|
"io"
|
||||||
|
"sync"
|
||||||
|
"sync/atomic"
|
||||||
|
)
|
||||||
|
|
||||||
|
// SaveToStorageFunc persists one written interval of a chunk. reader yields
// the interval's bytes, offset is the interval's absolute position
// (chunk index * chunk size + start offset within the chunk) and size is its
// length. cleanupFn is to be invoked once the data is no longer needed;
// ChunkedStreamWriter uses it to release the chunk's buffer.
type SaveToStorageFunc func(reader io.Reader, offset int64, size int64, cleanupFn func())
|
||||||
|
|
||||||
|
// ChunkedStreamWriter assumes the write requests will come in within chunks and in streaming mode
|
||||||
|
type ChunkedStreamWriter struct {
|
||||||
|
activeChunks map[LogicChunkIndex]*MemChunk
|
||||||
|
activeChunksLock sync.Mutex
|
||||||
|
ChunkSize int64
|
||||||
|
saveToStorageFn SaveToStorageFunc
|
||||||
|
sync.Mutex
|
||||||
|
}
|
||||||
|
|
||||||
|
type MemChunk struct {
|
||||||
|
buf []byte
|
||||||
|
usage *ChunkWrittenIntervalList
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compile-time check that *ChunkedStreamWriter satisfies io.WriterAt.
var _ = io.WriterAt(&ChunkedStreamWriter{})
|
||||||
|
|
||||||
|
func NewChunkedStreamWriter(chunkSize int64) *ChunkedStreamWriter {
|
||||||
|
return &ChunkedStreamWriter{
|
||||||
|
ChunkSize: chunkSize,
|
||||||
|
activeChunks: make(map[LogicChunkIndex]*MemChunk),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (cw *ChunkedStreamWriter) SetSaveToStorageFunction(saveToStorageFn SaveToStorageFunc) {
|
||||||
|
cw.saveToStorageFn = saveToStorageFn
|
||||||
|
}
|
||||||
|
|
||||||
|
func (cw *ChunkedStreamWriter) WriteAt(p []byte, off int64) (n int, err error) {
|
||||||
|
cw.Lock()
|
||||||
|
defer cw.Unlock()
|
||||||
|
|
||||||
|
logicChunkIndex := LogicChunkIndex(off / cw.ChunkSize)
|
||||||
|
offsetRemainder := off % cw.ChunkSize
|
||||||
|
|
||||||
|
memChunk, found := cw.activeChunks[logicChunkIndex]
|
||||||
|
if !found {
|
||||||
|
memChunk = &MemChunk{
|
||||||
|
buf: mem.Allocate(int(cw.ChunkSize)),
|
||||||
|
usage: newChunkWrittenIntervalList(),
|
||||||
|
}
|
||||||
|
cw.activeChunks[logicChunkIndex] = memChunk
|
||||||
|
}
|
||||||
|
n = copy(memChunk.buf[offsetRemainder:], p)
|
||||||
|
memChunk.usage.MarkWritten(offsetRemainder, offsetRemainder+int64(n))
|
||||||
|
if memChunk.usage.IsComplete(cw.ChunkSize) {
|
||||||
|
if cw.saveToStorageFn != nil {
|
||||||
|
cw.saveOneChunk(memChunk, logicChunkIndex)
|
||||||
|
delete(cw.activeChunks, logicChunkIndex)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
func (cw *ChunkedStreamWriter) ReadDataAt(p []byte, off int64) (maxStop int64) {
|
||||||
|
cw.Lock()
|
||||||
|
defer cw.Unlock()
|
||||||
|
|
||||||
|
logicChunkIndex := LogicChunkIndex(off / cw.ChunkSize)
|
||||||
|
memChunkBaseOffset := int64(logicChunkIndex) * cw.ChunkSize
|
||||||
|
memChunk, found := cw.activeChunks[logicChunkIndex]
|
||||||
|
if !found {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
for t := memChunk.usage.head.next; t != memChunk.usage.tail; t = t.next {
|
||||||
|
logicStart := max(off, int64(logicChunkIndex)*cw.ChunkSize+t.StartOffset)
|
||||||
|
logicStop := min(off+int64(len(p)), memChunkBaseOffset+t.stopOffset)
|
||||||
|
if logicStart < logicStop {
|
||||||
|
copy(p[logicStart-off:logicStop-off], memChunk.buf[logicStart-memChunkBaseOffset:logicStop-memChunkBaseOffset])
|
||||||
|
maxStop = max(maxStop, logicStop)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
func (cw *ChunkedStreamWriter) FlushAll() {
|
||||||
|
cw.Lock()
|
||||||
|
defer cw.Unlock()
|
||||||
|
for logicChunkIndex, memChunk := range cw.activeChunks {
|
||||||
|
if cw.saveToStorageFn != nil {
|
||||||
|
cw.saveOneChunk(memChunk, logicChunkIndex)
|
||||||
|
delete(cw.activeChunks, logicChunkIndex)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (cw *ChunkedStreamWriter) saveOneChunk(memChunk *MemChunk, logicChunkIndex LogicChunkIndex) {
|
||||||
|
var referenceCounter = int32(memChunk.usage.size())
|
||||||
|
for t := memChunk.usage.head.next; t != memChunk.usage.tail; t = t.next {
|
||||||
|
reader := util.NewBytesReader(memChunk.buf[t.StartOffset:t.stopOffset])
|
||||||
|
cw.saveToStorageFn(reader, int64(logicChunkIndex)*cw.ChunkSize+t.StartOffset, t.Size(), func() {
|
||||||
|
atomic.AddInt32(&referenceCounter, -1)
|
||||||
|
if atomic.LoadInt32(&referenceCounter) == 0 {
|
||||||
|
mem.Free(memChunk.buf)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reset releases used resources
|
||||||
|
func (cw *ChunkedStreamWriter) Reset() {
|
||||||
|
for t, memChunk := range cw.activeChunks {
|
||||||
|
mem.Free(memChunk.buf)
|
||||||
|
delete(cw.activeChunks, t)
|
||||||
|
}
|
||||||
|
}
|
33
weed/filesys/page_writer/chunked_stream_writer_test.go
Normal file
33
weed/filesys/page_writer/chunked_stream_writer_test.go
Normal file
|
@ -0,0 +1,33 @@
|
||||||
|
package page_writer
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"os"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestWriteChunkedStream(t *testing.T) {
|
||||||
|
x := NewChunkedStreamWriter(20)
|
||||||
|
defer x.Reset()
|
||||||
|
y := NewChunkedFileWriter(os.TempDir(), 12)
|
||||||
|
defer y.Reset()
|
||||||
|
|
||||||
|
batchSize := 4
|
||||||
|
buf := make([]byte, batchSize)
|
||||||
|
for i := 0; i < 256; i++ {
|
||||||
|
for x := 0; x < batchSize; x++ {
|
||||||
|
buf[x] = byte(i)
|
||||||
|
}
|
||||||
|
x.WriteAt(buf, int64(i*batchSize))
|
||||||
|
y.WriteAt(buf, int64((255-i)*batchSize))
|
||||||
|
}
|
||||||
|
|
||||||
|
a := make([]byte, 1)
|
||||||
|
b := make([]byte, 1)
|
||||||
|
for i := 0; i < 256*batchSize; i++ {
|
||||||
|
x.ReadDataAt(a, int64(i))
|
||||||
|
y.ReadDataAt(b, int64(256*batchSize-1-i))
|
||||||
|
assert.Equal(t, a[0], b[0], "same read")
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -4,19 +4,17 @@ type WriterPattern struct {
|
||||||
isStreaming bool
|
isStreaming bool
|
||||||
lastWriteOffset int64
|
lastWriteOffset int64
|
||||||
chunkSize int64
|
chunkSize int64
|
||||||
fileName string
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// For streaming write: only cache the first chunk
|
// For streaming write: only cache the first chunk
|
||||||
// For random write: fall back to temp file approach
|
// For random write: fall back to temp file approach
|
||||||
// writes can only change from streaming mode to non-streaming mode
|
// writes can only change from streaming mode to non-streaming mode
|
||||||
|
|
||||||
func NewWriterPattern(fileName string, chunkSize int64) *WriterPattern {
|
func NewWriterPattern(chunkSize int64) *WriterPattern {
|
||||||
return &WriterPattern{
|
return &WriterPattern{
|
||||||
isStreaming: true,
|
isStreaming: true,
|
||||||
lastWriteOffset: -1,
|
lastWriteOffset: -1,
|
||||||
chunkSize: chunkSize,
|
chunkSize: chunkSize,
|
||||||
fileName: fileName,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -39,3 +37,8 @@ func (rp *WriterPattern) IsStreamingMode() bool {
|
||||||
func (rp *WriterPattern) IsRandomMode() bool {
|
func (rp *WriterPattern) IsRandomMode() bool {
|
||||||
return !rp.isStreaming
|
return !rp.isStreaming
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (rp *WriterPattern) Reset() {
|
||||||
|
rp.isStreaming = true
|
||||||
|
rp.lastWriteOffset = -1
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in a new issue