From 042de9359c49bb0090afbc211c28789f88709718 Mon Sep 17 00:00:00 2001
From: Nathan Hawkins
Date: Wed, 28 Apr 2021 19:13:37 -0400
Subject: [PATCH] make reader_at handle random reads more efficiently for FUSE

---
 weed/filer/reader_at.go                       | 31 +++++++-
 weed/filer/reader_at_test.go                  |  6 ++
 weed/util/chunk_cache/chunk_cache.go          | 76 +++++++++++++++++++
 .../util/chunk_cache/chunk_cache_in_memory.go | 18 +++++
 weed/util/chunk_cache/chunk_cache_on_disk.go  | 35 ++++++++-
 weed/util/chunk_cache/on_disk_cache_layer.go  | 25 ++++++
 6 files changed, 184 insertions(+), 7 deletions(-)

diff --git a/weed/filer/reader_at.go b/weed/filer/reader_at.go
index a1e989684..b03b3bbb4 100644
--- a/weed/filer/reader_at.go
+++ b/weed/filer/reader_at.go
@@ -139,13 +139,15 @@ func (c *ChunkReadAt) doReadAt(p []byte, offset int64) (n int, err error) {
 		}
 		glog.V(4).Infof("read [%d,%d), %d/%d chunk %s [%d,%d)", chunkStart, chunkStop, i, len(c.chunkViews), chunk.FileId, chunk.LogicOffset-chunk.Offset, chunk.LogicOffset-chunk.Offset+int64(chunk.Size))
 		var buffer []byte
-		buffer, err = c.readFromWholeChunkData(chunk, nextChunk)
+		bufferOffset := chunkStart - chunk.LogicOffset + chunk.Offset
+		bufferLength := chunkStop - chunkStart
+		buffer, err = c.readChunkSlice(chunk, nextChunk, uint64(bufferOffset), uint64(bufferLength))
 		if err != nil {
 			glog.Errorf("fetching chunk %+v: %v\n", chunk, err)
 			return
 		}
-		bufferOffset := chunkStart - chunk.LogicOffset + chunk.Offset
-		copied := copy(p[startOffset-offset:chunkStop-chunkStart+startOffset-offset], buffer[bufferOffset:bufferOffset+chunkStop-chunkStart])
+
+		copied := copy(p[startOffset-offset:chunkStop-chunkStart+startOffset-offset], buffer)
 		n += copied
 		startOffset, remaining = startOffset+int64(copied), remaining-int64(copied)
 	}
@@ -167,6 +169,29 @@ func (c *ChunkReadAt) doReadAt(p []byte, offset int64) (n int, err error) {
 
 }
 
+// readChunkSlice returns up to length bytes of the chunk starting at offset.
+// The chunk cache is consulted first so that a small read does not have to
+// load the whole chunk; on a cache miss the whole chunk is fetched and the
+// requested window is sliced out of it. The result may be shorter than length
+// when the chunk ends before offset+length.
+func (c *ChunkReadAt) readChunkSlice(chunkView *ChunkView, nextChunkViews *ChunkView, offset, length uint64) ([]byte, error) {
+
+	chunkSlice := c.chunkCache.GetChunkSlice(chunkView.FileId, offset, length)
+	if len(chunkSlice) > 0 {
+		return chunkSlice, nil
+	}
+	chunkData, err := c.readFromWholeChunkData(chunkView, nextChunkViews)
+	if err != nil {
+		return nil, err
+	}
+	wanted := min(int64(length), int64(len(chunkData))-int64(offset))
+	if wanted < 0 {
+		// offset lies beyond the fetched data; fail instead of panicking on the slice expression
+		return nil, chunk_cache.ErrorOutOfBounds
+	}
+	return chunkData[offset : int64(offset)+wanted], nil
+}
+
 func (c *ChunkReadAt) readFromWholeChunkData(chunkView *ChunkView, nextChunkViews ...*ChunkView) (chunkData []byte, err error) {
 
 	if c.lastChunkFileId == chunkView.FileId {
diff --git a/weed/filer/reader_at_test.go b/weed/filer/reader_at_test.go
index 37a34f4ea..a31319082 100644
--- a/weed/filer/reader_at_test.go
+++ b/weed/filer/reader_at_test.go
@@ -20,6 +20,12 @@ func (m *mockChunkCache) GetChunk(fileId string, minSize uint64) (data []byte) {
 	}
 	return data
 }
+
+// GetChunkSlice always reports a cache miss.
+func (m *mockChunkCache) GetChunkSlice(fileId string, offset, length uint64) []byte {
+	return nil
+}
+
 func (m *mockChunkCache) SetChunk(fileId string, data []byte) {
 }
 
diff --git a/weed/util/chunk_cache/chunk_cache.go b/weed/util/chunk_cache/chunk_cache.go
index 3615aee0e..40d24b322 100644
--- a/weed/util/chunk_cache/chunk_cache.go
+++ b/weed/util/chunk_cache/chunk_cache.go
@@ -1,14 +1,19 @@
 package chunk_cache
 
 import (
+	"errors"
 	"sync"
 
 	"github.com/chrislusf/seaweedfs/weed/glog"
 	"github.com/chrislusf/seaweedfs/weed/storage/needle"
 )
 
+// ErrorOutOfBounds is returned when a slice read starts beyond the end of the cached chunk.
+var ErrorOutOfBounds = errors.New("attempt to read out of bounds")
+
 type ChunkCache interface {
 	GetChunk(fileId string, minSize uint64) (data []byte)
+	GetChunkSlice(fileId string, offset, length uint64) []byte
 	SetChunk(fileId string, data []byte)
 }
 
@@ -22,6 +27,9 @@ type TieredChunkCache struct {
 	onDiskCacheSizeLimit2 uint64
 }
 
+// compile-time check that *TieredChunkCache implements ChunkCache
+var _ ChunkCache = (*TieredChunkCache)(nil)
+
 func NewTieredChunkCache(maxEntries int64, dir string, diskSizeInUnit int64, unitSize int64) *TieredChunkCache {
 
 	c := &TieredChunkCache{
@@ -87,6 +95,66 @@ func (c *TieredChunkCache) doGetChunk(fileId string, minSize uint64) (data []byt
 
 }
 
+// GetChunkSlice returns the bytes [offset, offset+length) of the cached chunk
+// identified by fileId, or nil when no cache tier can serve the whole range.
+// It is safe to call on a nil receiver.
+func (c *TieredChunkCache) GetChunkSlice(fileId string, offset, length uint64) []byte {
+	if c == nil {
+		return nil
+	}
+
+	c.RLock()
+	defer c.RUnlock()
+
+	return c.doGetChunkSlice(fileId, offset, length)
+}
+
+// doGetChunkSlice looks the range up in the memory cache and then in each
+// on-disk tier whose size limit is not below offset+length.
+// GetChunkSlice calls it with the read lock held.
+func (c *TieredChunkCache) doGetChunkSlice(fileId string, offset, length uint64) (data []byte) {
+
+	// the chunk must be at least this large; tiers with a smaller size limit are skipped
+	minSize := offset + length
+	if minSize <= c.onDiskCacheSizeLimit0 {
+		data, err := c.memCache.getChunkSlice(fileId, offset, length)
+		if err != nil {
+			glog.Errorf("failed to read from memcache: %s", err)
+		}
+		// a slice holds at most length bytes, so compare against length rather than minSize
+		if len(data) >= int(length) {
+			return data
+		}
+	}
+
+	fid, err := needle.ParseFileIdFromString(fileId)
+	if err != nil {
+		glog.Errorf("failed to parse file id %s", fileId)
+		return nil
+	}
+
+	if minSize <= c.onDiskCacheSizeLimit0 {
+		data = c.diskCaches[0].getChunkSlice(fid.Key, offset, length)
+		if len(data) >= int(length) {
+			return data
+		}
+	}
+	if minSize <= c.onDiskCacheSizeLimit1 {
+		data = c.diskCaches[1].getChunkSlice(fid.Key, offset, length)
+		if len(data) >= int(length) {
+			return data
+		}
+	}
+	{
+		data = c.diskCaches[2].getChunkSlice(fid.Key, offset, length)
+		if len(data) >= int(length) {
+			return data
+		}
+	}
+
+	return nil
+}
+
 func (c *TieredChunkCache) SetChunk(fileId string, data []byte) {
 	if c == nil {
 		return
@@ -131,3 +199,11 @@ func (c *TieredChunkCache) Shutdown() {
 		diskCache.shutdown()
 	}
 }
+
+// min returns the smaller of x and y.
+func min(x, y int) int {
+	if x < y {
+		return x
+	}
+	return y
+}
diff --git a/weed/util/chunk_cache/chunk_cache_in_memory.go b/weed/util/chunk_cache/chunk_cache_in_memory.go
index 5f26b8c78..d725f8a16 100644
--- a/weed/util/chunk_cache/chunk_cache_in_memory.go
+++ b/weed/util/chunk_cache/chunk_cache_in_memory.go
@@ -31,6 +31,24 @@ func (c *ChunkCacheInMemory) GetChunk(fileId string) []byte {
 	return data
 }
 
+// getChunkSlice returns up to length bytes of the cached chunk starting at
+// offset; the result shares memory with the cache entry. It returns
+// (nil, nil) when fileId is not cached and ErrorOutOfBounds when offset is
+// past the end of the chunk. A hit calls item.Extend(time.Hour) on the entry.
+func (c *ChunkCacheInMemory) getChunkSlice(fileId string, offset, length uint64) ([]byte, error) {
+	item := c.cache.Get(fileId)
+	if item == nil {
+		return nil, nil
+	}
+	data := item.Value().([]byte)
+	item.Extend(time.Hour)
+	wanted := min(int(length), len(data)-int(offset))
+	if wanted < 0 {
+		return nil, ErrorOutOfBounds
+	}
+	return data[offset : int(offset)+wanted], nil
+}
+
 func (c *ChunkCacheInMemory) SetChunk(fileId string, data []byte) {
 	localCopy := make([]byte, len(data))
 	copy(localCopy, data)
diff --git a/weed/util/chunk_cache/chunk_cache_on_disk.go b/weed/util/chunk_cache/chunk_cache_on_disk.go
index 6f87a9a06..36de5c972 100644
--- a/weed/util/chunk_cache/chunk_cache_on_disk.go
+++ b/weed/util/chunk_cache/chunk_cache_on_disk.go
@@ -90,11 +90,11 @@ func (v *ChunkCacheVolume) Shutdown() {
 
 func (v *ChunkCacheVolume) doReset() {
 	v.Shutdown()
-	os.Truncate(v.fileName + ".dat", 0)
-	os.Truncate(v.fileName + ".idx", 0)
-	glog.V(4).Infof("cache removeAll %s ...", v.fileName + ".ldb")
+	os.Truncate(v.fileName+".dat", 0)
+	os.Truncate(v.fileName+".idx", 0)
+	glog.V(4).Infof("cache removeAll %s ...", v.fileName+".ldb")
 	os.RemoveAll(v.fileName + ".ldb")
-	glog.V(4).Infof("cache removed %s", v.fileName + ".ldb")
+	glog.V(4).Infof("cache removed %s", v.fileName+".ldb")
 }
 
 func (v *ChunkCacheVolume) Reset() (*ChunkCacheVolume, error) {
@@ -121,6 +121,33 @@ func (v *ChunkCacheVolume) GetNeedle(key types.NeedleId) ([]byte, error) {
 	return data, nil
 }
 
+// getNeedleSlice reads up to length bytes, starting at offset within the
+// needle stored under key, from the data backend. It returns
+// storage.ErrorNotFound when key is not indexed and ErrorOutOfBounds when
+// offset is past the end of the needle.
+func (v *ChunkCacheVolume) getNeedleSlice(key types.NeedleId, offset, length uint64) ([]byte, error) {
+	nv, ok := v.nm.Get(key)
+	if !ok {
+		return nil, storage.ErrorNotFound
+	}
+	wanted := min(int(length), int(nv.Size)-int(offset))
+	if wanted < 0 {
+		// should never happen, but better than panicking
+		return nil, ErrorOutOfBounds
+	}
+	data := make([]byte, wanted)
+	readStart := nv.Offset.ToActualOffset() + int64(offset)
+	readSize, readErr := v.DataBackend.ReadAt(data, readStart)
+	if readErr != nil {
+		return nil, fmt.Errorf("read %s.dat [%d,%d): %v", v.fileName, readStart, readStart+int64(wanted), readErr)
+	}
+	if readSize != wanted {
+		return nil, fmt.Errorf("read %d, expected %d", readSize, wanted)
+	}
+
+	return data, nil
+}
+
 func (v *ChunkCacheVolume) WriteNeedle(key types.NeedleId, data []byte) error {
 
 	offset := v.fileSize
diff --git a/weed/util/chunk_cache/on_disk_cache_layer.go b/weed/util/chunk_cache/on_disk_cache_layer.go
index eebd89798..a4b3b6994 100644
--- a/weed/util/chunk_cache/on_disk_cache_layer.go
+++ b/weed/util/chunk_cache/on_disk_cache_layer.go
@@ -82,6 +82,31 @@ func (c *OnDiskCacheLayer) getChunk(needleId types.NeedleId) (data []byte) {
 
 }
 
+// getChunkSlice asks each disk cache volume in turn for the requested range
+// of the needle and returns the first non-empty result, or nil when no
+// volume can provide it. Read errors are logged and the next volume is tried.
+func (c *OnDiskCacheLayer) getChunkSlice(needleId types.NeedleId, offset, length uint64) (data []byte) {
+
+	var err error
+
+	for _, diskCache := range c.diskCaches {
+		data, err = diskCache.getNeedleSlice(needleId, offset, length)
+		if err == storage.ErrorNotFound {
+			continue
+		}
+		if err != nil {
+			glog.Errorf("failed to read cache file %s id %d: %v", diskCache.fileName, needleId, err)
+			continue
+		}
+		if len(data) != 0 {
+			return
+		}
+	}
+
+	return nil
+
+}
+
 func (c *OnDiskCacheLayer) shutdown() {
 
 	for _, diskCache := range c.diskCaches {