seaweedfs/weed/filer/reader_cache.go

package filer

import (
	"fmt"
	"sync"
	"sync/atomic"
	"time"

	"github.com/seaweedfs/seaweedfs/weed/util/chunk_cache"
	"github.com/seaweedfs/seaweedfs/weed/util/mem"
	"github.com/seaweedfs/seaweedfs/weed/wdclient"
)

// ReaderCache tracks one SingleChunkCacher per chunk file id so that a chunk
// that is being downloaded, or was downloaded recently, can be read from
// memory.
type ReaderCache struct {
	// limit is the downloader count at which MaybeCache stops prefetching and
	// ReadChunkAt starts evicting completed downloaders.
	limit int

	// chunkCache is read by ReadChunkAt and filled by downloaders that were
	// asked to cache their chunk.
	chunkCache chunk_cache.ChunkCache

	// lookupFileIdFn is called by downloaders with a chunk file id to obtain
	// the URLs to fetch from. MaybeCache does nothing when it is nil.
	lookupFileIdFn wdclient.LookupFileIdFunctionType

	// The embedded mutex is held by every method that touches downloaders.
	sync.Mutex

	// downloaders maps a chunk file id to the cacher responsible for it.
	downloaders map[string]*SingleChunkCacher
}

// SingleChunkCacher downloads one chunk into memory and serves reads from
// that in-memory copy until it is destroyed.
type SingleChunkCacher struct {
	// completedTimeNew is the UnixNano time at which the download finished, or
	// zero while it has not. It is read and written with sync/atomic and must
	// stay the first field: on 32-bit platforms only the first word of an
	// allocated struct is guaranteed to be 64-bit aligned, and a misaligned
	// 64-bit atomic access panics there.
	completedTimeNew int64

	// The embedded mutex guards data and err. startCaching holds it for the
	// whole download, so readChunkAt blocks until the download has ended.
	sync.Mutex

	parent      *ReaderCache // owning cache; supplies lookupFileIdFn and chunkCache
	chunkFileId string       // file id of the chunk to download
	data        []byte       // downloaded bytes; nil before download, on failure and after destroy
	err         error        // download failure reported to readers
	cipherKey   []byte       // passed through to the chunk fetch
	isGzipped   bool         // passed through to the chunk fetch
	chunkSize   int          // size of the buffer allocated for data
	shouldCache bool         // when true the chunk is also stored in parent.chunkCache

	// wg counts running startCaching and readChunkAt calls; destroy waits on it
	// before releasing data.
	wg sync.WaitGroup

	// cacheStartedCh receives one value once startCaching holds the mutex, and
	// is closed by destroy when it frees data.
	cacheStartedCh chan struct{}
}

// newReaderCache returns an empty ReaderCache that uses chunkCache and
// lookupFileIdFn and treats limit as its downloader threshold.
func newReaderCache(limit int, chunkCache chunk_cache.ChunkCache, lookupFileIdFn wdclient.LookupFileIdFunctionType) *ReaderCache {
	rc := new(ReaderCache)
	rc.limit = limit
	rc.chunkCache = chunkCache
	rc.lookupFileIdFn = lookupFileIdFn
	rc.downloaders = map[string]*SingleChunkCacher{}
	return rc
}

// MaybeCache starts background downloads for the given chunk views that do
// not have a downloader yet, stopping as soon as the number of downloaders
// reaches the limit. It does nothing when no lookup function is configured.
// Prefetched chunks are not written to the chunk cache.
func (rc *ReaderCache) MaybeCache(chunkViews []*ChunkView) {
	if rc.lookupFileIdFn == nil {
		return
	}

	rc.Lock()
	defer rc.Unlock()

	slotsFilled := func() bool {
		return len(rc.downloaders) >= rc.limit
	}

	if slotsFilled() {
		return
	}

	for i := range chunkViews {
		view := chunkViews[i]

		if _, exists := rc.downloaders[view.FileId]; exists {
			continue
		}

		// abort when slots are filled
		if slotsFilled() {
			break
		}

		// glog.V(4).Infof("prefetch %s offset %d", chunkView.FileId, chunkView.LogicOffset)
		// Start the download and wait until it holds its own lock before
		// publishing it, so readers block until the download has ended.
		prefetcher := newSingleChunkCacher(rc, view.FileId, view.CipherKey, view.IsGzipped, int(view.ChunkSize), false)
		go prefetcher.startCaching()
		<-prefetcher.cacheStartedCh
		rc.downloaders[view.FileId] = prefetcher
	}
}

// ReadChunkAt copies bytes of chunk fileId, starting at offset, into buffer
// and returns the number of bytes copied.
//
// Sources are tried in order: an existing downloader for fileId, then the
// chunk cache (only when shouldCache is set or no lookup function is
// configured), and finally a newly started download. cipherKey, isGzipped,
// chunkSize and shouldCache are handed to that new download.
func (rc *ReaderCache) ReadChunkAt(buffer []byte, fileId string, cipherKey []byte, isGzipped bool, offset int64, chunkSize int, shouldCache bool) (int, error) {
	cacher, n, err := rc.readCachedOrStartDownload(buffer, fileId, cipherKey, isGzipped, offset, chunkSize, shouldCache)
	if cacher == nil {
		// Served from an existing downloader or from the chunk cache.
		return n, err
	}

	// Read outside the cache lock: this blocks until the download has ended,
	// and other chunks must stay readable meanwhile.
	return cacher.readChunkAt(buffer, offset)
}

// readCachedOrStartDownload runs the locked part of ReadChunkAt: it returns a
// nil cacher with the read result when the data was already available, or the
// newly registered cacher the caller must read from.
func (rc *ReaderCache) readCachedOrStartDownload(buffer []byte, fileId string, cipherKey []byte, isGzipped bool, offset int64, chunkSize int, shouldCache bool) (*SingleChunkCacher, int, error) {
	rc.Lock()
	// Deferred so the cache is not left locked if a callee panics.
	defer rc.Unlock()

	if existing, found := rc.downloaders[fileId]; found {
		if n, err := existing.readChunkAt(buffer, offset); n != 0 && err == nil {
			return nil, n, err
		}
	}
	if shouldCache || rc.lookupFileIdFn == nil {
		n, err := rc.chunkCache.ReadChunkAt(buffer, fileId, uint64(offset))
		if n > 0 {
			return nil, n, err
		}
	}

	// clean up old downloaders
	if len(rc.downloaders) >= rc.limit {
		rc.evictOldestCompleted()
	}

	// glog.V(4).Infof("cache1 %s", fileId)

	// NOTE(review): an existing downloader for fileId that yielded no bytes is
	// replaced below without destroy() being called on it.
	cacher := newSingleChunkCacher(rc, fileId, cipherKey, isGzipped, chunkSize, shouldCache)
	go cacher.startCaching()
	// Wait until the download goroutine holds the cacher's lock so that no
	// reader can get in before it.
	<-cacher.cacheStartedCh
	rc.downloaders[fileId] = cacher

	return cacher, 0, nil
}

// evictOldestCompleted removes and destroys the downloader with the earliest
// completion time, if any has one; the caller must hold the cache lock.
func (rc *ReaderCache) evictOldestCompleted() {
	oldestFid, oldestTime := "", time.Now().UnixNano()
	for fid, downloader := range rc.downloaders {
		completedTime := atomic.LoadInt64(&downloader.completedTimeNew)
		if completedTime > 0 && completedTime < oldestTime {
			oldestFid, oldestTime = fid, completedTime
		}
	}
	if oldestFid == "" {
		return
	}

	oldDownloader := rc.downloaders[oldestFid]
	delete(rc.downloaders, oldestFid)
	oldDownloader.destroy()
}

// UnCache destroys the downloader registered for fileId, if there is one, and
// removes it from the cache.
func (rc *ReaderCache) UnCache(fileId string) {
	rc.Lock()
	defer rc.Unlock()

	// glog.V(4).Infof("uncache %s", fileId)
	cacher, registered := rc.downloaders[fileId]
	if !registered {
		return
	}

	cacher.destroy()
	delete(rc.downloaders, fileId)
}

// destroy calls destroy on every registered downloader while holding the
// cache lock. The downloaders stay registered in the map.
func (rc *ReaderCache) destroy() {
	rc.Lock()
	defer rc.Unlock()

	for fid := range rc.downloaders {
		rc.downloaders[fid].destroy()
	}
}

// newSingleChunkCacher builds a cacher for chunk fileId owned by parent. The
// download does not begin until startCaching is called.
func newSingleChunkCacher(parent *ReaderCache, fileId string, cipherKey []byte, isGzipped bool, chunkSize int, shouldCache bool) *SingleChunkCacher {
	cacher := new(SingleChunkCacher)
	cacher.parent = parent
	cacher.chunkFileId = fileId
	cacher.cipherKey = cipherKey
	cacher.isGzipped = isGzipped
	cacher.chunkSize = chunkSize
	cacher.shouldCache = shouldCache
	// Unbuffered: startCaching blocks on its send until the creator receives.
	cacher.cacheStartedCh = make(chan struct{})
	return cacher
}

// startCaching downloads the chunk into s.data and is meant to run in its own
// goroutine. It takes the cacher's lock, signals cacheStartedCh, and keeps the
// lock until the download has succeeded or failed, so readChunkAt callers
// block until the outcome is known. A failure is stored in s.err and leaves
// s.data nil. On success the chunk is also stored in the parent's chunk cache
// when shouldCache is set.
func (s *SingleChunkCacher) startCaching() {
	s.wg.Add(1)
	defer s.wg.Done()
	s.Lock()
	defer s.Unlock()

	s.cacheStartedCh <- struct{}{} // means this has been started

	// Record the finish time on every exit path, still under the lock. A
	// failed download must count as completed too, otherwise ReadChunkAt can
	// never evict it and it holds a downloader slot until UnCache is called.
	// The closure makes time.Now run at exit rather than at the defer
	// statement.
	defer func() {
		atomic.StoreInt64(&s.completedTimeNew, time.Now().UnixNano())
	}()

	// The cache may be built without a lookup function; report that as a read
	// error instead of panicking in this goroutine on a nil function call.
	lookupFileIdFn := s.parent.lookupFileIdFn
	if lookupFileIdFn == nil {
		s.err = fmt.Errorf("operation LookupFileId %s failed, err: lookup function is not set", s.chunkFileId)
		return
	}

	urlStrings, err := lookupFileIdFn(s.chunkFileId)
	if err != nil {
		// %w keeps the cause available to errors.Is / errors.As; the message
		// text is unchanged.
		s.err = fmt.Errorf("operation LookupFileId %s failed, err: %w", s.chunkFileId, err)
		return
	}

	s.data = mem.Allocate(s.chunkSize)

	_, s.err = retriedFetchChunkData(s.data, urlStrings, s.cipherKey, s.isGzipped, true, 0)
	if s.err != nil {
		// Release the buffer right away; readers only see the error.
		mem.Free(s.data)
		s.data = nil
		return
	}

	if s.shouldCache {
		s.parent.chunkCache.SetChunk(s.chunkFileId, s.data)
	}
}

// destroy releases the downloaded data, if any, and closes cacheStartedCh.
// It first waits for the download and every running read to finish. Calling
// it again after the data has been released is a no-op.
func (s *SingleChunkCacher) destroy() {
	// wait for all reads to finish before destroying the data
	s.wg.Wait()

	s.Lock()
	defer s.Unlock()

	if s.data == nil {
		// Nothing was downloaded, or it has been released already.
		return
	}

	mem.Free(s.data)
	s.data = nil
	close(s.cacheStartedCh)
}

// readChunkAt copies downloaded bytes starting at offset into buf and returns
// the number of bytes copied. It blocks while the download holds the cacher's
// lock.
//
// It returns the download error if there was one, an error for a negative
// offset, and (0, nil) when no data is held or offset is at or past the end of
// the data.
func (s *SingleChunkCacher) readChunkAt(buf []byte, offset int64) (int, error) {
	s.wg.Add(1)
	defer s.wg.Done()
	s.Lock()
	defer s.Unlock()

	if s.err != nil {
		return 0, s.err
	}

	if offset < 0 {
		return 0, fmt.Errorf("read chunk %s: negative offset %d", s.chunkFileId, offset)
	}

	// Also covers nil or empty data. Without this bound, slicing past the end
	// of data panics.
	if offset >= int64(len(s.data)) {
		return 0, nil
	}

	return copy(buf, s.data[offset:]), nil
}
better control for reader caching 2022-02-26 10:16:47 +00:00			`package filer`

			`import (`
			`"fmt"`
Fix hanging reads in chunk cacher (#3473) Sometimes when an unexpected error occurs the cacher would set an error and return. However, it would not broadcast the condition signal in that case, therefore leaving the goroutine that runs readChunkAt stuck forever. I figured that the condition is unnecessary because readChunkAt is acquiring a lock that is still held by the cacher goroutine anyway. Callees of startCaching have to wait for a WaitGroup which makes sure that readChunkAt can't acquire the lock before startCaching. This way readChunkAt can execute normally and check for the error. 2022-08-21 18:54:02 +00:00			`"sync"`
Fix a few data races when reading files in mount (#3527) 2022-08-26 23:41:37 +00:00			`"sync/atomic"`
Fix hanging reads in chunk cacher (#3473) Sometimes when an unexpected error occurs the cacher would set an error and return. However, it would not broadcast the condition signal in that case, therefore leaving the goroutine that runs readChunkAt stuck forever. I figured that the condition is unnecessary because readChunkAt is acquiring a lock that is still held by the cacher goroutine anyway. Callees of startCaching have to wait for a WaitGroup which makes sure that readChunkAt can't acquire the lock before startCaching. This way readChunkAt can execute normally and check for the error. 2022-08-21 18:54:02 +00:00			`"time"`

move to https://github.com/seaweedfs/seaweedfs 2022-07-29 07:17:28 +00:00			`"github.com/seaweedfs/seaweedfs/weed/util/chunk_cache"`
			`"github.com/seaweedfs/seaweedfs/weed/util/mem"`
			`"github.com/seaweedfs/seaweedfs/weed/wdclient"`
better control for reader caching 2022-02-26 10:16:47 +00:00			`)`

			`type ReaderCache struct {`
			`chunkCache chunk_cache.ChunkCache`
			`lookupFileIdFn wdclient.LookupFileIdFunctionType`
			`sync.Mutex`
			`downloaders map[string]*SingleChunkCacher`
			`limit int`
			`}`

			`type SingleChunkCacher struct {`
Avoid fatal error: sync: Unlock of unlocked RWMutex fix https://github.com/chrislusf/seaweedfs/issues/3306 2022-07-13 07:58:15 +00:00			`sync.Mutex`
Fix a few data races when reading files in mount (#3527) 2022-08-26 23:41:37 +00:00			`parent *ReaderCache`
			`chunkFileId string`
			`data []byte`
			`err error`
			`cipherKey []byte`
			`isGzipped bool`
			`chunkSize int`
			`shouldCache bool`
			`wg sync.WaitGroup`
			`cacheStartedCh chan struct{}`
			`completedTimeNew int64`
better control for reader caching 2022-02-26 10:16:47 +00:00			`}`

			`func newReaderCache(limit int, chunkCache chunk_cache.ChunkCache, lookupFileIdFn wdclient.LookupFileIdFunctionType) *ReaderCache {`
			`return &ReaderCache{`
			`limit: limit,`
			`chunkCache: chunkCache,`
			`lookupFileIdFn: lookupFileIdFn,`
			`downloaders: make(map[string]*SingleChunkCacher),`
			`}`
			`}`

prefetch other chunks when stream reading 2022-02-27 07:20:45 +00:00			`func (rc *ReaderCache) MaybeCache(chunkViews []*ChunkView) {`
better control for reader caching 2022-02-26 10:16:47 +00:00			`if rc.lookupFileIdFn == nil {`
			`return`
			`}`

prefetch other chunks when stream reading 2022-02-27 07:20:45 +00:00			`rc.Lock()`
			`defer rc.Unlock()`

Fix a few data races when reading files in mount (#3527) 2022-08-26 23:41:37 +00:00			`if len(rc.downloaders) >= rc.limit {`
			`return`
			`}`

prefetch other chunks when stream reading 2022-02-27 07:20:45 +00:00			`for _, chunkView := range chunkViews {`
			`if _, found := rc.downloaders[chunkView.FileId]; found {`
			`continue`
better control for reader caching 2022-02-26 10:16:47 +00:00			`}`
prefetch other chunks when stream reading 2022-02-27 07:20:45 +00:00
			`if len(rc.downloaders) >= rc.limit {`
Fix a few data races when reading files in mount (#3527) 2022-08-26 23:41:37 +00:00			`// abort when slots are filled`
better control for reader caching 2022-02-26 10:16:47 +00:00			`return`
			`}`

prefetch other chunks when stream reading 2022-02-27 07:20:45 +00:00			`// glog.V(4).Infof("prefetch %s offset %d", chunkView.FileId, chunkView.LogicOffset)`
			`// cache this chunk if not yet`
			`cacher := newSingleChunkCacher(rc, chunkView.FileId, chunkView.CipherKey, chunkView.IsGzipped, int(chunkView.ChunkSize), false)`
			`go cacher.startCaching()`
Fix hanging reads in chunk cacher (#3473) Sometimes when an unexpected error occurs the cacher would set an error and return. However, it would not broadcast the condition signal in that case, therefore leaving the goroutine that runs readChunkAt stuck forever. I figured that the condition is unnecessary because readChunkAt is acquiring a lock that is still held by the cacher goroutine anyway. Callees of startCaching have to wait for a WaitGroup which makes sure that readChunkAt can't acquire the lock before startCaching. This way readChunkAt can execute normally and check for the error. 2022-08-21 18:54:02 +00:00			`<-cacher.cacheStartedCh`
prefetch other chunks when stream reading 2022-02-27 07:20:45 +00:00			`rc.downloaders[chunkView.FileId] = cacher`

			`}`
better control for reader caching 2022-02-26 10:16:47 +00:00
			`return`
			`}`

			`func (rc *ReaderCache) ReadChunkAt(buffer []byte, fileId string, cipherKey []byte, isGzipped bool, offset int64, chunkSize int, shouldCache bool) (int, error) {`
			`rc.Lock()`
Fix a few data races when reading files in mount (#3527) 2022-08-26 23:41:37 +00:00
better control for reader caching 2022-02-26 10:16:47 +00:00			`if cacher, found := rc.downloaders[fileId]; found {`
mount: fix bug during busy writes fix https://github.com/chrislusf/seaweedfs/issues/3315 2022-07-15 08:03:17 +00:00			`if n, err := cacher.readChunkAt(buffer, offset); n != 0 && err == nil {`
Fix a few data races when reading files in mount (#3527) 2022-08-26 23:41:37 +00:00			`rc.Unlock()`
mount: fix bug during busy writes fix https://github.com/chrislusf/seaweedfs/issues/3315 2022-07-15 08:03:17 +00:00			`return n, err`
			`}`
better control for reader caching 2022-02-26 10:16:47 +00:00			`}`
			`if shouldCache || rc.lookupFileIdFn == nil {`
			`n, err := rc.chunkCache.ReadChunkAt(buffer, fileId, uint64(offset))`
			`if n > 0 {`
Fix a few data races when reading files in mount (#3527) 2022-08-26 23:41:37 +00:00			`rc.Unlock()`
better control for reader caching 2022-02-26 10:16:47 +00:00			`return n, err`
			`}`
			`}`

Fix hanging reads in chunk cacher (#3473) Sometimes when an unexpected error occurs the cacher would set an error and return. However, it would not broadcast the condition signal in that case, therefore leaving the goroutine that runs readChunkAt stuck forever. I figured that the condition is unnecessary because readChunkAt is acquiring a lock that is still held by the cacher goroutine anyway. Callees of startCaching have to wait for a WaitGroup which makes sure that readChunkAt can't acquire the lock before startCaching. This way readChunkAt can execute normally and check for the error. 2022-08-21 18:54:02 +00:00			`// clean up old downloaders`
better control for reader caching 2022-02-26 10:16:47 +00:00			`if len(rc.downloaders) >= rc.limit {`
nano level precision 2022-08-26 23:55:15 +00:00			`oldestFid, oldestTime := "", time.Now().UnixNano()`
better control for reader caching 2022-02-26 10:16:47 +00:00			`for fid, downloader := range rc.downloaders {`
Fix a few data races when reading files in mount (#3527) 2022-08-26 23:41:37 +00:00			`completedTime := atomic.LoadInt64(&downloader.completedTimeNew)`
			`if completedTime > 0 && completedTime < oldestTime {`
			`oldestFid, oldestTime = fid, completedTime`
better control for reader caching 2022-02-26 10:16:47 +00:00			`}`
			`}`
			`if oldestFid != "" {`
			`oldDownloader := rc.downloaders[oldestFid]`
			`delete(rc.downloaders, oldestFid)`
			`oldDownloader.destroy()`
			`}`
			`}`

prefetch other chunks when stream reading 2022-02-27 07:20:45 +00:00			`// glog.V(4).Infof("cache1 %s", fileId)`

better control for reader caching 2022-02-26 10:16:47 +00:00			`cacher := newSingleChunkCacher(rc, fileId, cipherKey, isGzipped, chunkSize, shouldCache)`
			`go cacher.startCaching()`
Fix hanging reads in chunk cacher (#3473) Sometimes when an unexpected error occurs the cacher would set an error and return. However, it would not broadcast the condition signal in that case, therefore leaving the goroutine that runs readChunkAt stuck forever. I figured that the condition is unnecessary because readChunkAt is acquiring a lock that is still held by the cacher goroutine anyway. Callees of startCaching have to wait for a WaitGroup which makes sure that readChunkAt can't acquire the lock before startCaching. This way readChunkAt can execute normally and check for the error. 2022-08-21 18:54:02 +00:00			`<-cacher.cacheStartedCh`
better control for reader caching 2022-02-26 10:16:47 +00:00			`rc.downloaders[fileId] = cacher`
Fix a few data races when reading files in mount (#3527) 2022-08-26 23:41:37 +00:00			`rc.Unlock()`
better control for reader caching 2022-02-26 10:16:47 +00:00
			`return cacher.readChunkAt(buffer, offset)`
			`}`

prefetch other chunks when stream reading 2022-02-27 07:20:45 +00:00			`func (rc *ReaderCache) UnCache(fileId string) {`
			`rc.Lock()`
			`defer rc.Unlock()`
			`// glog.V(4).Infof("uncache %s", fileId)`
			`if downloader, found := rc.downloaders[fileId]; found {`
			`downloader.destroy()`
			`delete(rc.downloaders, fileId)`
			`}`
			`}`

better control for reader caching 2022-02-26 10:16:47 +00:00			`func (rc *ReaderCache) destroy() {`
			`rc.Lock()`
			`defer rc.Unlock()`

			`for _, downloader := range rc.downloaders {`
			`downloader.destroy()`
			`}`

			`}`

			`func newSingleChunkCacher(parent *ReaderCache, fileId string, cipherKey []byte, isGzipped bool, chunkSize int, shouldCache bool) *SingleChunkCacher {`
Fix hanging reads in chunk cacher (#3473) Sometimes when an unexpected error occurs the cacher would set an error and return. However, it would not broadcast the condition signal in that case, therefore leaving the goroutine that runs readChunkAt stuck forever. I figured that the condition is unnecessary because readChunkAt is acquiring a lock that is still held by the cacher goroutine anyway. Callees of startCaching have to wait for a WaitGroup which makes sure that readChunkAt can't acquire the lock before startCaching. This way readChunkAt can execute normally and check for the error. 2022-08-21 18:54:02 +00:00			`return &SingleChunkCacher{`
			`parent: parent,`
			`chunkFileId: fileId,`
			`cipherKey: cipherKey,`
			`isGzipped: isGzipped,`
			`chunkSize: chunkSize,`
			`shouldCache: shouldCache,`
			`cacheStartedCh: make(chan struct{}),`
better control for reader caching 2022-02-26 10:16:47 +00:00			`}`
			`}`

			`func (s *SingleChunkCacher) startCaching() {`
Fix hanging reads in chunk cacher (#3473) Sometimes when an unexpected error occurs the cacher would set an error and return. However, it would not broadcast the condition signal in that case, therefore leaving the goroutine that runs readChunkAt stuck forever. I figured that the condition is unnecessary because readChunkAt is acquiring a lock that is still held by the cacher goroutine anyway. Callees of startCaching have to wait for a WaitGroup which makes sure that readChunkAt can't acquire the lock before startCaching. This way readChunkAt can execute normally and check for the error. 2022-08-21 18:54:02 +00:00			`s.wg.Add(1)`
			`defer s.wg.Done()`
better control for reader caching 2022-02-26 10:16:47 +00:00			`s.Lock()`
			`defer s.Unlock()`

Fix hanging reads in chunk cacher (#3473) Sometimes when an unexpected error occurs the cacher would set an error and return. However, it would not broadcast the condition signal in that case, therefore leaving the goroutine that runs readChunkAt stuck forever. I figured that the condition is unnecessary because readChunkAt is acquiring a lock that is still held by the cacher goroutine anyway. Callees of startCaching have to wait for a WaitGroup which makes sure that readChunkAt can't acquire the lock before startCaching. This way readChunkAt can execute normally and check for the error. 2022-08-21 18:54:02 +00:00			`s.cacheStartedCh <- struct{}{} // means this has been started`
better control for reader caching 2022-02-26 10:16:47 +00:00
			`urlStrings, err := s.parent.lookupFileIdFn(s.chunkFileId)`
			`if err != nil {`
			`s.err = fmt.Errorf("operation LookupFileId %s failed, err: %v", s.chunkFileId, err)`
			`return`
			`}`

			`s.data = mem.Allocate(s.chunkSize)`

			`_, s.err = retriedFetchChunkData(s.data, urlStrings, s.cipherKey, s.isGzipped, true, 0)`
			`if s.err != nil {`
			`mem.Free(s.data)`
			`s.data = nil`
			`return`
			`}`

			`if s.shouldCache {`
			`s.parent.chunkCache.SetChunk(s.chunkFileId, s.data)`
			`}`
nano level precision 2022-08-26 23:55:15 +00:00			`atomic.StoreInt64(&s.completedTimeNew, time.Now().UnixNano())`
better control for reader caching 2022-02-26 10:16:47 +00:00
			`return`
			`}`

			`func (s *SingleChunkCacher) destroy() {`
Fix hanging reads in chunk cacher (#3473) Sometimes when an unexpected error occurs the cacher would set an error and return. However, it would not broadcast the condition signal in that case, therefore leaving the goroutine that runs readChunkAt stuck forever. I figured that the condition is unnecessary because readChunkAt is acquiring a lock that is still held by the cacher goroutine anyway. Callees of startCaching have to wait for a WaitGroup which makes sure that readChunkAt can't acquire the lock before startCaching. This way readChunkAt can execute normally and check for the error. 2022-08-21 18:54:02 +00:00			`// wait for all reads to finish before destroying the data`
			`s.wg.Wait()`
mount: fix bug during busy writes fix https://github.com/chrislusf/seaweedfs/issues/3315 2022-07-15 08:03:17 +00:00			`s.Lock()`
			`defer s.Unlock()`

better control for reader caching 2022-02-26 10:16:47 +00:00			`if s.data != nil {`
			`mem.Free(s.data)`
			`s.data = nil`
Fix race conditions during in-flight size checks (#3505) 2022-08-25 03:03:34 +00:00			`close(s.cacheStartedCh)`
better control for reader caching 2022-02-26 10:16:47 +00:00			`}`
			`}`

			`func (s *SingleChunkCacher) readChunkAt(buf []byte, offset int64) (int, error) {`
Fix hanging reads in chunk cacher (#3473) Sometimes when an unexpected error occurs the cacher would set an error and return. However, it would not broadcast the condition signal in that case, therefore leaving the goroutine that runs readChunkAt stuck forever. I figured that the condition is unnecessary because readChunkAt is acquiring a lock that is still held by the cacher goroutine anyway. Callees of startCaching have to wait for a WaitGroup which makes sure that readChunkAt can't acquire the lock before startCaching. This way readChunkAt can execute normally and check for the error. 2022-08-21 18:54:02 +00:00			`s.wg.Add(1)`
			`defer s.wg.Done()`
Avoid fatal error: sync: Unlock of unlocked RWMutex fix https://github.com/chrislusf/seaweedfs/issues/3306 2022-07-13 07:58:15 +00:00			`s.Lock()`
			`defer s.Unlock()`
better control for reader caching 2022-02-26 10:16:47 +00:00
mount: avoid possible index out of bounds error 2022-03-05 06:36:01 +00:00			`if s.err != nil {`
			`return 0, s.err`
			`}`

mount: fix bug during busy writes fix https://github.com/chrislusf/seaweedfs/issues/3315 2022-07-15 08:03:17 +00:00			`if len(s.data) == 0 {`
			`return 0, nil`
			`}`

mount: avoid possible index out of bounds error 2022-03-05 06:36:01 +00:00			`return copy(buf, s.data[offset:]), nil`
better control for reader caching 2022-02-26 10:16:47 +00:00
			`}`