2020-09-01 07:21:19 +00:00
|
|
|
package filer
|
2019-06-05 08:30:24 +00:00
|
|
|
|
|
|
|
import (
|
2020-03-21 03:31:11 +00:00
|
|
|
"bytes"
|
2020-10-13 20:53:34 +00:00
|
|
|
"fmt"
|
2019-06-05 08:30:24 +00:00
|
|
|
"io"
|
2020-03-21 03:31:11 +00:00
|
|
|
"math"
|
2021-08-07 22:35:27 +00:00
|
|
|
"sort"
|
2020-03-22 08:00:36 +00:00
|
|
|
"strings"
|
2021-05-24 07:14:50 +00:00
|
|
|
"time"
|
2019-06-05 08:30:24 +00:00
|
|
|
|
|
|
|
"github.com/chrislusf/seaweedfs/weed/glog"
|
|
|
|
"github.com/chrislusf/seaweedfs/weed/pb/filer_pb"
|
2021-05-24 07:14:50 +00:00
|
|
|
"github.com/chrislusf/seaweedfs/weed/stats"
|
2019-06-05 08:30:24 +00:00
|
|
|
"github.com/chrislusf/seaweedfs/weed/util"
|
|
|
|
"github.com/chrislusf/seaweedfs/weed/wdclient"
|
|
|
|
)
|
|
|
|
|
2021-08-16 02:46:45 +00:00
|
|
|
func HasData(entry *filer_pb.Entry) bool {
|
|
|
|
|
|
|
|
if len(entry.Content) > 0 {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
|
|
|
return len(entry.Chunks) > 0
|
|
|
|
}
|
|
|
|
|
|
|
|
func IsSameData(a, b *filer_pb.Entry) bool {
|
|
|
|
|
|
|
|
if len(a.Content) > 0 || len(b.Content) > 0 {
|
|
|
|
return bytes.Equal(a.Content, b.Content)
|
|
|
|
}
|
|
|
|
|
|
|
|
return isSameChunks(a.Chunks, b.Chunks)
|
|
|
|
}
|
|
|
|
|
|
|
|
func isSameChunks(a, b []*filer_pb.FileChunk) bool {
|
|
|
|
if len(a) != len(b) {
|
|
|
|
return false
|
|
|
|
}
|
2021-08-16 03:23:41 +00:00
|
|
|
sort.Slice(a, func(i, j int) bool {
|
|
|
|
return strings.Compare(a[i].ETag, a[j].ETag) < 0
|
|
|
|
})
|
|
|
|
sort.Slice(b, func(i, j int) bool {
|
|
|
|
return strings.Compare(b[i].ETag, b[j].ETag) < 0
|
|
|
|
})
|
2021-08-16 02:46:45 +00:00
|
|
|
for i := 0; i < len(a); i++ {
|
2021-08-16 03:23:41 +00:00
|
|
|
if a[i].ETag != b[i].ETag {
|
2021-08-16 02:46:45 +00:00
|
|
|
return false
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
|
|
|
func NewFileReader(filerClient filer_pb.FilerClient, entry *filer_pb.Entry) io.Reader {
|
|
|
|
if len(entry.Content) > 0 {
|
|
|
|
return bytes.NewReader(entry.Content)
|
|
|
|
}
|
|
|
|
return NewChunkStreamReader(filerClient, entry.Chunks)
|
|
|
|
}
|
|
|
|
|
2021-08-13 18:00:11 +00:00
|
|
|
func StreamContent(masterClient wdclient.HasLookupFileIdFunction, writer io.Writer, chunks []*filer_pb.FileChunk, offset int64, size int64) error {
|
2019-06-05 08:30:24 +00:00
|
|
|
|
2021-03-11 17:38:59 +00:00
|
|
|
glog.V(9).Infof("start to stream content for chunks: %+v\n", chunks)
|
2021-01-06 12:21:34 +00:00
|
|
|
chunkViews := ViewFromChunks(masterClient.GetLookupFileIdFunction(), chunks, offset, size)
|
2019-06-05 08:30:24 +00:00
|
|
|
|
2020-10-08 05:49:04 +00:00
|
|
|
fileId2Url := make(map[string][]string)
|
2019-06-05 08:30:24 +00:00
|
|
|
|
|
|
|
for _, chunkView := range chunkViews {
|
|
|
|
|
2021-01-06 12:21:34 +00:00
|
|
|
urlStrings, err := masterClient.GetLookupFileIdFunction()(chunkView.FileId)
|
2019-06-05 08:30:24 +00:00
|
|
|
if err != nil {
|
|
|
|
glog.V(1).Infof("operation LookupFileId %s failed, err: %v", chunkView.FileId, err)
|
|
|
|
return err
|
2021-03-11 18:34:36 +00:00
|
|
|
} else if len(urlStrings) == 0 {
|
|
|
|
glog.Errorf("operation LookupFileId %s failed, err: urls not found", chunkView.FileId)
|
|
|
|
return fmt.Errorf("operation LookupFileId %s failed, err: urls not found", chunkView.FileId)
|
2019-06-05 08:30:24 +00:00
|
|
|
}
|
2020-10-08 05:49:04 +00:00
|
|
|
fileId2Url[chunkView.FileId] = urlStrings
|
2019-06-05 08:30:24 +00:00
|
|
|
}
|
|
|
|
|
2021-03-16 09:15:17 +00:00
|
|
|
for _, chunkView := range chunkViews {
|
|
|
|
|
|
|
|
urlStrings := fileId2Url[chunkView.FileId]
|
2021-05-24 07:14:50 +00:00
|
|
|
start := time.Now()
|
2021-08-13 18:00:11 +00:00
|
|
|
err := retriedStreamFetchChunkData(writer, urlStrings, chunkView.CipherKey, chunkView.IsGzipped, chunkView.IsFullChunk(), chunkView.Offset, int(chunkView.Size))
|
2021-05-24 07:14:50 +00:00
|
|
|
stats.FilerRequestHistogram.WithLabelValues("chunkDownload").Observe(time.Since(start).Seconds())
|
2020-10-10 23:02:10 +00:00
|
|
|
if err != nil {
|
2021-05-24 07:14:50 +00:00
|
|
|
stats.FilerRequestCounter.WithLabelValues("chunkDownloadError").Inc()
|
2020-10-13 20:53:34 +00:00
|
|
|
return fmt.Errorf("read chunk: %v", err)
|
|
|
|
}
|
2021-05-24 07:14:50 +00:00
|
|
|
stats.FilerRequestCounter.WithLabelValues("chunkDownload").Inc()
|
2019-06-05 08:30:24 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
|
|
|
|
}
|
2020-03-21 03:31:11 +00:00
|
|
|
|
2020-04-28 07:05:47 +00:00
|
|
|
// ---------------- ReadAllReader ----------------------------------
|
|
|
|
|
|
|
|
func ReadAll(masterClient *wdclient.MasterClient, chunks []*filer_pb.FileChunk) ([]byte, error) {
|
|
|
|
|
|
|
|
buffer := bytes.Buffer{}
|
|
|
|
|
2020-10-08 05:49:04 +00:00
|
|
|
lookupFileIdFn := func(fileId string) (targetUrls []string, err error) {
|
2020-04-28 07:05:47 +00:00
|
|
|
return masterClient.LookupFileId(fileId)
|
|
|
|
}
|
|
|
|
|
2020-07-20 00:59:43 +00:00
|
|
|
chunkViews := ViewFromChunks(lookupFileIdFn, chunks, 0, math.MaxInt64)
|
|
|
|
|
2020-04-28 07:05:47 +00:00
|
|
|
for _, chunkView := range chunkViews {
|
2020-10-08 05:49:04 +00:00
|
|
|
urlStrings, err := lookupFileIdFn(chunkView.FileId)
|
2020-04-28 07:05:47 +00:00
|
|
|
if err != nil {
|
|
|
|
glog.V(1).Infof("operation LookupFileId %s failed, err: %v", chunkView.FileId, err)
|
|
|
|
return nil, err
|
|
|
|
}
|
2020-10-09 06:19:42 +00:00
|
|
|
|
2021-03-23 05:13:19 +00:00
|
|
|
data, err := retriedFetchChunkData(urlStrings, chunkView.CipherKey, chunkView.IsGzipped, chunkView.IsFullChunk(), chunkView.Offset, int(chunkView.Size))
|
2020-10-09 06:19:42 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
2020-04-28 07:05:47 +00:00
|
|
|
}
|
2020-10-09 06:19:42 +00:00
|
|
|
buffer.Write(data)
|
2020-04-28 07:05:47 +00:00
|
|
|
}
|
|
|
|
return buffer.Bytes(), nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// ---------------- ChunkStreamReader ----------------------------------
|
2020-03-21 03:31:11 +00:00
|
|
|
type ChunkStreamReader struct {
|
2021-08-07 22:35:27 +00:00
|
|
|
chunkViews []*ChunkView
|
|
|
|
totalSize int64
|
2021-08-08 08:21:42 +00:00
|
|
|
logicOffset int64
|
2021-08-07 22:35:27 +00:00
|
|
|
buffer []byte
|
|
|
|
bufferOffset int64
|
|
|
|
bufferPos int
|
|
|
|
nextChunkViewIndex int
|
|
|
|
lookupFileId wdclient.LookupFileIdFunctionType
|
2020-03-21 03:31:11 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
var _ = io.ReadSeeker(&ChunkStreamReader{})
|
2021-08-07 22:41:07 +00:00
|
|
|
var _ = io.ReaderAt(&ChunkStreamReader{})
|
2020-03-21 03:31:11 +00:00
|
|
|
|
2021-08-07 22:35:27 +00:00
|
|
|
func doNewChunkStreamReader(lookupFileIdFn wdclient.LookupFileIdFunctionType, chunks []*filer_pb.FileChunk) *ChunkStreamReader {
|
2020-07-20 00:59:43 +00:00
|
|
|
|
|
|
|
chunkViews := ViewFromChunks(lookupFileIdFn, chunks, 0, math.MaxInt64)
|
2021-08-07 22:35:27 +00:00
|
|
|
sort.Slice(chunkViews, func(i, j int) bool {
|
|
|
|
return chunkViews[i].LogicOffset < chunkViews[j].LogicOffset
|
|
|
|
})
|
2020-03-21 03:31:11 +00:00
|
|
|
|
2021-08-07 21:46:23 +00:00
|
|
|
var totalSize int64
|
|
|
|
for _, chunk := range chunkViews {
|
|
|
|
totalSize += int64(chunk.Size)
|
|
|
|
}
|
|
|
|
|
2020-03-21 03:31:11 +00:00
|
|
|
return &ChunkStreamReader{
|
2020-07-20 00:59:43 +00:00
|
|
|
chunkViews: chunkViews,
|
|
|
|
lookupFileId: lookupFileIdFn,
|
2021-08-07 21:46:23 +00:00
|
|
|
totalSize: totalSize,
|
2020-03-22 08:00:36 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-08-07 22:35:27 +00:00
|
|
|
func NewChunkStreamReaderFromFiler(masterClient *wdclient.MasterClient, chunks []*filer_pb.FileChunk) *ChunkStreamReader {
|
2020-04-30 00:40:08 +00:00
|
|
|
|
2021-08-07 22:35:27 +00:00
|
|
|
lookupFileIdFn := func(fileId string) (targetUrl []string, err error) {
|
|
|
|
return masterClient.LookupFileId(fileId)
|
|
|
|
}
|
2020-07-20 00:59:43 +00:00
|
|
|
|
2021-08-07 22:35:27 +00:00
|
|
|
return doNewChunkStreamReader(lookupFileIdFn, chunks)
|
|
|
|
}
|
2020-04-30 00:40:08 +00:00
|
|
|
|
2021-08-07 22:35:27 +00:00
|
|
|
func NewChunkStreamReader(filerClient filer_pb.FilerClient, chunks []*filer_pb.FileChunk) *ChunkStreamReader {
|
2021-08-07 21:46:23 +00:00
|
|
|
|
2021-08-07 22:35:27 +00:00
|
|
|
lookupFileIdFn := LookupFn(filerClient)
|
|
|
|
|
|
|
|
return doNewChunkStreamReader(lookupFileIdFn, chunks)
|
2020-04-30 00:40:08 +00:00
|
|
|
}
|
|
|
|
|
2021-08-07 22:41:07 +00:00
|
|
|
func (c *ChunkStreamReader) ReadAt(p []byte, off int64) (n int, err error) {
|
2021-08-15 18:55:58 +00:00
|
|
|
if err = c.prepareBufferFor(off); err != nil {
|
2021-08-07 22:41:07 +00:00
|
|
|
return
|
|
|
|
}
|
2021-08-15 18:55:58 +00:00
|
|
|
c.logicOffset = off
|
2021-08-07 22:41:07 +00:00
|
|
|
return c.Read(p)
|
|
|
|
}
|
|
|
|
|
2020-03-21 03:31:11 +00:00
|
|
|
func (c *ChunkStreamReader) Read(p []byte) (n int, err error) {
|
2020-04-29 09:42:58 +00:00
|
|
|
for n < len(p) {
|
|
|
|
if c.isBufferEmpty() {
|
2021-08-07 22:35:27 +00:00
|
|
|
if c.nextChunkViewIndex >= len(c.chunkViews) {
|
2020-04-29 09:42:58 +00:00
|
|
|
return n, io.EOF
|
|
|
|
}
|
2021-08-07 22:35:27 +00:00
|
|
|
chunkView := c.chunkViews[c.nextChunkViewIndex]
|
2021-08-08 08:21:42 +00:00
|
|
|
if err = c.fetchChunkToBuffer(chunkView); err != nil {
|
|
|
|
return
|
|
|
|
}
|
2021-08-07 22:35:27 +00:00
|
|
|
c.nextChunkViewIndex++
|
2020-03-21 03:31:11 +00:00
|
|
|
}
|
2020-04-29 09:42:58 +00:00
|
|
|
t := copy(p[n:], c.buffer[c.bufferPos:])
|
|
|
|
c.bufferPos += t
|
|
|
|
n += t
|
2021-08-08 08:21:42 +00:00
|
|
|
c.logicOffset += int64(t)
|
2020-03-21 03:31:11 +00:00
|
|
|
}
|
2020-03-22 05:16:00 +00:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c *ChunkStreamReader) isBufferEmpty() bool {
|
|
|
|
return len(c.buffer) <= c.bufferPos
|
2020-03-21 03:31:11 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (c *ChunkStreamReader) Seek(offset int64, whence int) (int64, error) {
|
2020-03-22 05:16:00 +00:00
|
|
|
|
|
|
|
var err error
|
|
|
|
switch whence {
|
|
|
|
case io.SeekStart:
|
|
|
|
case io.SeekCurrent:
|
2021-08-08 08:21:42 +00:00
|
|
|
offset += c.logicOffset
|
2020-03-22 05:16:00 +00:00
|
|
|
case io.SeekEnd:
|
2021-08-07 21:46:23 +00:00
|
|
|
offset = c.totalSize + offset
|
2020-03-22 05:16:00 +00:00
|
|
|
}
|
2021-08-07 21:46:23 +00:00
|
|
|
if offset > c.totalSize {
|
2020-03-22 05:16:00 +00:00
|
|
|
err = io.ErrUnexpectedEOF
|
2021-08-08 08:21:42 +00:00
|
|
|
} else {
|
|
|
|
c.logicOffset = offset
|
2020-03-22 05:16:00 +00:00
|
|
|
}
|
|
|
|
|
2021-08-08 08:21:42 +00:00
|
|
|
return offset, err
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c *ChunkStreamReader) prepareBufferFor(offset int64) (err error) {
|
2021-08-07 22:35:27 +00:00
|
|
|
// stay in the same chunk
|
|
|
|
if !c.isBufferEmpty() {
|
|
|
|
if c.bufferOffset <= offset && offset < c.bufferOffset+int64(len(c.buffer)) {
|
|
|
|
c.bufferPos = int(offset - c.bufferOffset)
|
2021-08-08 08:21:42 +00:00
|
|
|
return nil
|
2021-08-07 22:35:27 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// need to seek to a different chunk
|
|
|
|
currentChunkIndex := sort.Search(len(c.chunkViews), func(i int) bool {
|
2021-08-16 06:07:58 +00:00
|
|
|
return offset < c.chunkViews[i].LogicOffset
|
2021-08-07 22:35:27 +00:00
|
|
|
})
|
|
|
|
if currentChunkIndex == len(c.chunkViews) {
|
2021-08-16 06:07:58 +00:00
|
|
|
// not found
|
|
|
|
if c.chunkViews[0].LogicOffset <= offset {
|
|
|
|
currentChunkIndex = 0
|
|
|
|
} else if c.chunkViews[len(c.chunkViews)-1].LogicOffset <= offset {
|
|
|
|
currentChunkIndex = len(c.chunkViews) -1
|
|
|
|
} else {
|
|
|
|
return io.EOF
|
|
|
|
}
|
|
|
|
} else if currentChunkIndex > 0 {
|
|
|
|
if c.chunkViews[currentChunkIndex-1].LogicOffset <= offset {
|
|
|
|
currentChunkIndex -= 1
|
|
|
|
} else {
|
|
|
|
return fmt.Errorf("unexpected1 offset %d", offset)
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
return fmt.Errorf("unexpected2 offset %d", offset)
|
2021-08-07 22:35:27 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// positioning within the new chunk
|
|
|
|
chunk := c.chunkViews[currentChunkIndex]
|
|
|
|
if chunk.LogicOffset <= offset && offset < chunk.LogicOffset+int64(chunk.Size) {
|
|
|
|
if c.isBufferEmpty() || c.bufferOffset != chunk.LogicOffset {
|
2021-08-08 08:21:42 +00:00
|
|
|
if err = c.fetchChunkToBuffer(chunk); err != nil {
|
|
|
|
return
|
|
|
|
}
|
2021-08-07 22:35:27 +00:00
|
|
|
c.nextChunkViewIndex = currentChunkIndex + 1
|
2020-03-22 05:16:00 +00:00
|
|
|
}
|
2021-08-07 22:35:27 +00:00
|
|
|
c.bufferPos = int(offset - c.bufferOffset)
|
2020-03-22 05:16:00 +00:00
|
|
|
}
|
2021-08-08 08:21:42 +00:00
|
|
|
return
|
2020-03-21 03:31:11 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (c *ChunkStreamReader) fetchChunkToBuffer(chunkView *ChunkView) error {
|
2020-10-08 05:49:04 +00:00
|
|
|
urlStrings, err := c.lookupFileId(chunkView.FileId)
|
2020-03-21 03:31:11 +00:00
|
|
|
if err != nil {
|
|
|
|
glog.V(1).Infof("operation LookupFileId %s failed, err: %v", chunkView.FileId, err)
|
|
|
|
return err
|
|
|
|
}
|
2020-03-22 05:16:00 +00:00
|
|
|
var buffer bytes.Buffer
|
2020-10-13 07:29:46 +00:00
|
|
|
var shouldRetry bool
|
2020-10-08 05:49:04 +00:00
|
|
|
for _, urlString := range urlStrings {
|
2021-03-16 07:33:14 +00:00
|
|
|
shouldRetry, err = util.ReadUrlAsStream(urlString, chunkView.CipherKey, chunkView.IsGzipped, chunkView.IsFullChunk(), chunkView.Offset, int(chunkView.Size), func(data []byte) {
|
2020-10-08 05:49:04 +00:00
|
|
|
buffer.Write(data)
|
|
|
|
})
|
2020-10-13 07:29:46 +00:00
|
|
|
if !shouldRetry {
|
|
|
|
break
|
|
|
|
}
|
2020-10-08 05:49:04 +00:00
|
|
|
if err != nil {
|
|
|
|
glog.V(1).Infof("read %s failed, err: %v", chunkView.FileId, err)
|
|
|
|
buffer.Reset()
|
|
|
|
} else {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
2020-03-21 03:31:11 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2020-03-22 05:16:00 +00:00
|
|
|
c.buffer = buffer.Bytes()
|
|
|
|
c.bufferPos = 0
|
|
|
|
c.bufferOffset = chunkView.LogicOffset
|
|
|
|
|
2020-03-22 08:00:36 +00:00
|
|
|
// glog.V(0).Infof("read %s [%d,%d)", chunkView.FileId, chunkView.LogicOffset, chunkView.LogicOffset+int64(chunkView.Size))
|
|
|
|
|
2020-03-21 03:31:11 +00:00
|
|
|
return nil
|
|
|
|
}
|
2020-03-22 08:00:36 +00:00
|
|
|
|
2020-04-28 07:05:47 +00:00
|
|
|
func (c *ChunkStreamReader) Close() {
|
|
|
|
// TODO try to release and reuse buffer
|
|
|
|
}
|
|
|
|
|
2020-03-23 07:01:34 +00:00
|
|
|
// VolumeId returns the part of fileId in front of its last comma. When
// fileId has no comma, or its only comma is the very first character, fileId
// is returned unchanged.
func VolumeId(fileId string) string {
	commaAt := strings.LastIndexByte(fileId, ',')
	if commaAt <= 0 {
		return fileId
	}
	return fileId[:commaAt]
}
|