update ChunkedFile to seekable reader, so we can use io.* to read data

This commit is contained in:
tnextday 2015-12-01 20:23:50 +08:00
parent f825d23789
commit 6b0894d806
5 changed files with 313 additions and 65 deletions

View file

@ -8,10 +8,18 @@ import (
"net/http"
"sort"
"sync"
"github.com/chrislusf/seaweedfs/go/glog"
"github.com/chrislusf/seaweedfs/go/util"
)
var ErrOutOfRange = errors.New("Out of Range")
var (
// when the remote server does not allow range requests (Accept-Ranges was not set)
ErrRangeRequestsNotSupported = errors.New("Range requests are not supported by the remote server")
// ErrInvalidRange is returned by Read when trying to read past the end of the file
ErrInvalidRange = errors.New("Invalid range")
)
type ChunkInfo struct {
Fid string `json:"fid,omitempty"`
@ -21,41 +29,74 @@ type ChunkInfo struct {
type ChunkList []*ChunkInfo
type ChunkedFile struct {
Name string `json:"name,omitempty"`
Mime string `json:"mime,omitempty"`
Size int64 `json:"size,omitempty"`
Chunks ChunkList `json:"chunks,omitempty"`
type ChunkManifest struct {
Name string `json:"name,omitempty"`
Mime string `json:"mime,omitempty"`
Size int64 `json:"size,omitempty"`
Chunks ChunkList `json:"chunks,omitempty"`
}
master string `json:"-"`
// seekable chunked file reader
type ChunkedFileReader struct {
Manifest *ChunkManifest
Master string
pos int64
pr *io.PipeReader
pw *io.PipeWriter
mutex sync.Mutex
}
func (s ChunkList) Len() int { return len(s) }
func (s ChunkList) Less(i, j int) bool { return s[i].Offset < s[j].Offset }
func (s ChunkList) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
func NewChunkedNeedle(buffer []byte, master string) (*ChunkedFile, error) {
c := ChunkedFile{}
if e := json.Unmarshal(buffer, c); e != nil {
func LoadChunkedManifest(buffer []byte) (*ChunkManifest, error) {
cm := ChunkManifest{}
if e := json.Unmarshal(buffer, cm); e != nil {
return nil, e
}
sort.Sort(c.Chunks)
c.master = master
return &c, nil
sort.Sort(cm.Chunks)
return &cm, nil
}
func (c *ChunkedFile) Marshal() ([]byte, error) {
return json.Marshal(c)
func (cm *ChunkManifest) GetData() ([]byte, error) {
return json.Marshal(cm)
}
func copyChunk(fileUrl string, w io.Writer, startOffset, size int64) (written int64, e error) {
func (cm *ChunkManifest) DeleteChunks(master string) error {
fileIds := make([]string, 0, len(cm.Chunks))
for _, ci := range cm.Chunks {
fileIds = append(fileIds, ci.Fid)
}
results, e := DeleteFiles(master, fileIds)
if e != nil {
return e
}
deleteError := 0
for _, ret := range results.Results {
if ret.Error != "" {
deleteError++
glog.V(0).Infoln("delete error:", ret.Error, ret.Fid)
}
}
if deleteError > 0 {
return errors.New("Not all chunks deleted.")
}
return nil
}
func (cm *ChunkManifest) StoredHelper() error {
//TODO
return nil
}
func httpRangeDownload(fileUrl string, w io.Writer, offset int64) (written int64, e error) {
req, err := http.NewRequest("GET", fileUrl, nil)
if err != nil {
return written, err
}
if startOffset > 0 {
req.Header.Set("Range", fmt.Sprintf("bytes=%d-", startOffset))
if offset > 0 {
req.Header.Set("Range", fmt.Sprintf("bytes=%d-", offset))
}
resp, err := util.Do(req)
@ -63,65 +104,117 @@ func copyChunk(fileUrl string, w io.Writer, startOffset, size int64) (written in
return written, err
}
defer resp.Body.Close()
if startOffset > 0 && resp.StatusCode != 206 {
return written, fmt.Errorf("Cannot Read Needle Position: %d [%s]", startOffset, fileUrl)
}
if size > 0 {
return io.CopyN(w, resp.Body, size)
} else {
return io.Copy(w, resp.Body)
switch resp.StatusCode {
case http.StatusRequestedRangeNotSatisfiable:
return written, ErrInvalidRange
case http.StatusOK:
if offset > 0 {
return written, ErrRangeRequestsNotSupported
}
case http.StatusPartialContent:
break
default:
return written, fmt.Errorf("Read Needle http error: [%d] %s", resp.StatusCode, fileUrl)
}
return io.Copy(w, resp.Body)
}
func (c *ChunkedFile) WriteBuffer(w io.Writer, offset, size int64) (written int64, e error) {
if offset >= c.Size || offset+size > c.Size {
return written, ErrOutOfRange
func (cf *ChunkedFileReader) Seek(offset int64, whence int) (int64, error) {
var err error
switch whence {
case 0:
case 1:
offset += cf.pos
case 2:
offset = cf.Manifest.Size - offset
}
if offset > cf.Manifest.Size {
err = ErrInvalidRange
}
if cf.pos != offset {
cf.Close()
}
cf.pos = offset
return cf.pos, err
}
func (cf *ChunkedFileReader) WriteTo(w io.Writer) (n int64, err error) {
cm := cf.Manifest
chunkIndex := -1
chunkStartOffset := int64(0)
for i, ci := range c.Chunks {
if offset >= ci.Offset && offset < ci.Offset+ci.Size {
for i, ci := range cm.Chunks {
if cf.pos >= ci.Offset && cf.pos < ci.Offset+ci.Size {
chunkIndex = i
chunkStartOffset = offset - ci.Offset
chunkStartOffset = cf.pos - ci.Offset
break
}
}
if chunkIndex < 0 {
return written, ErrOutOfRange
return n, ErrInvalidRange
}
//preload next chunk?
for ; chunkIndex < c.Chunks.Len(); chunkIndex++ {
ci := c.Chunks[chunkIndex]
fileUrl, lookupError := LookupFileId(c.master, ci.Fid)
for ; chunkIndex < cm.Chunks.Len(); chunkIndex++ {
ci := cm.Chunks[chunkIndex]
// if we need read date from local volume server first?
fileUrl, lookupError := LookupFileId(cf.Master, ci.Fid)
if lookupError != nil {
return written, lookupError
return n, lookupError
}
rsize := int64(0)
if size > 0 {
rsize = size - written
}
if n, e := copyChunk(fileUrl, w, chunkStartOffset, rsize); e != nil {
return written, e
if wn, e := httpRangeDownload(fileUrl, w, chunkStartOffset); e != nil {
return n, e
} else {
written += n
n += wn
cf.pos += wn
}
if size > 0 && written >= size {
break
}
chunkStartOffset = 0
}
return written, nil
return n, nil
}
func (c *ChunkedFile) DeleteHelper() error {
//TODO Delete all chunks
return nil
func (cf *ChunkedFileReader) ReadAt(p []byte, off int64) (n int, err error) {
cf.Seek(off, 0)
return cf.Read(p)
}
func (c *ChunkedFile) StoredHelper() error {
//TODO
return nil
func (cf *ChunkedFileReader) Read(p []byte) (int, error) {
return cf.getPipeReader().Read(p)
}
func (cf *ChunkedFileReader) Close() (e error) {
cf.mutex.Lock()
defer cf.mutex.Unlock()
return cf.closePipe()
}
func (cf *ChunkedFileReader) closePipe() (e error) {
if cf.pr != nil {
if err := cf.pr.Close(); err != nil {
e = err
}
}
cf.pr = nil
if cf.pw != nil {
if err := cf.pw.Close(); err != nil {
e = err
}
}
cf.pw = nil
return e
}
func (cf *ChunkedFileReader) getPipeReader() io.Reader {
cf.mutex.Lock()
defer cf.mutex.Unlock()
if cf.pr != nil && cf.pw != nil {
return cf.pr
}
cf.closePipe()
cf.pr, cf.pw = io.Pipe()
go func(pw *io.PipeWriter) {
_, e := cf.WriteTo(pw)
pw.CloseWithError(e)
}(cf.pw)
return cf.pr
}

View file

@ -52,7 +52,7 @@ func (n *Needle) String() (str string) {
return
}
func ParseUpload(r *http.Request) (fileName string, data []byte, mimeType string, isGzipped bool, modifiedTime uint64, ttl *TTL, e error) {
func ParseUpload(r *http.Request) (fileName string, data []byte, mimeType string, isGzipped bool, modifiedTime uint64, ttl *TTL, isChunkedFile bool, e error) {
form, fe := r.MultipartReader()
if fe != nil {
glog.V(0).Infoln("MultipartReader [ERROR]", fe)
@ -132,12 +132,13 @@ func ParseUpload(r *http.Request) (fileName string, data []byte, mimeType string
}
modifiedTime, _ = strconv.ParseUint(r.FormValue("ts"), 10, 64)
ttl, _ = ReadTTL(r.FormValue("ttl"))
isChunkedFile, _ = strconv.ParseBool(r.FormValue("cf"))
return
}
func NewNeedle(r *http.Request, fixJpgOrientation bool) (n *Needle, e error) {
fname, mimeType, isGzipped := "", "", false
fname, mimeType, isGzipped, isChunkedFile := "", "", false, false
n = new(Needle)
fname, n.Data, mimeType, isGzipped, n.LastModified, n.Ttl, e = ParseUpload(r)
fname, n.Data, mimeType, isGzipped, n.LastModified, n.Ttl, isChunkedFile, e = ParseUpload(r)
if e != nil {
return
}
@ -160,6 +161,10 @@ func NewNeedle(r *http.Request, fixJpgOrientation bool) (n *Needle, e error) {
n.SetHasTtl()
}
if isChunkedFile {
n.SetChunkedFile()
}
if fixJpgOrientation {
loweredName := strings.ToLower(fname)
if mimeType == "image/jpeg" || strings.HasSuffix(loweredName, ".jpg") || strings.HasSuffix(loweredName, ".jpeg") {

View file

@ -16,7 +16,7 @@ const (
FlagHasMime = 0x04
FlagHasLastModifiedDate = 0x08
FlagHasTtl = 0x10
FlagChunkList = 0x80
FlagChunkedFile = 0x80
LastModifiedBytesLength = 5
TtlBytesLength = 2
)
@ -282,10 +282,10 @@ func (n *Needle) SetHasTtl() {
n.Flags = n.Flags | FlagHasTtl
}
func (n *Needle) IsChunkList() bool {
return n.Flags&FlagChunkList > 0
func (n *Needle) IsChunkedFile() bool {
return n.Flags&FlagChunkedFile > 0
}
func (n *Needle) SetChunkList() {
n.Flags = n.Flags | FlagChunkList
func (n *Needle) SetChunkedFile() {
n.Flags = n.Flags | FlagChunkedFile
}

View file

@ -86,7 +86,7 @@ func submitForClientHandler(w http.ResponseWriter, r *http.Request, masterUrl st
}
debug("parsing upload file...")
fname, data, mimeType, isGzipped, lastModified, _, pe := storage.ParseUpload(r)
fname, data, mimeType, isGzipped, lastModified, _, _, pe := storage.ParseUpload(r)
if pe != nil {
writeJsonError(w, r, http.StatusBadRequest, pe)
return

View file

@ -81,6 +81,11 @@ func (vs *VolumeServer) GetOrHeadHandler(w http.ResponseWriter, r *http.Request)
return
}
w.Header().Set("Etag", etag)
if vs.tryHandleChunkedFile(n, filename, w, r) {
return
}
if n.NameSize > 0 && filename == "" {
filename = string(n.Name)
dotIndex := strings.LastIndex(filename, ".")
@ -215,3 +220,148 @@ func (vs *VolumeServer) GetOrHeadHandler(w http.ResponseWriter, r *http.Request)
io.CopyN(w, sendContent, sendSize)
}
func (vs *VolumeServer) tryHandleChunkedFile(n *storage.Needle, fileName string, w http.ResponseWriter, r *http.Request) (processed bool) {
if !n.IsChunkedFile() {
return false
}
processed = true
raw, _ := strconv.ParseBool(r.FormValue("raw"))
if raw {
w.Header().Set("Content-Type", "application/json")
w.Header().Set("Content-Length", strconv.Itoa(len(n.Data)))
if _, e := w.Write(n.Data); e != nil {
glog.V(0).Infoln("response write error:", e)
}
return true
}
chunkManifest, e := operation.LoadChunkedManifest(n.Data)
if e != nil {
return false
}
ext := ""
if fileName == "" && chunkManifest.Name != "" {
fileName = chunkManifest.Name
dotIndex := strings.LastIndex(fileName, ".")
if dotIndex > 0 {
ext = fileName[dotIndex:]
}
}
mtype := ""
if ext != "" {
mtype = mime.TypeByExtension(ext)
}
if chunkManifest.Mime != "" {
mt := chunkManifest.Mime
if !strings.HasPrefix(mt, "application/octet-stream") {
mtype = mt
}
}
if mtype != "" {
w.Header().Set("Content-Type", mtype)
}
if fileName != "" {
w.Header().Set("Content-Disposition", `filename="`+fileNameEscaper.Replace(fileName)+`"`)
}
w.Header().Set("X-File-Store", "chunked")
w.Header().Set("Accept-Ranges", "bytes")
if r.Method == "HEAD" {
w.Header().Set("Content-Length", strconv.FormatInt(chunkManifest.Size, 10))
return true
}
chunkedFileReader := operation.ChunkedFileReader{
Manifest: chunkManifest,
Master: vs.GetMasterNode(),
}
defer chunkedFileReader.Close()
rangeReq := r.Header.Get("Range")
if rangeReq == "" {
w.Header().Set("Content-Length", strconv.FormatInt(chunkManifest.Size, 10))
if _, e = io.Copy(w, chunkedFileReader); e != nil {
glog.V(0).Infoln("response write error:", e)
}
return true
}
//the rest is dealing with partial content request
//mostly copy from src/pkg/net/http/fs.go
size := chunkManifest.Size
ranges, err := parseRange(rangeReq, size)
if err != nil {
http.Error(w, err.Error(), http.StatusRequestedRangeNotSatisfiable)
return
}
if sumRangesSize(ranges) > size {
// The total number of bytes in all the ranges
// is larger than the size of the file by
// itself, so this is probably an attack, or a
// dumb client. Ignore the range request.
ranges = nil
return
}
if len(ranges) == 0 {
return
}
if len(ranges) == 1 {
// RFC 2616, Section 14.16:
// "When an HTTP message includes the content of a single
// range (for example, a response to a request for a
// single range, or to a request for a set of ranges
// that overlap without any holes), this content is
// transmitted with a Content-Range header, and a
// Content-Length header showing the number of bytes
// actually transferred.
// ...
// A response to a request for a single range MUST NOT
// be sent using the multipart/byteranges media type."
ra := ranges[0]
w.Header().Set("Content-Length", strconv.FormatInt(ra.length, 10))
w.Header().Set("Content-Range", ra.contentRange(size))
w.WriteHeader(http.StatusPartialContent)
if _, e = chunkedFileReader.Seek(ra.start, 0); e != nil {
glog.V(0).Infoln("response write error:", e)
}
if _, e = io.CopyN(w, chunkedFileReader, ra.length); e != nil {
glog.V(0).Infoln("response write error:", e)
}
return
}
// process multiple ranges
for _, ra := range ranges {
if ra.start > size {
http.Error(w, "Out of Range", http.StatusRequestedRangeNotSatisfiable)
return
}
}
sendSize := rangesMIMESize(ranges, mtype, size)
pr, pw := io.Pipe()
mw := multipart.NewWriter(pw)
w.Header().Set("Content-Type", "multipart/byteranges; boundary="+mw.Boundary())
sendContent := pr
defer pr.Close() // cause writing goroutine to fail and exit if CopyN doesn't finish.
go func() {
for _, ra := range ranges {
part, err := mw.CreatePart(ra.mimeHeader(mtype, size))
if err != nil {
pw.CloseWithError(err)
return
}
if _, e = chunkedFileReader.Seek(ra.start, 0); e != nil {
glog.V(0).Infoln("response write error:", e)
}
if _, err = io.CopyN(part, chunkedFileReader, ra.length); err != nil {
pw.CloseWithError(err)
return
}
}
mw.Close()
pw.Close()
}()
if w.Header().Get("Content-Encoding") == "" {
w.Header().Set("Content-Length", strconv.FormatInt(sendSize, 10))
}
w.WriteHeader(http.StatusPartialContent)
io.CopyN(w, sendContent, sendSize)
return
}