filer: processing all response headers, no pass through to volume server

* filer calculate MD5 etag
* filer handle response headers, instread of pass it to volume servers
This commit is contained in:
Chris Lu 2020-03-08 15:42:44 -07:00
parent 11fceaf2f7
commit 9b3109a5d8
12 changed files with 141 additions and 254 deletions

View file

@ -99,9 +99,6 @@ spec:
{{- end }}
filer \
-port={{ .Values.filer.port }} \
{{- if .Values.filer.redirectOnRead }}
-redirectOnRead \
{{- end }}
{{- if .Values.filer.disableHttp }}
-disableHttp \
{{- end }}

View file

@ -177,9 +177,6 @@ filer:
grpcPort: 18888
loggingOverrideLevel: null
# Whether proxy or redirect to volume server during file GET request
redirectOnRead: false
# Limit sub dir listing size (default 100000)
dirListLimit: 100000

View file

@ -27,7 +27,6 @@ type FilerOptions struct {
publicPort *int
collection *string
defaultReplicaPlacement *string
redirectOnRead *bool
disableDirListing *bool
maxMB *int
dirListingLimit *int
@ -48,7 +47,6 @@ func init() {
f.port = cmdFiler.Flag.Int("port", 8888, "filer server http listen port")
f.publicPort = cmdFiler.Flag.Int("port.readonly", 0, "readonly port opened to public")
f.defaultReplicaPlacement = cmdFiler.Flag.String("defaultReplicaPlacement", "000", "default replication type if not specified")
f.redirectOnRead = cmdFiler.Flag.Bool("redirectOnRead", false, "whether proxy or redirect to volume server during file GET request")
f.disableDirListing = cmdFiler.Flag.Bool("disableDirListing", false, "turn off directory listing")
f.maxMB = cmdFiler.Flag.Int("maxMB", 32, "split files larger than the limit")
f.dirListingLimit = cmdFiler.Flag.Int("dirListLimit", 100000, "limit sub dir listing size")
@ -105,7 +103,6 @@ func (fo *FilerOptions) startFiler() {
Masters: strings.Split(*fo.masters, ","),
Collection: *fo.collection,
DefaultReplication: *fo.defaultReplicaPlacement,
RedirectOnRead: *fo.redirectOnRead,
DisableDirListing: *fo.disableDirListing,
MaxMB: *fo.maxMB,
DirListingLimit: *fo.dirListingLimit,

View file

@ -78,7 +78,6 @@ func init() {
filerOptions.port = cmdServer.Flag.Int("filer.port", 8888, "filer server http listen port")
filerOptions.publicPort = cmdServer.Flag.Int("filer.port.public", 0, "filer server public http listen port")
filerOptions.defaultReplicaPlacement = cmdServer.Flag.String("filer.defaultReplicaPlacement", "", "Default replication type if not specified during runtime.")
filerOptions.redirectOnRead = cmdServer.Flag.Bool("filer.redirectOnRead", false, "whether proxy or redirect to volume server during file GET request")
filerOptions.disableDirListing = cmdServer.Flag.Bool("filer.disableDirListing", false, "turn off directory listing")
filerOptions.maxMB = cmdServer.Flag.Int("filer.maxMB", 32, "split files larger than the limit")
filerOptions.dirListingLimit = cmdServer.Flag.Int("filer.dirListLimit", 1000, "limit sub dir listing size")
@ -115,10 +114,6 @@ func runServer(cmd *Command, args []string) bool {
defer pprof.StopCPUProfile()
}
if *filerOptions.redirectOnRead {
*isStartingFiler = true
}
if *isStartingS3 {
*isStartingFiler = true
}

View file

@ -223,7 +223,6 @@ func (s3a *S3ApiServer) proxyToFiler(w http.ResponseWriter, r *http.Request, des
proxyReq.Header.Set("Host", s3a.option.Filer)
proxyReq.Header.Set("X-Forwarded-For", r.RemoteAddr)
proxyReq.Header.Set("Etag-MD5", "True")
for header, values := range r.Header {
for _, value := range values {

View file

@ -5,6 +5,8 @@ import (
"encoding/json"
"errors"
"fmt"
"io"
"mime/multipart"
"net/http"
"path/filepath"
"strconv"
@ -210,3 +212,107 @@ func handleStaticResources2(r *mux.Router) {
r.Handle("/favicon.ico", http.FileServer(statikFS))
r.PathPrefix("/seaweedfsstatic/").Handler(http.StripPrefix("/seaweedfsstatic", http.FileServer(statikFS)))
}
func adjustHeadersAfterHEAD(w http.ResponseWriter, r *http.Request, filename string) {
if filename != "" {
contentDisposition := "inline"
if r.FormValue("dl") != "" {
if dl, _ := strconv.ParseBool(r.FormValue("dl")); dl {
contentDisposition = "attachment"
}
}
w.Header().Set("Content-Disposition", contentDisposition+`; filename="`+fileNameEscaper.Replace(filename)+`"`)
}
}
func processRangeRequst(r *http.Request, w http.ResponseWriter, totalSize int64, mimeType string, writeFn func(writer io.Writer, offset int64, size int64) error) {
rangeReq := r.Header.Get("Range")
if rangeReq == "" {
w.Header().Set("Content-Length", strconv.FormatInt(totalSize, 10))
if err := writeFn(w, 0, totalSize); err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
return
}
//the rest is dealing with partial content request
//mostly copy from src/pkg/net/http/fs.go
ranges, err := parseRange(rangeReq, totalSize)
if err != nil {
http.Error(w, err.Error(), http.StatusRequestedRangeNotSatisfiable)
return
}
if sumRangesSize(ranges) > totalSize {
// The total number of bytes in all the ranges
// is larger than the size of the file by
// itself, so this is probably an attack, or a
// dumb client. Ignore the range request.
return
}
if len(ranges) == 0 {
return
}
if len(ranges) == 1 {
// RFC 2616, Section 14.16:
// "When an HTTP message includes the content of a single
// range (for example, a response to a request for a
// single range, or to a request for a set of ranges
// that overlap without any holes), this content is
// transmitted with a Content-Range header, and a
// Content-Length header showing the number of bytes
// actually transferred.
// ...
// A response to a request for a single range MUST NOT
// be sent using the multipart/byteranges media type."
ra := ranges[0]
w.Header().Set("Content-Length", strconv.FormatInt(ra.length, 10))
w.Header().Set("Content-Range", ra.contentRange(totalSize))
w.WriteHeader(http.StatusPartialContent)
err = writeFn(w, ra.start, ra.length)
if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
return
}
// process multiple ranges
for _, ra := range ranges {
if ra.start > totalSize {
http.Error(w, "Out of Range", http.StatusRequestedRangeNotSatisfiable)
return
}
}
sendSize := rangesMIMESize(ranges, mimeType, totalSize)
pr, pw := io.Pipe()
mw := multipart.NewWriter(pw)
w.Header().Set("Content-Type", "multipart/byteranges; boundary="+mw.Boundary())
sendContent := pr
defer pr.Close() // cause writing goroutine to fail and exit if CopyN doesn't finish.
go func() {
for _, ra := range ranges {
part, e := mw.CreatePart(ra.mimeHeader(mimeType, totalSize))
if e != nil {
pw.CloseWithError(e)
return
}
if e = writeFn(part, ra.start, ra.length); e != nil {
pw.CloseWithError(e)
return
}
}
mw.Close()
pw.Close()
}()
if w.Header().Get("Content-Encoding") == "" {
w.Header().Set("Content-Length", strconv.FormatInt(sendSize, 10))
}
w.WriteHeader(http.StatusPartialContent)
if _, err := io.CopyN(w, sendContent, sendSize); err != nil {
http.Error(w, "Internal Error", http.StatusInternalServerError)
return
}
}

View file

@ -37,7 +37,6 @@ type FilerOption struct {
Masters []string
Collection string
DefaultReplication string
RedirectOnRead bool
DisableDirListing bool
MaxMB int
DirListingLimit int

View file

@ -2,14 +2,10 @@ package weed_server
import (
"context"
"fmt"
"io"
"io/ioutil"
"mime"
"mime/multipart"
"net/http"
"net/url"
"path"
"path/filepath"
"strconv"
"strings"
@ -17,7 +13,6 @@ import (
"github.com/chrislusf/seaweedfs/weed/glog"
"github.com/chrislusf/seaweedfs/weed/pb/filer_pb"
"github.com/chrislusf/seaweedfs/weed/stats"
"github.com/chrislusf/seaweedfs/weed/util"
)
func (fs *FilerServer) GetOrHeadHandler(w http.ResponseWriter, r *http.Request, isGetMethod bool) {
@ -68,118 +63,30 @@ func (fs *FilerServer) GetOrHeadHandler(w http.ResponseWriter, r *http.Request,
}
w.Header().Set("Accept-Ranges", "bytes")
if r.Method == "HEAD" {
w.Header().Set("Content-Length", strconv.FormatInt(int64(filer2.TotalSize(entry.Chunks)), 10))
w.Header().Set("Last-Modified", entry.Attr.Mtime.Format(http.TimeFormat))
if entry.Attr.Mime != "" {
w.Header().Set("Content-Type", entry.Attr.Mime)
}
setEtag(w, filer2.ETag(entry.Chunks))
return
}
if len(entry.Chunks) == 1 {
fs.handleSingleChunk(w, r, entry)
return
}
fs.handleMultipleChunks(w, r, entry)
}
func (fs *FilerServer) handleSingleChunk(w http.ResponseWriter, r *http.Request, entry *filer2.Entry) {
fileId := entry.Chunks[0].GetFileIdString()
urlString, err := fs.filer.MasterClient.LookupFileId(fileId)
if err != nil {
glog.V(1).Infof("operation LookupFileId %s failed, err: %v", fileId, err)
w.WriteHeader(http.StatusNotFound)
return
}
if fs.option.RedirectOnRead && entry.Chunks[0].CipherKey == nil {
stats.FilerRequestCounter.WithLabelValues("redirect").Inc()
http.Redirect(w, r, urlString, http.StatusFound)
return
}
u, _ := url.Parse(urlString)
q := u.Query()
for key, values := range r.URL.Query() {
for _, value := range values {
q.Add(key, value)
}
}
u.RawQuery = q.Encode()
request := &http.Request{
Method: r.Method,
URL: u,
Proto: r.Proto,
ProtoMajor: r.ProtoMajor,
ProtoMinor: r.ProtoMinor,
Header: r.Header,
Body: r.Body,
Host: r.Host,
ContentLength: r.ContentLength,
}
glog.V(3).Infoln("retrieving from", u)
resp, do_err := util.Do(request)
if do_err != nil {
glog.V(0).Infoln("failing to connect to volume server", do_err.Error())
writeJsonError(w, r, http.StatusInternalServerError, do_err)
return
}
defer func() {
io.Copy(ioutil.Discard, resp.Body)
resp.Body.Close()
}()
for k, v := range resp.Header {
w.Header()[k] = v
}
if entry.Attr.Mime != "" {
w.Header().Set("Content-Type", entry.Attr.Mime)
}
if entry.Chunks[0].CipherKey == nil {
w.WriteHeader(resp.StatusCode)
io.Copy(w, resp.Body)
} else {
fs.writeEncryptedChunk(w, resp, entry)
}
}
func (fs *FilerServer) writeEncryptedChunk(w http.ResponseWriter, resp *http.Response, entry *filer2.Entry) {
chunk := entry.Chunks[0]
encryptedData, err := ioutil.ReadAll(resp.Body)
if err != nil {
glog.V(1).Infof("read encrypted %s failed, err: %v", chunk.FileId, err)
w.WriteHeader(http.StatusNotFound)
return
}
decryptedData, err := util.Decrypt(encryptedData, util.CipherKey(chunk.CipherKey))
if err != nil {
glog.V(1).Infof("decrypt %s failed, err: %v", chunk.FileId, err)
w.WriteHeader(http.StatusNotFound)
return
}
w.Header().Set("Content-Length", fmt.Sprintf("%d", chunk.Size))
w.WriteHeader(resp.StatusCode)
w.Write(decryptedData)
}
func (fs *FilerServer) handleMultipleChunks(w http.ResponseWriter, r *http.Request, entry *filer2.Entry) {
w.Header().Set("Last-Modified", entry.Attr.Mtime.Format(http.TimeFormat))
// mime type
mimeType := entry.Attr.Mime
if mimeType == "" {
if ext := path.Ext(entry.Name()); ext != "" {
if ext := filepath.Ext(entry.Name()); ext != "" {
mimeType = mime.TypeByExtension(ext)
}
}
if mimeType != "" {
w.Header().Set("Content-Type", mimeType)
}
// set etag
setEtag(w, filer2.ETag(entry.Chunks))
if r.Method == "HEAD" {
w.Header().Set("Content-Length", strconv.FormatInt(int64(filer2.TotalSize(entry.Chunks)), 10))
return
}
filename := entry.Name()
adjustHeadersAfterHEAD(w, r, filename)
totalSize := int64(filer2.TotalSize(entry.Chunks))
processRangeRequst(r, w, totalSize, mimeType, func(writer io.Writer, offset int64, size int64) error {
@ -188,94 +95,3 @@ func (fs *FilerServer) handleMultipleChunks(w http.ResponseWriter, r *http.Reque
}
func processRangeRequst(r *http.Request, w http.ResponseWriter, totalSize int64, mimeType string, writeFn func(writer io.Writer, offset int64, size int64) error) {
rangeReq := r.Header.Get("Range")
if rangeReq == "" {
w.Header().Set("Content-Length", strconv.FormatInt(totalSize, 10))
if err := writeFn(w, 0, totalSize); err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
return
}
//the rest is dealing with partial content request
//mostly copy from src/pkg/net/http/fs.go
ranges, err := parseRange(rangeReq, totalSize)
if err != nil {
http.Error(w, err.Error(), http.StatusRequestedRangeNotSatisfiable)
return
}
if sumRangesSize(ranges) > totalSize {
// The total number of bytes in all the ranges
// is larger than the size of the file by
// itself, so this is probably an attack, or a
// dumb client. Ignore the range request.
return
}
if len(ranges) == 0 {
return
}
if len(ranges) == 1 {
// RFC 2616, Section 14.16:
// "When an HTTP message includes the content of a single
// range (for example, a response to a request for a
// single range, or to a request for a set of ranges
// that overlap without any holes), this content is
// transmitted with a Content-Range header, and a
// Content-Length header showing the number of bytes
// actually transferred.
// ...
// A response to a request for a single range MUST NOT
// be sent using the multipart/byteranges media type."
ra := ranges[0]
w.Header().Set("Content-Length", strconv.FormatInt(ra.length, 10))
w.Header().Set("Content-Range", ra.contentRange(totalSize))
w.WriteHeader(http.StatusPartialContent)
err = writeFn(w, ra.start, ra.length)
if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
return
}
// process multiple ranges
for _, ra := range ranges {
if ra.start > totalSize {
http.Error(w, "Out of Range", http.StatusRequestedRangeNotSatisfiable)
return
}
}
sendSize := rangesMIMESize(ranges, mimeType, totalSize)
pr, pw := io.Pipe()
mw := multipart.NewWriter(pw)
w.Header().Set("Content-Type", "multipart/byteranges; boundary="+mw.Boundary())
sendContent := pr
defer pr.Close() // cause writing goroutine to fail and exit if CopyN doesn't finish.
go func() {
for _, ra := range ranges {
part, e := mw.CreatePart(ra.mimeHeader(mimeType, totalSize))
if e != nil {
pw.CloseWithError(e)
return
}
if e = writeFn(part, ra.start, ra.length); e != nil {
pw.CloseWithError(e)
return
}
}
mw.Close()
pw.Close()
}()
if w.Header().Get("Content-Encoding") == "" {
w.Header().Set("Content-Length", strconv.FormatInt(sendSize, 10))
}
w.WriteHeader(http.StatusPartialContent)
if _, err := io.CopyN(w, sendContent, sendSize); err != nil {
http.Error(w, "Internal Error", http.StatusInternalServerError)
return
}
}

View file

@ -2,6 +2,7 @@ package weed_server
import (
"context"
"crypto/md5"
"encoding/json"
"errors"
"fmt"
@ -220,6 +221,8 @@ func (fs *FilerServer) uploadToVolumeServer(r *http.Request, u *url.URL, auth se
defer func() { stats.FilerRequestHistogram.WithLabelValues("postUpload").Observe(time.Since(start).Seconds()) }()
ret = &operation.UploadResult{}
hash := md5.New()
var body = ioutil.NopCloser(io.TeeReader(r.Body, hash))
request := &http.Request{
Method: r.Method,
@ -228,7 +231,7 @@ func (fs *FilerServer) uploadToVolumeServer(r *http.Request, u *url.URL, auth se
ProtoMajor: r.ProtoMajor,
ProtoMinor: r.ProtoMinor,
Header: r.Header,
Body: r.Body,
Body: body,
Host: r.Host,
ContentLength: r.ContentLength,
}
@ -247,7 +250,7 @@ func (fs *FilerServer) uploadToVolumeServer(r *http.Request, u *url.URL, auth se
io.Copy(ioutil.Discard, resp.Body)
resp.Body.Close()
}()
etag := resp.Header.Get("ETag")
respBody, raErr := ioutil.ReadAll(resp.Body)
if raErr != nil {
glog.V(0).Infoln("failing to upload to volume server", r.RequestURI, raErr.Error())
@ -255,6 +258,7 @@ func (fs *FilerServer) uploadToVolumeServer(r *http.Request, u *url.URL, auth se
err = raErr
return
}
glog.V(4).Infoln("post result", string(respBody))
unmarshalErr := json.Unmarshal(respBody, &ret)
if unmarshalErr != nil {
@ -282,9 +286,8 @@ func (fs *FilerServer) uploadToVolumeServer(r *http.Request, u *url.URL, auth se
return
}
}
if etag != "" {
ret.ETag = etag
}
// use filer calculated md5 ETag, instead of the volume server crc ETag
ret.ETag = fmt.Sprintf("%x", hash.Sum(nil))
return
}

View file

@ -102,8 +102,7 @@ func (fs *FilerServer) doAutoChunk(ctx context.Context, w http.ResponseWriter, r
}
// upload the chunk to the volume server
chunkName := fileName + "_chunk_" + strconv.FormatInt(int64(len(fileChunks)+1), 10)
uploadResult, uploadErr := fs.doUpload(urlLocation, w, r, limitedReader, chunkName, "", fileId, auth)
uploadResult, uploadErr := fs.doUpload(urlLocation, w, r, limitedReader, "", "", nil, auth)
if uploadErr != nil {
return nil, uploadErr
}
@ -175,8 +174,7 @@ func (fs *FilerServer) doAutoChunk(ctx context.Context, w http.ResponseWriter, r
return
}
func (fs *FilerServer) doUpload(urlLocation string, w http.ResponseWriter, r *http.Request,
limitedReader io.Reader, fileName string, contentType string, fileId string, auth security.EncodedJwt) (*operation.UploadResult, error) {
func (fs *FilerServer) doUpload(urlLocation string, w http.ResponseWriter, r *http.Request, limitedReader io.Reader, fileName string, contentType string, pairMap map[string]string, auth security.EncodedJwt) (*operation.UploadResult, error) {
stats.FilerRequestCounter.WithLabelValues("postAutoChunkUpload").Inc()
start := time.Now()
@ -184,5 +182,5 @@ func (fs *FilerServer) doUpload(urlLocation string, w http.ResponseWriter, r *ht
stats.FilerRequestHistogram.WithLabelValues("postAutoChunkUpload").Observe(time.Since(start).Seconds())
}()
return operation.Upload(urlLocation, fileName, fs.option.Cipher, limitedReader, false, contentType, nil, auth)
return operation.Upload(urlLocation, fileName, fs.option.Cipher, limitedReader, false, contentType, pairMap, auth)
}

View file

@ -9,7 +9,7 @@ import (
"mime"
"net/http"
"net/url"
"path"
"path/filepath"
"strconv"
"strings"
"time"
@ -111,11 +111,7 @@ func (vs *VolumeServer) GetOrHeadHandler(w http.ResponseWriter, r *http.Request)
w.WriteHeader(http.StatusNotModified)
return
}
if r.Header.Get("ETag-MD5") == "True" {
setEtag(w, n.MD5())
} else {
setEtag(w, n.Etag())
}
setEtag(w, n.Etag())
if n.HasPairs() {
pairMap := make(map[string]string)
@ -135,7 +131,7 @@ func (vs *VolumeServer) GetOrHeadHandler(w http.ResponseWriter, r *http.Request)
if n.NameSize > 0 && filename == "" {
filename = string(n.Name)
if ext == "" {
ext = path.Ext(filename)
ext = filepath.Ext(filename)
}
}
mtype := ""
@ -179,7 +175,7 @@ func (vs *VolumeServer) tryHandleChunkedFile(n *needle.Needle, fileName string,
fileName = chunkManifest.Name
}
ext := path.Ext(fileName)
ext := filepath.Ext(fileName)
mType := ""
if chunkManifest.Mime != "" {
@ -226,28 +222,22 @@ func conditionallyResizeImages(originalDataReaderSeeker io.ReadSeeker, ext strin
func writeResponseContent(filename, mimeType string, rs io.ReadSeeker, w http.ResponseWriter, r *http.Request) error {
totalSize, e := rs.Seek(0, 2)
if mimeType == "" {
if ext := path.Ext(filename); ext != "" {
if ext := filepath.Ext(filename); ext != "" {
mimeType = mime.TypeByExtension(ext)
}
}
if mimeType != "" {
w.Header().Set("Content-Type", mimeType)
}
if filename != "" {
contentDisposition := "inline"
if r.FormValue("dl") != "" {
if dl, _ := strconv.ParseBool(r.FormValue("dl")); dl {
contentDisposition = "attachment"
}
}
w.Header().Set("Content-Disposition", contentDisposition+`; filename="`+fileNameEscaper.Replace(filename)+`"`)
}
w.Header().Set("Accept-Ranges", "bytes")
if r.Method == "HEAD" {
w.Header().Set("Content-Length", strconv.FormatInt(totalSize, 10))
return nil
}
adjustHeadersAfterHEAD(w, r, filename)
processRangeRequst(r, w, totalSize, mimeType, func(writer io.Writer, offset int64, size int64) error {
if _, e = rs.Seek(offset, 0); e != nil {
return e

View file

@ -1,11 +1,11 @@
package needle
import (
"crypto/md5"
"fmt"
"github.com/chrislusf/seaweedfs/weed/util"
"github.com/klauspost/crc32"
"github.com/chrislusf/seaweedfs/weed/util"
)
var table = crc32.MakeTable(crc32.Castagnoli)
@ -29,13 +29,3 @@ func (n *Needle) Etag() string {
util.Uint32toBytes(bits, uint32(n.Checksum))
return fmt.Sprintf("%x", bits)
}
func (n *Needle) MD5() string {
hash := md5.New()
hash.Write(n.Data)
return fmt.Sprintf("%x", hash.Sum(nil))
}