volume: proxy writes to remote volume server, with replication or not

the panic is triggered by uploading a file to a volume server not holding the designated replica.
2020-03-15 10:20:14.365488 I | http: panic serving 127.0.0.1:57124: runtime error: invalid memory address or nil pointer dereference
goroutine 119 [running]:
net/http.(*conn).serve.func1(0xc0001a8000)
	/home/travis/.gimme/versions/go1.14.linux.amd64/src/net/http/server.go:1772 +0x139
panic(0x2316fe0, 0x3662900)
	/home/travis/.gimme/versions/go1.14.linux.amd64/src/runtime/panic.go:973 +0x396
github.com/chrislusf/seaweedfs/weed/topology.getWritableRemoteReplications(0xc00009c000, 0x2, 0x7ffeefbffbd2, 0xe, 0x0, 0xa, 0x0, 0x0, 0xbb4bf1f7)
	/home/travis/gopath/src/github.com/chrislusf/seaweedfs/weed/topology/store_replicate.go:157 +0x53
github.com/chrislusf/seaweedfs/weed/topology.ReplicatedWrite(0x7ffeefbffbd2, 0xe, 0xc00009c000, 0xc000000002, 0xc000472750, 0xc0001b2200, 0x0, 0x1, 0x0)
	/home/travis/gopath/src/github.com/chrislusf/seaweedfs/weed/topology/store_replicate.go:29 +0xc7
github.com/chrislusf/seaweedfs/weed/server.(*VolumeServer).PostHandler(0xc0001513f0, 0x292bde0, 0xc0001fe2a0, 0xc0001b2200)
	/home/travis/gopath/src/github.com/chrislusf/seaweedfs/weed/server/volume_server_handlers_write.go:52 +0x56f
github.com/chrislusf/seaweedfs/weed/server.(*VolumeServer).privateStoreHandler(0xc0001513f0, 0x292bde0, 0xc0001fe2a0, 0xc0001b2200)
	/home/travis/gopath/src/github.com/chrislusf/seaweedfs/weed/server/volume_server_handlers.go:37 +0x21f
net/http.HandlerFunc.ServeHTTP(0xc0004420e0, 0x292bde0, 0xc0001fe2a0, 0xc0001b2200)
	/home/travis/.gimme/versions/go1.14.linux.amd64/src/net/http/server.go:2012 +0x44
net/http.(*ServeMux).ServeHTTP(0xc0001fc800, 0x292bde0, 0xc0001fe2a0, 0xc0001b2200)
	/home/travis/.gimme/versions/go1.14.linux.amd64/src/net/http/server.go:2387 +0x1a5
net/http.serverHandler.ServeHTTP(0xc0001781c0, 0x292bde0, 0xc0001fe2a0, 0xc0001b2200)
	/home/travis/.gimme/versions/go1.14.linux.amd64/src/net/http/server.go:2807 +0xa3
net/http.(*conn).serve(0xc0001a8000, 0x2934420, 0xc000212400)
	/home/travis/.gimme/versions/go1.14.linux.amd64/src/net/http/server.go:1895 +0x86c
created by net/http.(*Server).Serve
	/home/travis/.gimme/versions/go1.14.linux.amd64/src/net/http/server.go:2933 +0x35c
Eg:
server A (datacenter 1) and server B (datacenter 2) hold replica (100) for volume 1.
If you upload a file with a key 1,xxxxx to server C (datacenter 3) will trigger the panic on server C.
The server C should either proxy upload file to the correct volume server or should return an HTTP error code and not panic.
This commit is contained in:
Chris Lu 2020-03-15 02:50:42 -07:00
parent 7b37178716
commit 7edbee6f57

View file

@ -154,18 +154,15 @@ func distributedOperation(locations []operation.Location, store *storage.Store,
func getWritableRemoteReplications(s *storage.Store, volumeId needle.VolumeId, masterNode string) (
remoteLocations []operation.Location, err error) {
v := s.GetVolume(volumeId)
if v == nil {
return nil, fmt.Errorf("fail to find volume %d", volumeId)
}
copyCount := v.ReplicaPlacement.GetCopyCount()
if copyCount > 1 {
if lookupResult, lookupErr := operation.Lookup(masterNode, volumeId.String()); lookupErr == nil {
if len(lookupResult.Locations) < copyCount {
err = fmt.Errorf("replicating opetations [%d] is less than volume %d replication copy count [%d]",
len(lookupResult.Locations), volumeId, copyCount)
if v != nil && v.ReplicaPlacement.GetCopyCount() == 1 {
return
}
// not on local store, or has replications
lookupResult, lookupErr := operation.Lookup(masterNode, volumeId.String())
if lookupErr == nil {
selfUrl := s.Ip + ":" + strconv.Itoa(s.Port)
for _, location := range lookupResult.Locations {
if location.Url != selfUrl {
@ -176,6 +173,14 @@ func getWritableRemoteReplications(s *storage.Store, volumeId needle.VolumeId, m
err = fmt.Errorf("failed to lookup for %d: %v", volumeId, lookupErr)
return
}
if v != nil {
// has one local and has remote replications
copyCount := v.ReplicaPlacement.GetCopyCount()
if len(lookupResult.Locations) < copyCount {
err = fmt.Errorf("replicating opetations [%d] is less than volume %d replication copy count [%d]",
len(lookupResult.Locations), volumeId, copyCount)
}
}
return