Merge pull request #1149 from song-zhang/improve-replications-consistency

Improve data consistency for replicated writes
This commit is contained in:
Chris Lu 2019-12-09 20:13:03 -08:00 committed by GitHub
commit 3727d2488f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -25,21 +25,24 @@ func ReplicatedWrite(masterNode string, s *storage.Store,
//check JWT //check JWT
jwt := security.GetJwt(r) jwt := security.GetJwt(r)
var remoteLocations []operation.Location
if r.FormValue("type") != "replicate" {
remoteLocations, err = getWritableRemoteReplications(s, volumeId, masterNode)
if err != nil {
glog.V(0).Infoln(err)
return
}
}
size, isUnchanged, err = s.WriteVolumeNeedle(volumeId, n) size, isUnchanged, err = s.WriteVolumeNeedle(volumeId, n)
if err != nil { if err != nil {
err = fmt.Errorf("failed to write to local disk: %v", err) err = fmt.Errorf("failed to write to local disk: %v", err)
glog.V(0).Infoln(err)
return return
} }
needToReplicate := !s.HasVolume(volumeId) if len(remoteLocations) > 0 { //send to other replica locations
needToReplicate = needToReplicate || s.GetVolume(volumeId).NeedToReplicate() if err = distributedOperation(remoteLocations, s, func(location operation.Location) error {
if !needToReplicate {
needToReplicate = s.GetVolume(volumeId).NeedToReplicate()
}
if needToReplicate { //send to other replica locations
if r.FormValue("type") != "replicate" {
if err = distributedOperation(masterNode, s, volumeId, func(location operation.Location) error {
u := url.URL{ u := url.URL{
Scheme: "http", Scheme: "http",
Host: location.Url, Host: location.Url,
@ -76,7 +79,7 @@ func ReplicatedWrite(masterNode string, s *storage.Store,
}); err != nil { }); err != nil {
size = 0 size = 0
err = fmt.Errorf("failed to write to replicas for volume %d: %v", volumeId, err) err = fmt.Errorf("failed to write to replicas for volume %d: %v", volumeId, err)
} glog.V(0).Infoln(err)
} }
} }
return return
@ -84,31 +87,34 @@ func ReplicatedWrite(masterNode string, s *storage.Store,
func ReplicatedDelete(masterNode string, store *storage.Store, func ReplicatedDelete(masterNode string, store *storage.Store,
volumeId needle.VolumeId, n *needle.Needle, volumeId needle.VolumeId, n *needle.Needle,
r *http.Request) (uint32, error) { r *http.Request) (size uint32, err error) {
//check JWT //check JWT
jwt := security.GetJwt(r) jwt := security.GetJwt(r)
ret, err := store.DeleteVolumeNeedle(volumeId, n) var remoteLocations []operation.Location
if r.FormValue("type") != "replicate" {
remoteLocations, err = getWritableRemoteReplications(store, volumeId, masterNode)
if err != nil { if err != nil {
glog.V(0).Infoln("delete error:", err) glog.V(0).Infoln(err)
return ret, err return
}
} }
needToReplicate := !store.HasVolume(volumeId) size, err = store.DeleteVolumeNeedle(volumeId, n)
if !needToReplicate && ret > 0 { if err != nil {
needToReplicate = store.GetVolume(volumeId).NeedToReplicate() glog.V(0).Infoln("delete error:", err)
return
} }
if needToReplicate { //send to other replica locations
if r.FormValue("type") != "replicate" { if len(remoteLocations) > 0 { //send to other replica locations
if err = distributedOperation(masterNode, store, volumeId, func(location operation.Location) error { if err = distributedOperation(remoteLocations, store, func(location operation.Location) error {
return util.Delete("http://"+location.Url+r.URL.Path+"?type=replicate", string(jwt)) return util.Delete("http://"+location.Url+r.URL.Path+"?type=replicate", string(jwt))
}); err != nil { }); err != nil {
ret = 0 size = 0
} }
} }
} return
return ret, err
} }
type DistributedOperationResult map[string]error type DistributedOperationResult map[string]error
@ -131,32 +137,44 @@ type RemoteResult struct {
Error error Error error
} }
func distributedOperation(masterNode string, store *storage.Store, volumeId needle.VolumeId, op func(location operation.Location) error) error { func distributedOperation(locations []operation.Location, store *storage.Store, op func(location operation.Location) error) error {
if lookupResult, lookupErr := operation.Lookup(masterNode, volumeId.String()); lookupErr == nil { length := len(locations)
length := 0
selfUrl := (store.Ip + ":" + strconv.Itoa(store.Port))
results := make(chan RemoteResult) results := make(chan RemoteResult)
for _, location := range lookupResult.Locations { for _, location := range locations {
if location.Url != selfUrl {
length++
go func(location operation.Location, results chan RemoteResult) { go func(location operation.Location, results chan RemoteResult) {
results <- RemoteResult{location.Url, op(location)} results <- RemoteResult{location.Url, op(location)}
}(location, results) }(location, results)
} }
}
ret := DistributedOperationResult(make(map[string]error)) ret := DistributedOperationResult(make(map[string]error))
for i := 0; i < length; i++ { for i := 0; i < length; i++ {
result := <-results result := <-results
ret[result.Host] = result.Error ret[result.Host] = result.Error
} }
if volume := store.GetVolume(volumeId); volume != nil {
if length+1 < volume.ReplicaPlacement.GetCopyCount() {
return fmt.Errorf("replicating opetations [%d] is less than volume's replication copy count [%d]", length+1, volume.ReplicaPlacement.GetCopyCount())
}
}
return ret.Error() return ret.Error()
}
func getWritableRemoteReplications(s *storage.Store, volumeId needle.VolumeId, masterNode string) (
remoteLocations []operation.Location, err error) {
copyCount := s.GetVolume(volumeId).ReplicaPlacement.GetCopyCount()
if copyCount > 1 {
if lookupResult, lookupErr := operation.Lookup(masterNode, volumeId.String()); lookupErr == nil {
if len(lookupResult.Locations) < copyCount {
err = fmt.Errorf("replicating opetations [%d] is less than volume's replication copy count [%d]",
len(lookupResult.Locations), copyCount)
return
}
selfUrl := s.Ip + ":" + strconv.Itoa(s.Port)
for _, location := range lookupResult.Locations {
if location.Url != selfUrl {
remoteLocations = append(remoteLocations, location)
}
}
} else { } else {
glog.V(0).Infoln() err = fmt.Errorf("failed to lookup for %d: %v", volumeId, lookupErr)
return fmt.Errorf("Failed to lookup for %d: %v", volumeId, lookupErr) return
} }
} }
return
}