ec encode: distribute EC data and parity shards evenly

This commit is contained in:
Chris Lu 2019-12-24 16:52:21 -08:00
parent 9ff72f616a
commit 3ebeae0c0b
3 changed files with 36 additions and 27 deletions

View file

@ -22,7 +22,7 @@ func moveMountedShardToEcNode(ctx context.Context, commandEnv *CommandEnv, exist
if applyBalancing { if applyBalancing {
// ask destination node to copy shard and the ecx file from source node, and mount it // ask destination node to copy shard and the ecx file from source node, and mount it
copiedShardIds, err = oneServerCopyAndMountEcShardsFromSource(ctx, commandEnv.option.GrpcDialOption, destinationEcNode, uint32(shardId), 1, vid, collection, existingLocation.info.Id) copiedShardIds, err = oneServerCopyAndMountEcShardsFromSource(ctx, commandEnv.option.GrpcDialOption, destinationEcNode, []uint32{uint32(shardId)}, vid, collection, existingLocation.info.Id)
if err != nil { if err != nil {
return err return err
} }
@ -51,13 +51,9 @@ func moveMountedShardToEcNode(ctx context.Context, commandEnv *CommandEnv, exist
} }
func oneServerCopyAndMountEcShardsFromSource(ctx context.Context, grpcDialOption grpc.DialOption, func oneServerCopyAndMountEcShardsFromSource(ctx context.Context, grpcDialOption grpc.DialOption,
targetServer *EcNode, startFromShardId uint32, shardCount int, targetServer *EcNode, shardIdsToCopy []uint32,
volumeId needle.VolumeId, collection string, existingLocation string) (copiedShardIds []uint32, err error) { volumeId needle.VolumeId, collection string, existingLocation string) (copiedShardIds []uint32, err error) {
var shardIdsToCopy []uint32
for shardId := startFromShardId; shardId < startFromShardId+uint32(shardCount); shardId++ {
shardIdsToCopy = append(shardIdsToCopy, shardId)
}
fmt.Printf("allocate %d.%v %s => %s\n", volumeId, shardIdsToCopy, existingLocation, targetServer.info.Id) fmt.Printf("allocate %d.%v %s => %s\n", volumeId, shardIdsToCopy, existingLocation, targetServer.info.Id)
err = operation.WithVolumeServerClient(targetServer.info.Id, grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error { err = operation.WithVolumeServerClient(targetServer.info.Id, grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {

View file

@ -93,6 +93,8 @@ func doEcEncode(ctx context.Context, commandEnv *CommandEnv, collection string,
return fmt.Errorf("volume %d not found", vid) return fmt.Errorf("volume %d not found", vid)
} }
// fmt.Printf("found ec %d shards on %v\n", vid, locations)
// mark the volume as readonly // mark the volume as readonly
err = markVolumeReadonly(ctx, commandEnv.option.GrpcDialOption, needle.VolumeId(vid), locations) err = markVolumeReadonly(ctx, commandEnv.option.GrpcDialOption, needle.VolumeId(vid), locations)
if err != nil { if err != nil {
@ -164,10 +166,10 @@ func spreadEcShards(ctx context.Context, commandEnv *CommandEnv, volumeId needle
} }
// calculate how many shards to allocate for these servers // calculate how many shards to allocate for these servers
allocated := balancedEcDistribution(allocatedDataNodes) allocatedEcIds := balancedEcDistribution(allocatedDataNodes)
// ask the data nodes to copy from the source volume server // ask the data nodes to copy from the source volume server
copiedShardIds, err := parallelCopyEcShardsFromSource(ctx, commandEnv.option.GrpcDialOption, allocatedDataNodes, allocated, volumeId, collection, existingLocations[0]) copiedShardIds, err := parallelCopyEcShardsFromSource(ctx, commandEnv.option.GrpcDialOption, allocatedDataNodes, allocatedEcIds, volumeId, collection, existingLocations[0])
if err != nil { if err != nil {
return err return err
} }
@ -197,31 +199,29 @@ func spreadEcShards(ctx context.Context, commandEnv *CommandEnv, volumeId needle
} }
func parallelCopyEcShardsFromSource(ctx context.Context, grpcDialOption grpc.DialOption, func parallelCopyEcShardsFromSource(ctx context.Context, grpcDialOption grpc.DialOption,
targetServers []*EcNode, allocated []int, targetServers []*EcNode, allocatedEcIds [][]uint32,
volumeId needle.VolumeId, collection string, existingLocation wdclient.Location) (actuallyCopied []uint32, err error) { volumeId needle.VolumeId, collection string, existingLocation wdclient.Location) (actuallyCopied []uint32, err error) {
// parallelize // parallelize
shardIdChan := make(chan []uint32, len(targetServers)) shardIdChan := make(chan []uint32, len(targetServers))
var wg sync.WaitGroup var wg sync.WaitGroup
startFromShardId := uint32(0)
for i, server := range targetServers { for i, server := range targetServers {
if allocated[i] <= 0 { if len(allocatedEcIds[i]) <= 0 {
continue continue
} }
wg.Add(1) wg.Add(1)
go func(server *EcNode, startFromShardId uint32, shardCount int) { go func(server *EcNode, allocatedEcShardIds []uint32) {
defer wg.Done() defer wg.Done()
copiedShardIds, copyErr := oneServerCopyAndMountEcShardsFromSource(ctx, grpcDialOption, server, copiedShardIds, copyErr := oneServerCopyAndMountEcShardsFromSource(ctx, grpcDialOption, server,
startFromShardId, shardCount, volumeId, collection, existingLocation.Url) allocatedEcShardIds, volumeId, collection, existingLocation.Url)
if copyErr != nil { if copyErr != nil {
err = copyErr err = copyErr
} else { } else {
shardIdChan <- copiedShardIds shardIdChan <- copiedShardIds
server.addEcVolumeShards(volumeId, collection, copiedShardIds) server.addEcVolumeShards(volumeId, collection, copiedShardIds)
} }
}(server, startFromShardId, allocated[i]) }(server, allocatedEcIds[i])
startFromShardId += uint32(allocated[i])
} }
wg.Wait() wg.Wait()
close(shardIdChan) close(shardIdChan)
@ -237,18 +237,18 @@ func parallelCopyEcShardsFromSource(ctx context.Context, grpcDialOption grpc.Dia
return return
} }
func balancedEcDistribution(servers []*EcNode) (allocated []int) { func balancedEcDistribution(servers []*EcNode) (allocated [][]uint32) {
allocated = make([]int, len(servers)) allocated = make([][]uint32, len(servers))
allocatedCount := 0 allocatedShardIdIndex := uint32(0)
for allocatedCount < erasure_coding.TotalShardsCount { serverIndex := 0
for i, server := range servers { for allocatedShardIdIndex < erasure_coding.TotalShardsCount {
if server.freeEcSlot-allocated[i] > 0 { if servers[serverIndex].freeEcSlot > 0 {
allocated[i] += 1 allocated[serverIndex] = append(allocated[serverIndex], allocatedShardIdIndex)
allocatedCount += 1 allocatedShardIdIndex++
}
if allocatedCount >= erasure_coding.TotalShardsCount {
break
} }
serverIndex++
if serverIndex >= len(servers) {
serverIndex = 0
} }
} }

View file

@ -2,12 +2,25 @@ package shell
import ( import (
"context" "context"
"fmt"
"testing" "testing"
"github.com/chrislusf/seaweedfs/weed/pb/master_pb" "github.com/chrislusf/seaweedfs/weed/pb/master_pb"
"github.com/chrislusf/seaweedfs/weed/storage/needle" "github.com/chrislusf/seaweedfs/weed/storage/needle"
) )
// TestCommandEcDistribution verifies that balancedEcDistribution spreads EC
// shard ids across two nodes instead of piling them onto one.
//
// Both nodes are built with 100 as the last newEcNode argument (presumably the
// free EC slot count, far more than the shards to place -- confirm against
// newEcNode), so neither node should be skipped while shards are handed out.
func TestCommandEcDistribution(t *testing.T) {
	allEcNodes := []*EcNode{
		newEcNode("dc1", "rack1", "dn1", 100),
		newEcNode("dc1", "rack2", "dn2", 100),
	}
	allocated := balancedEcDistribution(allEcNodes)
	fmt.Printf("allocated: %+v", allocated)

	// The result is indexed by server, so there must be one list per node.
	if len(allocated) != len(allEcNodes) {
		t.Fatalf("got %d allocation lists, want one per node (%d)", len(allocated), len(allEcNodes))
	}

	total := 0
	for _, shardIds := range allocated {
		total += len(shardIds)
	}
	if total == 0 {
		t.Fatal("no shards were allocated")
	}

	// Every shard id must be handed to exactly one node.
	seen := make(map[uint32]bool, total)
	for i, shardIds := range allocated {
		for _, shardId := range shardIds {
			if seen[shardId] {
				t.Errorf("shard %d allocated more than once (seen again on node %d)", shardId, i)
			}
			seen[shardId] = true
		}
	}

	// With equal capacity on both nodes the split must be balanced: the two
	// nodes may differ by at most one shard.
	diff := len(allocated[0]) - len(allocated[1])
	if diff < -1 || diff > 1 {
		t.Errorf("uneven distribution: node 0 has %d shards, node 1 has %d",
			len(allocated[0]), len(allocated[1]))
	}
}
func TestCommandEcBalanceSmall(t *testing.T) { func TestCommandEcBalanceSmall(t *testing.T) {
allEcNodes := []*EcNode{ allEcNodes := []*EcNode{