seaweedfs/weed/shell/command_volume_tier_move.go

319 lines
10 KiB
Go
Raw Permalink Normal View History

2021-02-16 10:47:02 +00:00
package shell
import (
2022-09-15 06:06:44 +00:00
"context"
"errors"
2021-02-16 10:47:02 +00:00
"flag"
"fmt"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/pb"
"github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
"github.com/seaweedfs/seaweedfs/weed/storage/types"
"github.com/seaweedfs/seaweedfs/weed/wdclient"
2021-02-16 10:47:02 +00:00
"io"
"path/filepath"
2021-08-10 09:50:28 +00:00
"sync"
2021-02-16 10:47:02 +00:00
"time"
"github.com/seaweedfs/seaweedfs/weed/operation"
2022-09-15 06:06:44 +00:00
"github.com/seaweedfs/seaweedfs/weed/pb/volume_server_pb"
"github.com/seaweedfs/seaweedfs/weed/storage/needle"
2021-02-16 10:47:02 +00:00
)
// init appends a commandVolumeTierMove instance to the package-level Commands
// slice so that the command is available alongside the others.
func init() {
	Commands = append(Commands, new(commandVolumeTierMove))
}
2022-01-26 15:22:31 +00:00
// volumeTierMoveJob is one unit of work sent to a destination server's queue:
// the volume to move and the server it should be copied from.
//
// Field order matters: the struct is built with a positional literal.
type volumeTierMoveJob struct {
	// src is the volume server the volume is moved away from.
	src pb.ServerAddress
	// vid identifies the volume to move.
	vid needle.VolumeId
}
2021-02-16 10:47:02 +00:00
type commandVolumeTierMove struct {
2022-01-26 15:22:31 +00:00
activeServers sync.Map
queues map[pb.ServerAddress]chan volumeTierMoveJob
2022-01-26 15:22:31 +00:00
//activeServers map[pb.ServerAddress]struct{}
//activeServersLock sync.Mutex
//activeServersCond *sync.Cond
2021-02-16 10:47:02 +00:00
}
func (c *commandVolumeTierMove) Name() string {
2021-02-19 11:39:19 +00:00
return "volume.tier.move"
2021-02-16 10:47:02 +00:00
}
func (c *commandVolumeTierMove) Help() string {
2021-02-22 09:30:07 +00:00
return `change a volume from one disk type to another
2021-02-16 10:47:02 +00:00
volume.tier.move -fromDiskType=hdd -toDiskType=ssd [-collectionPattern=""] [-fullPercent=95] [-quietFor=1h] [-parallelLimit=4] [-toReplication=XYZ]
2021-02-22 08:28:42 +00:00
Even if the volume is replicated, only one replica will be changed and the rest replicas will be dropped.
So "volume.fix.replication" and "volume.balance" should be followed.
2021-02-16 10:47:02 +00:00
`
}
func (c *commandVolumeTierMove) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) {
tierCommand := flag.NewFlagSet(c.Name(), flag.ContinueOnError)
collectionPattern := tierCommand.String("collectionPattern", "", "match with wildcard characters '*' and '?'")
2021-02-16 10:47:02 +00:00
fullPercentage := tierCommand.Float64("fullPercent", 95, "the volume reaches the percentage of max volume size")
quietPeriod := tierCommand.Duration("quietFor", 24*time.Hour, "select volumes without no writes for this period")
source := tierCommand.String("fromDiskType", "", "the source disk type")
target := tierCommand.String("toDiskType", "", "the target disk type")
parallelLimit := tierCommand.Int("parallelLimit", 0, "limit the number of parallel copying jobs")
2021-02-22 09:30:07 +00:00
applyChange := tierCommand.Bool("force", false, "actually apply the changes")
ioBytePerSecond := tierCommand.Int64("ioBytePerSecond", 0, "limit the speed of move")
2022-09-15 06:06:44 +00:00
replicationString := tierCommand.String("toReplication", "", "the new target replication setting")
2021-02-16 10:47:02 +00:00
if err = tierCommand.Parse(args); err != nil {
return nil
}
2022-05-31 21:48:46 +00:00
infoAboutSimulationMode(writer, *applyChange, "-force")
2021-02-16 10:47:02 +00:00
2021-12-10 21:24:38 +00:00
if err = commandEnv.confirmIsLocked(args); err != nil {
return
}
2021-02-22 08:28:42 +00:00
fromDiskType := types.ToDiskType(*source)
toDiskType := types.ToDiskType(*target)
2021-02-16 10:47:02 +00:00
2021-02-22 08:28:42 +00:00
if fromDiskType == toDiskType {
return fmt.Errorf("source tier %s is the same as target tier %s", fromDiskType, toDiskType)
}
2021-02-16 10:47:02 +00:00
2021-02-22 08:28:42 +00:00
// collect topology information
topologyInfo, volumeSizeLimitMb, err := collectTopologyInfo(commandEnv, 0)
2021-02-22 08:28:42 +00:00
if err != nil {
return err
2021-02-16 10:47:02 +00:00
}
2021-02-22 09:30:07 +00:00
// collect all volumes that should change
volumeIds, err := collectVolumeIdsForTierChange(commandEnv, topologyInfo, volumeSizeLimitMb, fromDiskType, *collectionPattern, *fullPercentage, *quietPeriod)
2021-02-16 10:47:02 +00:00
if err != nil {
return err
}
fmt.Printf("tier move volumes: %v\n", volumeIds)
2021-02-22 09:30:07 +00:00
_, allLocations := collectVolumeReplicaLocations(topologyInfo)
2022-01-26 15:22:31 +00:00
allLocations = filterLocationsByDiskType(allLocations, toDiskType)
keepDataNodesSorted(allLocations, toDiskType)
if len(allLocations) > 0 && *parallelLimit > 0 && *parallelLimit < len(allLocations) {
allLocations = allLocations[:*parallelLimit]
2022-01-26 15:22:31 +00:00
}
wg := sync.WaitGroup{}
bufferLen := len(allLocations)
c.queues = make(map[pb.ServerAddress]chan volumeTierMoveJob)
2022-01-26 15:22:31 +00:00
for _, dst := range allLocations {
destServerAddress := pb.NewServerAddressFromDataNode(dst.dataNode)
c.queues[destServerAddress] = make(chan volumeTierMoveJob, bufferLen)
wg.Add(1)
go func(dst location, jobs <-chan volumeTierMoveJob, applyChanges bool) {
defer wg.Done()
2022-01-26 15:22:31 +00:00
for job := range jobs {
fmt.Fprintf(writer, "moving volume %d from %s to %s with disk type %s ...\n", job.vid, job.src, dst.dataNode.Id, toDiskType.ReadableString())
locations, found := commandEnv.MasterClient.GetLocations(uint32(job.vid))
if !found {
fmt.Printf("volume %d not found", job.vid)
continue
}
unlock := c.Lock(job.src)
if applyChanges {
if err := c.doMoveOneVolume(commandEnv, writer, job.vid, toDiskType, locations, job.src, dst, *ioBytePerSecond, replicationString); err != nil {
fmt.Fprintf(writer, "move volume %d %s => %s: %v\n", job.vid, job.src, dst.dataNode.Id, err)
}
2022-01-26 15:22:31 +00:00
}
unlock()
}
}(dst, c.queues[destServerAddress], *applyChange)
}
2021-02-22 09:30:07 +00:00
for _, vid := range volumeIds {
2022-01-26 15:22:31 +00:00
if err = c.doVolumeTierMove(commandEnv, writer, vid, toDiskType, allLocations); err != nil {
2021-02-22 09:30:07 +00:00
fmt.Printf("tier move volume %d: %v\n", vid, err)
}
2022-01-26 15:22:31 +00:00
allLocations = rotateDataNodes(allLocations)
2021-02-22 09:30:07 +00:00
}
2022-01-26 15:22:31 +00:00
for key, _ := range c.queues {
close(c.queues[key])
}
wg.Wait()
2021-02-22 09:30:07 +00:00
return nil
}
2022-01-26 15:22:31 +00:00
// Lock acquires the mutex associated with key, creating it on first use, and
// returns the function that releases it. It blocks while another caller holds
// the mutex for the same key.
func (c *commandVolumeTierMove) Lock(key pb.ServerAddress) func() {
	entry, _ := c.activeServers.LoadOrStore(key, new(sync.Mutex))
	serverMutex := entry.(*sync.Mutex)
	serverMutex.Lock()
	return serverMutex.Unlock
}
// filterLocationsByDiskType returns, in their original order, the locations
// whose data node has a DiskInfos entry for diskType. The result is nil when
// no location qualifies.
func filterLocationsByDiskType(dataNodes []location, diskType types.DiskType) (ret []location) {
	diskKey := string(diskType)
	for i := range dataNodes {
		if _, ok := dataNodes[i].dataNode.DiskInfos[diskKey]; !ok {
			continue
		}
		ret = append(ret, dataNodes[i])
	}
	return ret
}
// rotateDataNodes returns dataNodes rotated left by one position: the first
// element moves to the end and every other element shifts forward.
//
// The result is built in a freshly allocated slice. Appending directly to
// dataNodes[1:] would write past len(dataNodes) into the caller's backing
// array whenever the slice has spare capacity (for example after it was
// truncated), silently clobbering the element stored there.
//
// An empty or nil input is returned unchanged.
func rotateDataNodes(dataNodes []location) []location {
	if len(dataNodes) == 0 {
		return dataNodes
	}
	rotated := make([]location, 0, len(dataNodes))
	rotated = append(rotated, dataNodes[1:]...)
	return append(rotated, dataNodes[0])
}
// isOneOf reports whether any entry of locations has a Url equal to server.
func isOneOf(server string, locations []wdclient.Location) bool {
	for i := range locations {
		if locations[i].Url == server {
			return true
		}
	}
	return false
}
2022-01-26 15:22:31 +00:00
func (c *commandVolumeTierMove) doVolumeTierMove(commandEnv *CommandEnv, writer io.Writer, vid needle.VolumeId, toDiskType types.DiskType, allLocations []location) (err error) {
2021-02-22 09:30:07 +00:00
// find volume location
locations, found := commandEnv.MasterClient.GetLocations(uint32(vid))
if !found {
return fmt.Errorf("volume %d not found", vid)
}
// find one server with the most empty volume slots with target disk type
hasFoundTarget := false
fn := capacityByFreeVolumeCount(toDiskType)
for _, dst := range allLocations {
if fn(dst.dataNode) > 0 && !hasFoundTarget {
2021-02-22 09:30:07 +00:00
// ask the volume server to replicate the volume
if isOneOf(dst.dataNode.Id, locations) {
continue
}
var sourceVolumeServer pb.ServerAddress
2021-02-22 09:30:07 +00:00
for _, loc := range locations {
if loc.Url != dst.dataNode.Id {
sourceVolumeServer = loc.ServerAddress()
2021-02-22 09:30:07 +00:00
}
}
if sourceVolumeServer == "" {
continue
}
hasFoundTarget = true
2022-01-26 15:22:31 +00:00
// adjust volume count
dst.dataNode.DiskInfos[string(toDiskType)].VolumeCount++
2021-02-22 09:30:07 +00:00
destServerAddress := pb.NewServerAddressFromDataNode(dst.dataNode)
2022-01-26 15:22:31 +00:00
c.queues[destServerAddress] <- volumeTierMoveJob{sourceVolumeServer, vid}
2021-02-22 09:30:07 +00:00
}
}
if !hasFoundTarget {
fmt.Fprintf(writer, "can not find disk type %s for volume %d\n", toDiskType.ReadableString(), vid)
2021-02-22 09:30:07 +00:00
}
2021-02-16 10:47:02 +00:00
return nil
}
2022-09-15 06:06:44 +00:00
func (c *commandVolumeTierMove) doMoveOneVolume(commandEnv *CommandEnv, writer io.Writer, vid needle.VolumeId, toDiskType types.DiskType, locations []wdclient.Location, sourceVolumeServer pb.ServerAddress, dst location, ioBytePerSecond int64, replicationString *string) (err error) {
2021-08-10 09:50:28 +00:00
if !commandEnv.isLocked() {
return fmt.Errorf("lock is lost")
}
2021-08-10 09:50:28 +00:00
// mark all replicas as read only
if err = markVolumeReplicasWritable(commandEnv.option.GrpcDialOption, vid, locations, false); err != nil {
return fmt.Errorf("mark volume %d as readonly on %s: %v", vid, locations[0].Url, err)
}
newAddress := pb.NewServerAddressFromDataNode(dst.dataNode)
if err = LiveMoveVolume(commandEnv.option.GrpcDialOption, writer, vid, sourceVolumeServer, newAddress, 5*time.Second, toDiskType.ReadableString(), ioBytePerSecond, true); err != nil {
// mark all replicas as writable
if err = markVolumeReplicasWritable(commandEnv.option.GrpcDialOption, vid, locations, true); err != nil {
glog.Errorf("mark volume %d as writable on %s: %v", vid, locations[0].Url, err)
}
2021-08-10 09:50:28 +00:00
return fmt.Errorf("move volume %d %s => %s : %v", vid, locations[0].Url, dst.dataNode.Id, err)
}
2021-08-10 09:50:28 +00:00
2022-09-15 06:06:44 +00:00
// If move is successful and replication is not empty, alter moved volume's replication setting
if *replicationString != "" {
err = operation.WithVolumeServerClient(false, newAddress, commandEnv.option.GrpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
resp, configureErr := volumeServerClient.VolumeConfigure(context.Background(), &volume_server_pb.VolumeConfigureRequest{
VolumeId: uint32(vid),
Replication: *replicationString,
})
if configureErr != nil {
return configureErr
}
if resp.Error != "" {
return errors.New(resp.Error)
}
return nil
})
if err != nil {
2022-09-15 06:06:44 +00:00
glog.Errorf("update volume %d replication on %s: %v", vid, locations[0].Url, err)
}
2022-09-15 06:06:44 +00:00
}
2021-08-10 09:50:28 +00:00
// remove the remaining replicas
for _, loc := range locations {
if loc.Url != dst.dataNode.Id && loc.ServerAddress() != sourceVolumeServer {
if err = deleteVolume(commandEnv.option.GrpcDialOption, vid, loc.ServerAddress()); err != nil {
fmt.Fprintf(writer, "failed to delete volume %d on %s: %v\n", vid, loc.Url, err)
2021-08-10 09:50:28 +00:00
}
// reduce volume count? Not really necessary since they are "more" full and will not be a candidate to move to
2021-08-10 09:50:28 +00:00
}
}
return nil
}
func collectVolumeIdsForTierChange(commandEnv *CommandEnv, topologyInfo *master_pb.TopologyInfo, volumeSizeLimitMb uint64, sourceTier types.DiskType, collectionPattern string, fullPercentage float64, quietPeriod time.Duration) (vids []needle.VolumeId, err error) {
2021-02-16 10:47:02 +00:00
quietSeconds := int64(quietPeriod / time.Second)
nowUnixSeconds := time.Now().Unix()
fmt.Printf("collect %s volumes quiet for: %d seconds\n", sourceTier, quietSeconds)
vidMap := make(map[uint32]bool)
2021-02-22 08:28:42 +00:00
eachDataNode(topologyInfo, func(dc string, rack RackId, dn *master_pb.DataNodeInfo) {
2021-02-16 10:47:02 +00:00
for _, diskInfo := range dn.DiskInfos {
for _, v := range diskInfo.VolumeInfos {
// check collection name pattern
if collectionPattern != "" {
matched, err := filepath.Match(collectionPattern, v.Collection)
if err != nil {
return
}
if !matched {
continue
}
}
if v.ModifiedAtSecond+quietSeconds < nowUnixSeconds && types.ToDiskType(v.DiskType) == sourceTier {
2021-02-22 08:28:42 +00:00
if float64(v.Size) > fullPercentage/100*float64(volumeSizeLimitMb)*1024*1024 {
2021-02-16 10:47:02 +00:00
vidMap[v.Id] = true
}
}
}
}
})
for vid := range vidMap {
vids = append(vids, needle.VolumeId(vid))
}
return
}