From 4236c3659977dc38e42e64fc1fe74999b2e8d693 Mon Sep 17 00:00:00 2001 From: Konstantin Lebedev <9497591+kmlebedev@users.noreply.github.com> Date: Mon, 11 Jul 2022 16:58:15 +0500 Subject: [PATCH 01/25] volume server evacuate to target server --- weed/shell/command_volume_server_evacuate.go | 29 ++++++++++++++------ 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/weed/shell/command_volume_server_evacuate.go b/weed/shell/command_volume_server_evacuate.go index ffbee0302..f2c24a8b4 100644 --- a/weed/shell/command_volume_server_evacuate.go +++ b/weed/shell/command_volume_server_evacuate.go @@ -47,7 +47,7 @@ func (c *commandVolumeServerEvacuate) Do(args []string, commandEnv *CommandEnv, vsEvacuateCommand := flag.NewFlagSet(c.Name(), flag.ContinueOnError) volumeServer := vsEvacuateCommand.String("node", "", ": of the volume server") - c.targetServer = *vsEvacuateCommand.String("target", "", ": of target volume") + targetServer := vsEvacuateCommand.String("target", "", ": of target volume") skipNonMoveable := vsEvacuateCommand.Bool("skipNonMoveable", false, "skip volumes that can not be moved") applyChange := vsEvacuateCommand.Bool("force", false, "actually apply the changes") retryCount := vsEvacuateCommand.Int("retry", 0, "how many times to retry") @@ -63,6 +63,9 @@ func (c *commandVolumeServerEvacuate) Do(args []string, commandEnv *CommandEnv, if *volumeServer == "" { return fmt.Errorf("need to specify volume server by -node=:") } + if *targetServer != "" { + c.targetServer = *targetServer + } for i := 0; i < *retryCount+1; i++ { if err = c.volumeServerEvacuate(commandEnv, *volumeServer, *skipNonMoveable, *applyChange, writer); err == nil { return nil @@ -103,14 +106,27 @@ func (c *commandVolumeServerEvacuate) evacuateNormalVolumes(commandEnv *CommandE if thisNode == nil { return fmt.Errorf("%s is not found in this cluster", volumeServer) } + if c.targetServer != "" { + targetServerFound := false + for _, otherNode := range otherNodes { + if otherNode.info.Id == c.targetServer { + otherNodes = []*Node{otherNode} + targetServerFound = true + break + } + } + if !targetServerFound { + return fmt.Errorf("target %s is not found in this cluster", c.targetServer) + } + } // move away normal volumes volumeReplicas, _ := collectVolumeReplicaLocations(topologyInfo) for _, diskInfo := range thisNode.info.DiskInfos { for _, vol := range diskInfo.VolumeInfos { - hasMoved, err := moveAwayOneNormalVolume(commandEnv, volumeReplicas, vol, thisNode, otherNodes, applyChange) + hasMoved, err := c.moveAwayOneNormalVolume(commandEnv, volumeReplicas, vol, thisNode, otherNodes, applyChange) if err != nil { - return fmt.Errorf("move away volume %d from %s: %v", vol.Id, volumeServer, err) + fmt.Fprintf(writer, "move away volume %d from %s: %v", vol.Id, volumeServer, err) } if !hasMoved { if skipNonMoveable { @@ -138,7 +154,7 @@ func (c *commandVolumeServerEvacuate) evacuateEcVolumes(commandEnv *CommandEnv, for _, ecShardInfo := range diskInfo.EcShardInfos { hasMoved, err := c.moveAwayOneEcVolume(commandEnv, ecShardInfo, thisNode, otherNodes, applyChange) if err != nil { - return fmt.Errorf("move away volume %d from %s: %v", ecShardInfo.Id, volumeServer, err) + fmt.Fprintf(writer, "move away volume %d from %s: %v", ecShardInfo.Id, volumeServer, err) } if !hasMoved { if skipNonMoveable { @@ -160,9 +176,6 @@ func (c *commandVolumeServerEvacuate) moveAwayOneEcVolume(commandEnv *CommandEnv }) for i := 0; i < len(otherNodes); i++ { emptyNode := otherNodes[i] - if c.targetServer != "" && c.targetServer != emptyNode.info.Id { - continue - } collectionPrefix := "" if ecShardInfo.Collection != "" { collectionPrefix = ecShardInfo.Collection + "_" @@ -184,7 +197,7 @@ func (c *commandVolumeServerEvacuate) moveAwayOneEcVolume(commandEnv *CommandEnv return } -func moveAwayOneNormalVolume(commandEnv *CommandEnv, volumeReplicas map[uint32][]*VolumeReplica, vol *master_pb.VolumeInformationMessage, thisNode *Node, otherNodes []*Node, applyChange bool) (hasMoved bool, err error) { +func (c *commandVolumeServerEvacuate) moveAwayOneNormalVolume(commandEnv *CommandEnv, volumeReplicas map[uint32][]*VolumeReplica, vol *master_pb.VolumeInformationMessage, thisNode *Node, otherNodes []*Node, applyChange bool) (hasMoved bool, err error) { fn := capacityByFreeVolumeCount(types.ToDiskType(vol.DiskType)) for _, n := range otherNodes { n.selectVolumes(func(v *master_pb.VolumeInformationMessage) bool { From 087fa1347f6348b829b95a03877367667cea5de8 Mon Sep 17 00:00:00 2001 From: Konstantin Lebedev <9497591+kmlebedev@users.noreply.github.com> Date: Tue, 12 Jul 2022 11:33:08 +0500 Subject: [PATCH 02/25] volume server evacuate from rack --- weed/shell/command_volume_server_evacuate.go | 103 ++++++++++--------- 1 file changed, 56 insertions(+), 47 deletions(-) diff --git a/weed/shell/command_volume_server_evacuate.go b/weed/shell/command_volume_server_evacuate.go index f2c24a8b4..37fb29b14 100644 --- a/weed/shell/command_volume_server_evacuate.go +++ b/weed/shell/command_volume_server_evacuate.go @@ -19,6 +19,7 @@ func init() { type commandVolumeServerEvacuate struct { targetServer string + volumeRack string } func (c *commandVolumeServerEvacuate) Name() string { @@ -47,6 +48,7 @@ func (c *commandVolumeServerEvacuate) Do(args []string, commandEnv *CommandEnv, vsEvacuateCommand := flag.NewFlagSet(c.Name(), flag.ContinueOnError) volumeServer := vsEvacuateCommand.String("node", "", ": of the volume server") + volumeRack := vsEvacuateCommand.String("rack", "", "rack for then volume servers") targetServer := vsEvacuateCommand.String("target", "", ": of target volume") skipNonMoveable := vsEvacuateCommand.Bool("skipNonMoveable", false, "skip volumes that can not be moved") applyChange := vsEvacuateCommand.Bool("force", false, "actually apply the changes") @@ -66,6 +68,9 @@ func (c *commandVolumeServerEvacuate) Do(args []string, commandEnv *CommandEnv, if *targetServer != "" { c.targetServer = *targetServer } + if *volumeRack != "" { + c.volumeRack = *volumeRack + } for i := 0; i < *retryCount+1; i++ { if err = c.volumeServerEvacuate(commandEnv, *volumeServer, *skipNonMoveable, *applyChange, writer); err == nil { return nil @@ -102,41 +107,31 @@ func (c *commandVolumeServerEvacuate) volumeServerEvacuate(commandEnv *CommandEn func (c *commandVolumeServerEvacuate) evacuateNormalVolumes(commandEnv *CommandEnv, topologyInfo *master_pb.TopologyInfo, volumeServer string, skipNonMoveable, applyChange bool, writer io.Writer) error { // find this volume server volumeServers := collectVolumeServersByDc(topologyInfo, "") - thisNode, otherNodes := nodesOtherThan(volumeServers, volumeServer) - if thisNode == nil { + thisNodes, otherNodes := c.nodesOtherThan(volumeServers, volumeServer) + if len(thisNodes) == 0 { return fmt.Errorf("%s is not found in this cluster", volumeServer) } - if c.targetServer != "" { - targetServerFound := false - for _, otherNode := range otherNodes { - if otherNode.info.Id == c.targetServer { - otherNodes = []*Node{otherNode} - targetServerFound = true - break - } - } - if !targetServerFound { - return fmt.Errorf("target %s is not found in this cluster", c.targetServer) - } - } // move away normal volumes - volumeReplicas, _ := collectVolumeReplicaLocations(topologyInfo) - for _, diskInfo := range thisNode.info.DiskInfos { - for _, vol := range diskInfo.VolumeInfos { - hasMoved, err := c.moveAwayOneNormalVolume(commandEnv, volumeReplicas, vol, thisNode, otherNodes, applyChange) - if err != nil { - fmt.Fprintf(writer, "move away volume %d from %s: %v", vol.Id, volumeServer, err) - } - if !hasMoved { - if skipNonMoveable { - replicaPlacement, _ := super_block.NewReplicaPlacementFromByte(byte(vol.ReplicaPlacement)) - fmt.Fprintf(writer, "skipping non moveable volume %d replication:%s\n", vol.Id, replicaPlacement.String()) - } else { - return fmt.Errorf("failed to move volume %d from %s", vol.Id, volumeServer) + for _, thisNode := range thisNodes { + for _, diskInfo := range thisNode.info.DiskInfos { + volumeReplicas, _ := collectVolumeReplicaLocations(topologyInfo) + for _, vol := range diskInfo.VolumeInfos { + hasMoved, err := c.moveAwayOneNormalVolume(commandEnv, volumeReplicas, vol, thisNode, otherNodes, applyChange) + if err != nil { + fmt.Fprintf(writer, "move away volume %d from %s: %v", vol.Id, volumeServer, err) + } + if !hasMoved { + if skipNonMoveable { + replicaPlacement, _ := super_block.NewReplicaPlacementFromByte(byte(vol.ReplicaPlacement)) + fmt.Fprintf(writer, "skipping non moveable volume %d replication:%s\n", vol.Id, replicaPlacement.String()) + } else { + return fmt.Errorf("failed to move volume %d from %s", vol.Id, volumeServer) + } } } } + } return nil } @@ -144,23 +139,25 @@ func (c *commandVolumeServerEvacuate) evacuateNormalVolumes(commandEnv *CommandE func (c *commandVolumeServerEvacuate) evacuateEcVolumes(commandEnv *CommandEnv, topologyInfo *master_pb.TopologyInfo, volumeServer string, skipNonMoveable, applyChange bool, writer io.Writer) error { // find this ec volume server ecNodes, _ := collectEcVolumeServersByDc(topologyInfo, "") - thisNode, otherNodes := ecNodesOtherThan(ecNodes, volumeServer) - if thisNode == nil { + thisNodes, otherNodes := c.ecNodesOtherThan(ecNodes, volumeServer) + if len(thisNodes) == 0 { return fmt.Errorf("%s is not found in this cluster\n", volumeServer) } // move away ec volumes - for _, diskInfo := range thisNode.info.DiskInfos { - for _, ecShardInfo := range diskInfo.EcShardInfos { - hasMoved, err := c.moveAwayOneEcVolume(commandEnv, ecShardInfo, thisNode, otherNodes, applyChange) - if err != nil { - fmt.Fprintf(writer, "move away volume %d from %s: %v", ecShardInfo.Id, volumeServer, err) - } - if !hasMoved { - if skipNonMoveable { - fmt.Fprintf(writer, "failed to move away ec volume %d from %s\n", ecShardInfo.Id, volumeServer) - } else { - return fmt.Errorf("failed to move away ec volume %d from %s", ecShardInfo.Id, volumeServer) + for _, thisNode := range thisNodes { + for _, diskInfo := range thisNode.info.DiskInfos { + for _, ecShardInfo := range diskInfo.EcShardInfos { + hasMoved, err := c.moveAwayOneEcVolume(commandEnv, ecShardInfo, thisNode, otherNodes, applyChange) + if err != nil { + fmt.Fprintf(writer, "move away volume %d from %s: %v", ecShardInfo.Id, volumeServer, err) + } + if !hasMoved { + if skipNonMoveable { + fmt.Fprintf(writer, "failed to move away ec volume %d from %s\n", ecShardInfo.Id, volumeServer) + } else { + return fmt.Errorf("failed to move away ec volume %d from %s", ecShardInfo.Id, volumeServer) + } } } } @@ -220,10 +217,16 @@ func (c *commandVolumeServerEvacuate) moveAwayOneNormalVolume(commandEnv *Comman return } -func nodesOtherThan(volumeServers []*Node, thisServer string) (thisNode *Node, otherNodes []*Node) { +func (c *commandVolumeServerEvacuate) nodesOtherThan(volumeServers []*Node, thisServer string) (thisNodes []*Node, otherNodes []*Node) { for _, node := range volumeServers { - if node.info.Id == thisServer { - thisNode = node + if node.info.Id == thisServer || (c.volumeRack != "" && node.rack == c.volumeRack) { + thisNodes = append(thisNodes, node) + continue + } + if c.volumeRack != "" && c.volumeRack == node.rack { + continue + } + if c.targetServer != "" && c.targetServer != node.info.Id { continue } otherNodes = append(otherNodes, node) @@ -231,10 +234,16 @@ func nodesOtherThan(volumeServers []*Node, thisServer string) (thisNode *Node, o return } -func ecNodesOtherThan(volumeServers []*EcNode, thisServer string) (thisNode *EcNode, otherNodes []*EcNode) { +func (c *commandVolumeServerEvacuate) ecNodesOtherThan(volumeServers []*EcNode, thisServer string) (thisNodes []*EcNode, otherNodes []*EcNode) { for _, node := range volumeServers { - if node.info.Id == thisServer { - thisNode = node + if node.info.Id == thisServer || (c.volumeRack != "" && string(node.rack) == c.volumeRack) { + thisNodes = append(thisNodes, node) + continue + } + if c.volumeRack != "" && c.volumeRack == string(node.rack) { + continue + } + if c.targetServer != "" && c.targetServer != node.info.Id { continue } otherNodes = append(otherNodes, node) From ee95d23a22d55a12662c1ebc8e2292b61f505bb0 Mon Sep 17 00:00:00 2001 From: Konstantin Lebedev <9497591+kmlebedev@users.noreply.github.com> Date: Tue, 12 Jul 2022 11:56:58 +0500 Subject: [PATCH 03/25] help rack --- weed/shell/command_volume_server_evacuate.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/weed/shell/command_volume_server_evacuate.go b/weed/shell/command_volume_server_evacuate.go index 37fb29b14..dad8d8626 100644 --- a/weed/shell/command_volume_server_evacuate.go +++ b/weed/shell/command_volume_server_evacuate.go @@ -48,7 +48,7 @@ func (c *commandVolumeServerEvacuate) Do(args []string, commandEnv *CommandEnv, vsEvacuateCommand := flag.NewFlagSet(c.Name(), flag.ContinueOnError) volumeServer := vsEvacuateCommand.String("node", "", ": of the volume server") - volumeRack := vsEvacuateCommand.String("rack", "", "rack for then volume servers") + volumeRack := vsEvacuateCommand.String("rack", "", "source rack for the volume servers") targetServer := vsEvacuateCommand.String("target", "", ": of target volume") skipNonMoveable := vsEvacuateCommand.Bool("skipNonMoveable", false, "skip volumes that can not be moved") applyChange := vsEvacuateCommand.Bool("force", false, "actually apply the changes") From 8372721a62f2809ce99aaaf31f4bad5bbf2d99b1 Mon Sep 17 00:00:00 2001 From: Konstantin Lebedev <9497591+kmlebedev@users.noreply.github.com> Date: Tue, 12 Jul 2022 13:47:21 +0500 Subject: [PATCH 04/25] update topologyInfo --- weed/shell/command_volume_server_evacuate.go | 32 +++++++++++++------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/weed/shell/command_volume_server_evacuate.go b/weed/shell/command_volume_server_evacuate.go index dad8d8626..195cc2699 100644 --- a/weed/shell/command_volume_server_evacuate.go +++ b/weed/shell/command_volume_server_evacuate.go @@ -11,6 +11,7 @@ import ( "golang.org/x/exp/slices" "io" "os" + "time" ) func init() { @@ -18,6 +19,7 @@ func init() { } type commandVolumeServerEvacuate struct { + topologyInfo *master_pb.TopologyInfo targetServer string volumeRack string } @@ -58,12 +60,12 @@ func (c *commandVolumeServerEvacuate) Do(args []string, commandEnv *CommandEnv, } infoAboutSimulationMode(writer, *applyChange, "-force") - if err = commandEnv.confirmIsLocked(args); err != nil { + if err = commandEnv.confirmIsLocked(args); err != nil && *applyChange { return } - if *volumeServer == "" { - return fmt.Errorf("need to specify volume server by -node=:") + if *volumeServer == "" && *volumeRack == "" { + return fmt.Errorf("need to specify volume server by -node=: or source rack") } if *targetServer != "" { c.targetServer = *targetServer @@ -88,25 +90,33 @@ func (c *commandVolumeServerEvacuate) volumeServerEvacuate(commandEnv *CommandEn // list all the volumes // collect topology information - topologyInfo, _, err := collectTopologyInfo(commandEnv, 0) + c.topologyInfo, _, err = collectTopologyInfo(commandEnv, 0) if err != nil { return err } - if err := c.evacuateNormalVolumes(commandEnv, topologyInfo, volumeServer, skipNonMoveable, applyChange, writer); err != nil { + go func() { + for { + if topologyInfo, _, err := collectTopologyInfo(commandEnv, 5*time.Minute); err != nil { + c.topologyInfo = topologyInfo + } + } + }() + + if err := c.evacuateNormalVolumes(commandEnv, volumeServer, skipNonMoveable, applyChange, writer); err != nil { return err } - if err := c.evacuateEcVolumes(commandEnv, topologyInfo, volumeServer, skipNonMoveable, applyChange, writer); err != nil { + if err := c.evacuateEcVolumes(commandEnv, volumeServer, skipNonMoveable, applyChange, writer); err != nil { return err } return nil } -func (c *commandVolumeServerEvacuate) evacuateNormalVolumes(commandEnv *CommandEnv, topologyInfo *master_pb.TopologyInfo, volumeServer string, skipNonMoveable, applyChange bool, writer io.Writer) error { +func (c *commandVolumeServerEvacuate) evacuateNormalVolumes(commandEnv *CommandEnv, volumeServer string, skipNonMoveable, applyChange bool, writer io.Writer) error { // find this volume server - volumeServers := collectVolumeServersByDc(topologyInfo, "") + volumeServers := collectVolumeServersByDc(c.topologyInfo, "") thisNodes, otherNodes := c.nodesOtherThan(volumeServers, volumeServer) if len(thisNodes) == 0 { return fmt.Errorf("%s is not found in this cluster", volumeServer) @@ -115,7 +125,7 @@ func (c *commandVolumeServerEvacuate) evacuateNormalVolumes(commandEnv *CommandE // move away normal volumes for _, thisNode := range thisNodes { for _, diskInfo := range thisNode.info.DiskInfos { - volumeReplicas, _ := collectVolumeReplicaLocations(topologyInfo) + volumeReplicas, _ := collectVolumeReplicaLocations(c.topologyInfo) for _, vol := range diskInfo.VolumeInfos { hasMoved, err := c.moveAwayOneNormalVolume(commandEnv, volumeReplicas, vol, thisNode, otherNodes, applyChange) if err != nil { @@ -136,9 +146,9 @@ func (c *commandVolumeServerEvacuate) evacuateNormalVolumes(commandEnv *CommandE return nil } -func (c *commandVolumeServerEvacuate) evacuateEcVolumes(commandEnv *CommandEnv, topologyInfo *master_pb.TopologyInfo, volumeServer string, skipNonMoveable, applyChange bool, writer io.Writer) error { +func (c *commandVolumeServerEvacuate) evacuateEcVolumes(commandEnv *CommandEnv, volumeServer string, skipNonMoveable, applyChange bool, writer io.Writer) error { // find this ec volume server - ecNodes, _ := collectEcVolumeServersByDc(topologyInfo, "") + ecNodes, _ := collectEcVolumeServersByDc(c.topologyInfo, "") thisNodes, otherNodes := c.ecNodesOtherThan(ecNodes, volumeServer) if len(thisNodes) == 0 { return fmt.Errorf("%s is not found in this cluster\n", volumeServer) From 6622240df70c04b381b7f6daaf9dd07494b10701 Mon Sep 17 00:00:00 2001 From: Konstantin Lebedev <9497591+kmlebedev@users.noreply.github.com> Date: Tue, 12 Jul 2022 14:56:34 +0500 Subject: [PATCH 05/25] fix TestVolumeServerEvacuate --- weed/shell/command_volume_server_evacuate_test.go | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/weed/shell/command_volume_server_evacuate_test.go b/weed/shell/command_volume_server_evacuate_test.go index 2cdb94a60..4563f38ba 100644 --- a/weed/shell/command_volume_server_evacuate_test.go +++ b/weed/shell/command_volume_server_evacuate_test.go @@ -6,12 +6,11 @@ import ( ) func TestVolumeServerEvacuate(t *testing.T) { - topologyInfo := parseOutput(topoData) + c := commandVolumeServerEvacuate{} + c.topologyInfo = parseOutput(topoData) volumeServer := "192.168.1.4:8080" - - c := commandVolumeServerEvacuate{} - if err := c.evacuateNormalVolumes(nil, topologyInfo, volumeServer, true, false, os.Stdout); err != nil { + if err := c.evacuateNormalVolumes(nil, volumeServer, true, false, os.Stdout); err != nil { t.Errorf("evacuate: %v", err) } From 4d5144e50d39fb33aea688f9c1bc5f0f3711c8c0 Mon Sep 17 00:00:00 2001 From: Konstantin Lebedev <9497591+kmlebedev@users.noreply.github.com> Date: Fri, 15 Jul 2022 13:51:08 +0500 Subject: [PATCH 06/25] clouse background update --- weed/shell/command_volume_server_evacuate.go | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/weed/shell/command_volume_server_evacuate.go b/weed/shell/command_volume_server_evacuate.go index 195cc2699..3b0c8381b 100644 --- a/weed/shell/command_volume_server_evacuate.go +++ b/weed/shell/command_volume_server_evacuate.go @@ -14,6 +14,8 @@ import ( "time" ) +const topologyInfoUpdateInterval = 5 * time.Minute + func init() { Commands = append(Commands, &commandVolumeServerEvacuate{}) } @@ -95,13 +97,22 @@ func (c *commandVolumeServerEvacuate) volumeServerEvacuate(commandEnv *CommandEn return err } + stopchan := make(chan struct{}) go func() { for { - if topologyInfo, _, err := collectTopologyInfo(commandEnv, 5*time.Minute); err != nil { - c.topologyInfo = topologyInfo + select { + default: + if topologyInfo, _, err := collectTopologyInfo(commandEnv, topologyInfoUpdateInterval); err != nil { + c.topologyInfo = topologyInfo + } else { + fmt.Fprintf(writer, "update topologyInfo %v", err) + } + case <-stopchan: + return } } }() + defer close(stopchan) if err := c.evacuateNormalVolumes(commandEnv, volumeServer, skipNonMoveable, applyChange, writer); err != nil { return err @@ -127,7 +138,7 @@ func (c *commandVolumeServerEvacuate) evacuateNormalVolumes(commandEnv *CommandE for _, diskInfo := range thisNode.info.DiskInfos { volumeReplicas, _ := collectVolumeReplicaLocations(c.topologyInfo) for _, vol := range diskInfo.VolumeInfos { - hasMoved, err := c.moveAwayOneNormalVolume(commandEnv, volumeReplicas, vol, thisNode, otherNodes, applyChange) + hasMoved, err := moveAwayOneNormalVolume(commandEnv, volumeReplicas, vol, thisNode, otherNodes, applyChange) if err != nil { fmt.Fprintf(writer, "move away volume %d from %s: %v", vol.Id, volumeServer, err) } @@ -204,7 +215,7 @@ func (c *commandVolumeServerEvacuate) moveAwayOneEcVolume(commandEnv *CommandEnv return } -func (c *commandVolumeServerEvacuate) moveAwayOneNormalVolume(commandEnv *CommandEnv, volumeReplicas map[uint32][]*VolumeReplica, vol *master_pb.VolumeInformationMessage, thisNode *Node, otherNodes []*Node, applyChange bool) (hasMoved bool, err error) { +func moveAwayOneNormalVolume(commandEnv *CommandEnv, volumeReplicas map[uint32][]*VolumeReplica, vol *master_pb.VolumeInformationMessage, thisNode *Node, otherNodes []*Node, applyChange bool) (hasMoved bool, err error) { fn := capacityByFreeVolumeCount(types.ToDiskType(vol.DiskType)) for _, n := range otherNodes { n.selectVolumes(func(v *master_pb.VolumeInformationMessage) bool { From 01996bccf8ca8cc285e95b55633a501516668a6f Mon Sep 17 00:00:00 2001 From: Konstantin Lebedev <9497591+kmlebedev@users.noreply.github.com> Date: Fri, 15 Jul 2022 15:29:15 +0500 Subject: [PATCH 07/25] Use fallback if urls are not found --- weed/filer/stream.go | 6 +++--- weed/wdclient/masterclient.go | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/weed/filer/stream.go b/weed/filer/stream.go index 7da9fd0a0..71808017e 100644 --- a/weed/filer/stream.go +++ b/weed/filer/stream.go @@ -69,14 +69,14 @@ func StreamContent(masterClient wdclient.HasLookupFileIdFunction, writer io.Writ fileId2Url := make(map[string][]string) for _, chunkView := range chunkViews { - urlStrings, err := masterClient.GetLookupFileIdFunction()(chunkView.FileId) if err != nil { glog.V(1).Infof("operation LookupFileId %s failed, err: %v", chunkView.FileId, err) return err } else if len(urlStrings) == 0 { - glog.Errorf("operation LookupFileId %s failed, err: urls not found", chunkView.FileId) - return fmt.Errorf("operation LookupFileId %s failed, err: urls not found", chunkView.FileId) + errUrlNotFound := fmt.Errorf("operation LookupFileId %s failed, err: urls not found", chunkView.FileId) + glog.Error(errUrlNotFound) + return errUrlNotFound } fileId2Url[chunkView.FileId] = urlStrings } diff --git a/weed/wdclient/masterclient.go b/weed/wdclient/masterclient.go index 3e76dc0c5..d6a06bb57 100644 --- a/weed/wdclient/masterclient.go +++ b/weed/wdclient/masterclient.go @@ -2,6 +2,7 @@ package wdclient import ( "context" + "fmt" "github.com/chrislusf/seaweedfs/weed/stats" "math/rand" "time" @@ -44,7 +45,7 @@ func (mc *MasterClient) GetLookupFileIdFunction() LookupFileIdFunctionType { func (mc *MasterClient) LookupFileIdWithFallback(fileId string) (fullUrls []string, err error) { fullUrls, err = mc.vidMap.LookupFileId(fileId) - if err == nil { + if err == nil && len(fullUrls) > 0 { return } err = pb.WithMasterClient(false, mc.currentMaster, mc.grpcDialOption, func(client master_pb.SeaweedClient) error { @@ -52,7 +53,7 @@ func (mc *MasterClient) LookupFileIdWithFallback(fileId string) (fullUrls []stri VolumeOrFileIds: []string{fileId}, }) if err != nil { - return err + return fmt.Errorf("LookupVolume failed: %v", err) } for vid, vidLocation := range resp.VolumeIdLocations { for _, vidLoc := range vidLocation.Locations { @@ -65,7 +66,6 @@ func (mc *MasterClient) LookupFileIdWithFallback(fileId string) (fullUrls []stri fullUrls = append(fullUrls, "http://"+loc.Url+"/"+fileId) } } - return nil }) return From 7b1497ee63ce4126236d08ab54d9d6e22e43556d Mon Sep 17 00:00:00 2001 From: Konstantin Lebedev <9497591+kmlebedev@users.noreply.github.com> Date: Fri, 15 Jul 2022 16:05:35 +0500 Subject: [PATCH 08/25] Use BackoffSchedule for getLookupFileId --- weed/filer/stream.go | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/weed/filer/stream.go b/weed/filer/stream.go index 71808017e..d1b66e88d 100644 --- a/weed/filer/stream.go +++ b/weed/filer/stream.go @@ -18,6 +18,12 @@ import ( "github.com/chrislusf/seaweedfs/weed/wdclient" ) +var getLookupFileIdBackoffSchedule = []time.Duration{ + 150 * time.Millisecond, + 600 * time.Millisecond, + 1800 * time.Millisecond, +} + func HasData(entry *filer_pb.Entry) bool { if len(entry.Content) > 0 { @@ -69,7 +75,15 @@ func StreamContent(masterClient wdclient.HasLookupFileIdFunction, writer io.Writ fileId2Url := make(map[string][]string) for _, chunkView := range chunkViews { - urlStrings, err := masterClient.GetLookupFileIdFunction()(chunkView.FileId) + var urlStrings []string + var err error + for _, backoff := range getLookupFileIdBackoffSchedule { + urlStrings, err = masterClient.GetLookupFileIdFunction()(chunkView.FileId) + if err == nil && len(urlStrings) > 0 { + time.Sleep(backoff) + break + } + } if err != nil { glog.V(1).Infof("operation LookupFileId %s failed, err: %v", chunkView.FileId, err) return err From 3c2774ec3de28fc3c5fdff4168cffbe31feabdb9 Mon Sep 17 00:00:00 2001 From: Konstantin Lebedev <9497591+kmlebedev@users.noreply.github.com> Date: Mon, 18 Jul 2022 01:46:31 +0500 Subject: [PATCH 09/25] fix update topologyInfo --- weed/shell/command_volume_server_evacuate.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/weed/shell/command_volume_server_evacuate.go b/weed/shell/command_volume_server_evacuate.go index 3b0c8381b..0595ef308 100644 --- a/weed/shell/command_volume_server_evacuate.go +++ b/weed/shell/command_volume_server_evacuate.go @@ -103,9 +103,9 @@ func (c *commandVolumeServerEvacuate) volumeServerEvacuate(commandEnv *CommandEn select { default: if topologyInfo, _, err := collectTopologyInfo(commandEnv, topologyInfoUpdateInterval); err != nil { - c.topologyInfo = topologyInfo - } else { fmt.Fprintf(writer, "update topologyInfo %v", err) + } else { + c.topologyInfo = topologyInfo } case <-stopchan: return From 2b4112e462d48af926caf6431a3de6ef256afcae Mon Sep 17 00:00:00 2001 From: Konstantin Lebedev <9497591+kmlebedev@users.noreply.github.com> Date: Mon, 18 Jul 2022 11:32:28 +0500 Subject: [PATCH 10/25] update otherNodes --- weed/shell/command_volume_server_evacuate.go | 40 +++++++++++--------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/weed/shell/command_volume_server_evacuate.go b/weed/shell/command_volume_server_evacuate.go index 0595ef308..7d50b7f81 100644 --- a/weed/shell/command_volume_server_evacuate.go +++ b/weed/shell/command_volume_server_evacuate.go @@ -24,6 +24,7 @@ type commandVolumeServerEvacuate struct { topologyInfo *master_pb.TopologyInfo targetServer string volumeRack string + otherNodes []*Node } func (c *commandVolumeServerEvacuate) Name() string { @@ -97,22 +98,27 @@ func (c *commandVolumeServerEvacuate) volumeServerEvacuate(commandEnv *CommandEn return err } - stopchan := make(chan struct{}) - go func() { - for { - select { - default: - if topologyInfo, _, err := collectTopologyInfo(commandEnv, topologyInfoUpdateInterval); err != nil { - fmt.Fprintf(writer, "update topologyInfo %v", err) - } else { - c.topologyInfo = topologyInfo + if applyChange { + stopchan := make(chan struct{}) + go func() { + for { + select { + default: + if topologyInfo, _, err := collectTopologyInfo(commandEnv, topologyInfoUpdateInterval); err != nil { + fmt.Fprintf(writer, "update topologyInfo %v", err) + } else { + c.topologyInfo = topologyInfo + _, c.otherNodes = c.nodesOtherThan( + collectVolumeServersByDc(c.topologyInfo, ""), volumeServer) + fmt.Fprintf(writer, "topologyInfo updated %v\n", len(c.otherNodes)) + } + case <-stopchan: + return } - case <-stopchan: - return } - } - }() - defer close(stopchan) + }() + defer close(stopchan) + } if err := c.evacuateNormalVolumes(commandEnv, volumeServer, skipNonMoveable, applyChange, writer); err != nil { return err @@ -128,7 +134,8 @@ func (c *commandVolumeServerEvacuate) volumeServerEvacuate(commandEnv *CommandEn func (c *commandVolumeServerEvacuate) evacuateNormalVolumes(commandEnv *CommandEnv, volumeServer string, skipNonMoveable, applyChange bool, writer io.Writer) error { // find this volume server volumeServers := collectVolumeServersByDc(c.topologyInfo, "") - thisNodes, otherNodes := c.nodesOtherThan(volumeServers, volumeServer) + var thisNodes []*Node + thisNodes, c.otherNodes = c.nodesOtherThan(volumeServers, volumeServer) if len(thisNodes) == 0 { return fmt.Errorf("%s is not found in this cluster", volumeServer) } @@ -138,7 +145,7 @@ func (c *commandVolumeServerEvacuate) evacuateNormalVolumes(commandEnv *CommandE for _, diskInfo := range thisNode.info.DiskInfos { volumeReplicas, _ := collectVolumeReplicaLocations(c.topologyInfo) for _, vol := range diskInfo.VolumeInfos { - hasMoved, err := moveAwayOneNormalVolume(commandEnv, volumeReplicas, vol, thisNode, otherNodes, applyChange) + hasMoved, err := moveAwayOneNormalVolume(commandEnv, volumeReplicas, vol, thisNode, c.otherNodes, applyChange) if err != nil { fmt.Fprintf(writer, "move away volume %d from %s: %v", vol.Id, volumeServer, err) } @@ -152,7 +159,6 @@ func (c *commandVolumeServerEvacuate) evacuateNormalVolumes(commandEnv *CommandE } } } - } return nil } From 73a0dea16bb72a9f8b8d89cd53ce3f92092af5fb Mon Sep 17 00:00:00 2001 From: Konstantin Lebedev <9497591+kmlebedev@users.noreply.github.com> Date: Mon, 18 Jul 2022 16:27:02 +0500 Subject: [PATCH 11/25] sync update topologyInfo --- weed/shell/command_volume_server_evacuate.go | 45 +++++++++----------- 1 file changed, 19 insertions(+), 26 deletions(-) diff --git a/weed/shell/command_volume_server_evacuate.go b/weed/shell/command_volume_server_evacuate.go index 7d50b7f81..d1c474a76 100644 --- a/weed/shell/command_volume_server_evacuate.go +++ b/weed/shell/command_volume_server_evacuate.go @@ -24,7 +24,6 @@ type commandVolumeServerEvacuate struct { topologyInfo *master_pb.TopologyInfo targetServer string volumeRack string - otherNodes []*Node } func (c *commandVolumeServerEvacuate) Name() string { @@ -98,28 +97,6 @@ func (c *commandVolumeServerEvacuate) volumeServerEvacuate(commandEnv *CommandEn return err } - if applyChange { - stopchan := make(chan struct{}) - go func() { - for { - select { - default: - if topologyInfo, _, err := collectTopologyInfo(commandEnv, topologyInfoUpdateInterval); err != nil { - fmt.Fprintf(writer, "update topologyInfo %v", err) - } else { - c.topologyInfo = topologyInfo - _, c.otherNodes = c.nodesOtherThan( - collectVolumeServersByDc(c.topologyInfo, ""), volumeServer) - fmt.Fprintf(writer, "topologyInfo updated %v\n", len(c.otherNodes)) - } - case <-stopchan: - return - } - } - }() - defer close(stopchan) - } - if err := c.evacuateNormalVolumes(commandEnv, volumeServer, skipNonMoveable, applyChange, writer); err != nil { return err } @@ -134,18 +111,34 @@ func (c *commandVolumeServerEvacuate) volumeServerEvacuate(commandEnv *CommandEn func (c *commandVolumeServerEvacuate) evacuateNormalVolumes(commandEnv *CommandEnv, volumeServer string, skipNonMoveable, applyChange bool, writer io.Writer) error { // find this volume server volumeServers := collectVolumeServersByDc(c.topologyInfo, "") - var thisNodes []*Node - thisNodes, c.otherNodes = c.nodesOtherThan(volumeServers, volumeServer) + thisNodes, otherNodes := c.nodesOtherThan(volumeServers, volumeServer) if len(thisNodes) == 0 { return fmt.Errorf("%s is not found in this cluster", volumeServer) } // move away normal volumes + ticker := time.NewTicker(topologyInfoUpdateInterval) for _, thisNode := range thisNodes { for _, diskInfo := range thisNode.info.DiskInfos { + if applyChange { + select { + case <-ticker.C: + if topologyInfo, _, err := collectTopologyInfo(commandEnv, 0); err != nil { + fmt.Fprintf(writer, "update topologyInfo %v", err) + } else { + _, otherNodesNew := c.nodesOtherThan( + collectVolumeServersByDc(topologyInfo, ""), volumeServer) + if len(otherNodesNew) > 0 { + otherNodes = otherNodesNew + c.topologyInfo = topologyInfo + fmt.Fprintf(writer, "topologyInfo updated %v\n", len(otherNodes)) + } + } + } + } volumeReplicas, _ := collectVolumeReplicaLocations(c.topologyInfo) for _, vol := range diskInfo.VolumeInfos { - hasMoved, err := moveAwayOneNormalVolume(commandEnv, volumeReplicas, vol, thisNode, c.otherNodes, applyChange) + hasMoved, err := moveAwayOneNormalVolume(commandEnv, volumeReplicas, vol, thisNode, otherNodes, applyChange) if err != nil { fmt.Fprintf(writer, "move away volume %d from %s: %v", vol.Id, volumeServer, err) } From d422e7769c7f52cd250c3bf8ced279cfef8c8ef0 Mon Sep 17 00:00:00 2001 From: Konstantin Lebedev <9497591+kmlebedev@users.noreply.github.com> Date: Mon, 18 Jul 2022 16:38:19 +0500 Subject: [PATCH 12/25] ticker.Stop --- weed/shell/command_volume_server_evacuate.go | 1 + 1 file changed, 1 insertion(+) diff --git a/weed/shell/command_volume_server_evacuate.go b/weed/shell/command_volume_server_evacuate.go index d1c474a76..c9df2c79a 100644 --- a/weed/shell/command_volume_server_evacuate.go +++ b/weed/shell/command_volume_server_evacuate.go @@ -118,6 +118,7 @@ func (c *commandVolumeServerEvacuate) evacuateNormalVolumes(commandEnv *CommandE // move away normal volumes ticker := time.NewTicker(topologyInfoUpdateInterval) + defer ticker.Stop() for _, thisNode := range thisNodes { for _, diskInfo := range thisNode.info.DiskInfos { if applyChange { From bec9f79659297ae7f64db40c5d9ed10396de7ea7 Mon Sep 17 00:00:00 2001 From: guol-fnst Date: Tue, 19 Jul 2022 09:20:45 +0800 Subject: [PATCH 13/25] use snapshot --- weed/storage/disk_location.go | 7 +- weed/storage/idx/walk.go | 33 +++++++++ weed/storage/needle_map_leveldb.go | 108 ++++++++++++++++++++++++++--- 3 files changed, 136 insertions(+), 12 deletions(-) diff --git a/weed/storage/disk_location.go b/weed/storage/disk_location.go index 8af8ea663..d72d83208 100644 --- a/weed/storage/disk_location.go +++ b/weed/storage/disk_location.go @@ -123,7 +123,7 @@ func (l *DiskLocation) loadExistingVolume(dirEntry os.DirEntry, needleMapKind Ne if volumeName == "" { return false } - + glog.V(0).Infof("data file %s", l.Directory+"/"+volumeName) // skip if ec volumes exists if skipIfEcVolumesExists { if util.FileExists(l.Directory + "/" + volumeName + ".ecx") { @@ -147,7 +147,7 @@ func (l *DiskLocation) loadExistingVolume(dirEntry os.DirEntry, needleMapKind Ne glog.Warningf("get volume id failed, %s, err : %s", volumeName, err) return false } - + glog.V(0).Infof("data file %s", l.Directory+"/"+volumeName) // avoid loading one volume more than once l.volumesLock.RLock() _, found := l.volumes[vid] @@ -156,6 +156,7 @@ func (l *DiskLocation) loadExistingVolume(dirEntry os.DirEntry, needleMapKind Ne glog.V(1).Infof("loaded volume, %v", vid) return true } + glog.V(0).Infof("data file %s", l.Directory+"/"+volumeName) // load the volume v, e := NewVolume(l.Directory, l.IdxDirectory, collection, vid, needleMapKind, nil, nil, 0, 0) @@ -222,6 +223,8 @@ func (l *DiskLocation) loadExistingVolumes(needleMapKind NeedleMapKind) { workerNum = 10 } } + workerNum = 10 + l.concurrentLoadingVolumes(needleMapKind, workerNum) glog.V(0).Infof("Store started on dir: %s with %d volumes max %d", l.Directory, len(l.volumes), l.MaxVolumeCount) diff --git a/weed/storage/idx/walk.go b/weed/storage/idx/walk.go index 5215d3c4f..ef177ec2f 100644 --- a/weed/storage/idx/walk.go +++ b/weed/storage/idx/walk.go @@ -42,6 +42,39 @@ func WalkIndexFile(r io.ReaderAt, fn func(key types.NeedleId, offset types.Offse return e } +func WalkIndexFileIncrent(r io.ReaderAt, milestone uint64, fn func(key types.NeedleId, offset types.Offset, size types.Size) error) error { + var readerOffset = int64(milestone * types.NeedleMapEntrySize) + bytes := make([]byte, types.NeedleMapEntrySize*RowsToRead) + count, e := r.ReadAt(bytes, readerOffset) + if count == 0 && e == io.EOF { + return nil + } + glog.V(3).Infof("readerOffset %d count %d err: %v", readerOffset, count, e) + readerOffset += int64(count) + var ( + key types.NeedleId + offset types.Offset + size types.Size + i int + ) + + for count > 0 && e == nil || e == io.EOF { + for i = 0; i+types.NeedleMapEntrySize <= count; i += types.NeedleMapEntrySize { + key, offset, size = IdxFileEntry(bytes[i : i+types.NeedleMapEntrySize]) + if e = fn(key, offset, size); e != nil { + return e + } + } + if e == io.EOF { + return nil + } + count, e = r.ReadAt(bytes, readerOffset) + glog.V(3).Infof("readerOffset %d count %d err: %v", readerOffset, count, e) + readerOffset += int64(count) + } + return e +} + func IdxFileEntry(bytes []byte) (key types.NeedleId, offset types.Offset, size types.Size) { key = types.BytesToNeedleId(bytes[:types.NeedleIdSize]) offset = types.BytesToOffset(bytes[types.NeedleIdSize : types.NeedleIdSize+types.OffsetSize]) diff --git a/weed/storage/needle_map_leveldb.go b/weed/storage/needle_map_leveldb.go index 31c86d124..f014797df 100644 --- a/weed/storage/needle_map_leveldb.go +++ b/weed/storage/needle_map_leveldb.go @@ -4,11 +4,14 @@ import ( "fmt" "os" "path/filepath" + "strings" "github.com/syndtr/goleveldb/leveldb/errors" "github.com/syndtr/goleveldb/leveldb/opt" "github.com/chrislusf/seaweedfs/weed/storage/idx" + "github.com/chrislusf/seaweedfs/weed/storage/types" + "github.com/chrislusf/seaweedfs/weed/util" "github.com/syndtr/goleveldb/leveldb" @@ -17,26 +20,36 @@ import ( . "github.com/chrislusf/seaweedfs/weed/storage/types" ) +//use "2 >> 16" to reduce cpu cost +const milestoneCnt = 40 +const milestoneKey = 0xffffffffffffffff - 1 + type LevelDbNeedleMap struct { baseNeedleMapper dbFileName string db *leveldb.DB + recordNum uint64 } func NewLevelDbNeedleMap(dbFileName string, indexFile *os.File, opts *opt.Options) (m *LevelDbNeedleMap, err error) { + glog.V(0).Infof("NewLevelDbNeedleMap pocessing %s...", indexFile.Name()) + db, errd := leveldb.OpenFile(dbFileName, opts) + glog.V(0).Infof("begain %v %s %d", errd, dbFileName, getMileStone(db)) + db.Close() + m = &LevelDbNeedleMap{dbFileName: dbFileName} m.indexFile = indexFile if !isLevelDbFresh(dbFileName, indexFile) { - glog.V(1).Infof("Start to Generate %s from %s", dbFileName, indexFile.Name()) + glog.V(0).Infof("Start to Generate %s from %s", dbFileName, indexFile.Name()) generateLevelDbFile(dbFileName, indexFile) - glog.V(1).Infof("Finished Generating %s from %s", dbFileName, indexFile.Name()) + glog.V(0).Infof("Finished Generating %s from %s", dbFileName, indexFile.Name()) } if stat, err := indexFile.Stat(); err != nil { glog.Fatalf("stat file %s: %v", indexFile.Name(), err) } else { m.indexFileOffset = stat.Size() } - glog.V(1).Infof("Opening %s...", dbFileName) + glog.V(0).Infof("Opening %s...", dbFileName) if m.db, err = leveldb.OpenFile(dbFileName, opts); err != nil { if errors.IsCorrupted(err) { @@ -46,11 +59,19 @@ func NewLevelDbNeedleMap(dbFileName string, indexFile *os.File, opts *opt.Option return } } - glog.V(1).Infof("Loading %s...", indexFile.Name()) + glog.V(0).Infof("getMileStone %s : %d", dbFileName, getMileStone(m.db)) + m.recordNum = uint64(m.indexFileOffset / types.NeedleMapEntrySize) + milestone := (m.recordNum / milestoneCnt) * milestoneCnt + err = setMileStone(m.db, milestone) + if err != nil { + return + } + glog.V(0).Infof("Loading %s... %d %d", indexFile.Name(), milestone, getMileStone(m.db)) mm, indexLoadError := newNeedleMapMetricFromIndexFile(indexFile) if indexLoadError != nil { return nil, indexLoadError } + glog.V(0).Infof("finish Loading %s...", indexFile.Name()) m.mapMetric = *mm return } @@ -78,9 +99,21 @@ func generateLevelDbFile(dbFileName string, indexFile *os.File) error { return err } defer db.Close() - return idx.WalkIndexFile(indexFile, func(key NeedleId, offset Offset, size Size) error { + + milestone := getMileStone(db) + if stat, err := indexFile.Stat(); err != nil { + glog.Fatalf("stat file %s: %v", indexFile.Name(), err) + return err + } else { + if milestone*types.NeedleMapEntrySize > uint64(stat.Size()) { + glog.Warningf("wrong milestone %d for filesize %d, set milestone to 0", milestone, stat.Size()) + milestone = 0 + } + glog.V(0).Infof("generateLevelDbFile %s, milestone %d, num of entries:%d", dbFileName, milestone, (uint64(stat.Size())-milestone*types.NeedleMapEntrySize)/types.NeedleMapEntrySize) + } + return idx.WalkIndexFileIncrent(indexFile, milestone, func(key NeedleId, offset Offset, size Size) error { if !offset.IsZero() && size.IsValid() { - levelDbWrite(db, key, offset, size) + levelDbWrite(db, key, offset, size, 0) } else { levelDbDelete(db, key) } @@ -102,6 +135,7 @@ func (m *LevelDbNeedleMap) Get(key NeedleId) (element *needle_map.NeedleValue, o func (m *LevelDbNeedleMap) Put(key NeedleId, offset Offset, size Size) error { var oldSize Size + var milestone uint64 if oldNeedle, ok := m.Get(key); ok { oldSize = oldNeedle.Size } @@ -110,16 +144,61 @@ func (m *LevelDbNeedleMap) Put(key NeedleId, offset Offset, size Size) error { if err := m.appendToIndexFile(key, offset, size); err != nil { return fmt.Errorf("cannot write to indexfile %s: %v", m.indexFile.Name(), err) } - return levelDbWrite(m.db, key, offset, size) + //atomic.AddUint64(&m.recordNum, 1) + //milestone = atomic.LoadUint64(&m.recordNum) + m.recordNum++ + if m.recordNum%milestoneCnt != 0 { + milestone = 0 + } else { + milestone = (m.recordNum / milestoneCnt) * milestoneCnt + glog.V(0).Infof("put cnt:%d milestone:%s %d", m.recordNum, m.dbFileName, milestone) + } + return levelDbWrite(m.db, key, offset, size, milestone) } -func levelDbWrite(db *leveldb.DB, key NeedleId, offset Offset, size Size) error { +func getMileStone(db *leveldb.DB) uint64 { + var mskBytes = make([]byte, 8) + util.Uint64toBytes(mskBytes, milestoneKey) + data, err := db.Get(mskBytes, nil) + if err != nil || len(data) != 8 { + glog.Warningf("get milestone from db error: %v, %d", err, len(data)) + if !strings.Contains(strings.ToLower(err.Error()), "not found") { + err = setMileStone(db, 0) + if err != nil { + glog.Errorf("failed to set milestone: %v", err) + } + } + + return 0 + } + return util.BytesToUint64(data) +} + +func setMileStone(db *leveldb.DB, milestone uint64) error { + glog.V(0).Infof("set milestone %d", milestone) + var mskBytes = make([]byte, 8) + util.Uint64toBytes(mskBytes, milestoneKey) + var msBytes = make([]byte, 8) + util.Uint64toBytes(msBytes, milestone) + if err := db.Put(mskBytes, msBytes, nil); err != nil { + return fmt.Errorf("failed to setMileStone: %v", err) + } + glog.V(0).Infof("ssset milestone %d, %d", milestone, getMileStone(db)) + return nil +} + +func levelDbWrite(db *leveldb.DB, key NeedleId, offset Offset, size Size, milestone uint64) error { bytes := needle_map.ToBytes(key, offset, size) if err := db.Put(bytes[0:NeedleIdSize], bytes[NeedleIdSize:NeedleIdSize+OffsetSize+SizeSize], nil); err != nil { return fmt.Errorf("failed to write leveldb: %v", err) } + // set milestone + if milestone != 0 { + glog.V(0).Infof("actually set milestone %d", milestone) + return setMileStone(db, milestone) + } return nil } func levelDbDelete(db *leveldb.DB, key NeedleId) error { @@ -129,6 +208,7 @@ func levelDbDelete(db *leveldb.DB, key NeedleId) error { } func (m *LevelDbNeedleMap) Delete(key NeedleId, offset Offset) error { + var milestone uint64 oldNeedle, found := m.Get(key) if !found || oldNeedle.Size.IsDeleted() { return nil @@ -139,8 +219,16 @@ func (m *LevelDbNeedleMap) Delete(key NeedleId, offset Offset) error { if err := m.appendToIndexFile(key, offset, TombstoneFileSize); err != nil { return err } - - return levelDbWrite(m.db, key, oldNeedle.Offset, -oldNeedle.Size) + //atomic.AddUint64(&m.recordNum, 1) + //milestone = atomic.LoadUint64(&m.recordNum) + m.recordNum++ + if m.recordNum%milestoneCnt != 0 { + milestone = 0 + } else { + milestone = (m.recordNum / milestoneCnt) * milestoneCnt + } + glog.V(0).Infof("delete cnt:%d milestone:%s %d", m.recordNum, m.dbFileName, milestone) + return levelDbWrite(m.db, key, oldNeedle.Offset, -oldNeedle.Size, milestone) } func (m *LevelDbNeedleMap) Close() { From f419d5643a93ba32572f73a66710290216122459 Mon Sep 17 00:00:00 2001 From: Konstantin Lebedev <9497591+kmlebedev@users.noreply.github.com> Date: Tue, 19 Jul 2022 11:50:52 +0500 Subject: [PATCH 14/25] fix typo add remove logs --- weed/server/master_server.go | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/weed/server/master_server.go b/weed/server/master_server.go index 9bf840f08..0fdc3944f 100644 --- a/weed/server/master_server.go +++ b/weed/server/master_server.go @@ -65,8 +65,8 @@ type MasterServer struct { boundedLeaderChan chan int - onPeerUpdatDoneCn chan string - onPeerUpdatDoneCnExist bool + onPeerUpdateDoneCn chan string + onPeerUpdateDoneCnExist bool // notifying clients clientChansLock sync.RWMutex @@ -118,7 +118,7 @@ func NewMasterServer(r *mux.Router, option *MasterOption, peers map[string]pb.Se Cluster: cluster.NewCluster(), } ms.boundedLeaderChan = make(chan int, 16) - ms.onPeerUpdatDoneCn = make(chan string) + ms.onPeerUpdateDoneCn = make(chan string) ms.MasterClient.OnPeerUpdate = ms.OnPeerUpdate @@ -366,14 +366,15 @@ func (ms *MasterServer) OnPeerUpdate(update *master_pb.ClusterNodeUpdate, startF hashicorpRaft.ServerAddress(peerAddress.ToGrpcAddress()), 0, 0) } } - if ms.onPeerUpdatDoneCnExist { - ms.onPeerUpdatDoneCn <- peerName + if ms.onPeerUpdateDoneCnExist { + ms.onPeerUpdateDoneCn <- peerName } } else if isLeader { go func(peerName string) { + raftServerRemovalTimeAfter := time.After(RaftServerRemovalTime) for { select { - case <-time.After(RaftServerRemovalTime): + case <-raftServerRemovalTimeAfter: err := ms.MasterClient.WithClient(false, func(client master_pb.SeaweedClient) error { _, err := client.RaftRemoveServer(context.Background(), &master_pb.RaftRemoveServerRequest{ Id: peerName, @@ -384,14 +385,16 @@ func (ms *MasterServer) OnPeerUpdate(update *master_pb.ClusterNodeUpdate, startF if err != nil { glog.Warningf("failed to removing old raft server %s: %v", peerName, err) } + glog.V(0).Infof("old raft server %s removed", peerName) return - case peerDone := <-ms.onPeerUpdatDoneCn: + case peerDone := <-ms.onPeerUpdateDoneCn: if peerName == peerDone { + glog.V(0).Infof("raft server %s remove canceled", peerName) return } } } }(peerName) - ms.onPeerUpdatDoneCnExist = true + ms.onPeerUpdateDoneCnExist = true } } From b9256e0b34fcd8ae62b89d1df5b61b7c35e204b4 Mon Sep 17 00:00:00 2001 From: guol-fnst Date: Tue, 19 Jul 2022 13:17:52 +0800 Subject: [PATCH 15/25] optimiz --- weed/storage/disk_location.go | 7 ++--- weed/storage/idx/walk.go | 3 +- weed/storage/needle_map_leveldb.go | 46 +++++++++++------------------- 3 files changed, 20 insertions(+), 36 deletions(-) diff --git a/weed/storage/disk_location.go b/weed/storage/disk_location.go index d72d83208..8af8ea663 100644 --- a/weed/storage/disk_location.go +++ b/weed/storage/disk_location.go @@ -123,7 +123,7 @@ func (l *DiskLocation) loadExistingVolume(dirEntry os.DirEntry, needleMapKind Ne if volumeName == "" { return false } - glog.V(0).Infof("data file %s", l.Directory+"/"+volumeName) + // skip if ec volumes exists if skipIfEcVolumesExists { if util.FileExists(l.Directory + "/" + volumeName + ".ecx") { @@ -147,7 +147,7 @@ func (l *DiskLocation) loadExistingVolume(dirEntry os.DirEntry, needleMapKind Ne glog.Warningf("get volume id failed, %s, err : %s", volumeName, err) return false } - glog.V(0).Infof("data file %s", l.Directory+"/"+volumeName) + // avoid loading one volume more than once l.volumesLock.RLock() _, found := l.volumes[vid] @@ -156,7 +156,6 @@ func (l *DiskLocation) loadExistingVolume(dirEntry os.DirEntry, needleMapKind Ne glog.V(1).Infof("loaded volume, %v", vid) return true } - glog.V(0).Infof("data file %s", l.Directory+"/"+volumeName) // load the volume v, e := NewVolume(l.Directory, l.IdxDirectory, collection, vid, needleMapKind, nil, nil, 0, 0) @@ -223,8 +222,6 @@ func (l *DiskLocation) loadExistingVolumes(needleMapKind NeedleMapKind) { workerNum = 10 } } - workerNum = 10 - l.concurrentLoadingVolumes(needleMapKind, workerNum) glog.V(0).Infof("Store started on dir: %s with %d volumes max %d", l.Directory, len(l.volumes), l.MaxVolumeCount) diff --git a/weed/storage/idx/walk.go b/weed/storage/idx/walk.go index ef177ec2f..74cb83b45 100644 --- a/weed/storage/idx/walk.go +++ b/weed/storage/idx/walk.go @@ -42,7 +42,8 @@ func WalkIndexFile(r io.ReaderAt, fn func(key types.NeedleId, offset types.Offse return e } -func WalkIndexFileIncrent(r io.ReaderAt, milestone uint64, fn func(key types.NeedleId, offset types.Offset, size types.Size) error) error { +//copied from WalkIndexFile, just init readerOffset from milestone +func WalkIndexFileIncrement(r io.ReaderAt, milestone uint64, fn func(key types.NeedleId, offset types.Offset, size types.Size) error) error { var readerOffset = int64(milestone * types.NeedleMapEntrySize) bytes := make([]byte, types.NeedleMapEntrySize*RowsToRead) count, e := r.ReadAt(bytes, readerOffset) diff --git a/weed/storage/needle_map_leveldb.go b/weed/storage/needle_map_leveldb.go index f014797df..c26856ba3 100644 --- a/weed/storage/needle_map_leveldb.go +++ b/weed/storage/needle_map_leveldb.go @@ -20,8 +20,8 @@ import ( . "github.com/chrislusf/seaweedfs/weed/storage/types" ) -//use "2 >> 16" to reduce cpu cost -const milestoneCnt = 40 +//mark it every milestoneCnt operations +const milestoneCnt = 10000 const milestoneKey = 0xffffffffffffffff - 1 type LevelDbNeedleMap struct { @@ -32,24 +32,19 @@ type LevelDbNeedleMap struct { } func NewLevelDbNeedleMap(dbFileName string, indexFile *os.File, opts *opt.Options) (m *LevelDbNeedleMap, err error) { - glog.V(0).Infof("NewLevelDbNeedleMap pocessing %s...", indexFile.Name()) - db, errd := leveldb.OpenFile(dbFileName, opts) - glog.V(0).Infof("begain %v %s %d", errd, dbFileName, getMileStone(db)) - db.Close() - m = &LevelDbNeedleMap{dbFileName: dbFileName} m.indexFile = indexFile if !isLevelDbFresh(dbFileName, indexFile) { - glog.V(0).Infof("Start to Generate %s from %s", dbFileName, indexFile.Name()) + glog.V(1).Infof("Start to Generate %s from %s", dbFileName, indexFile.Name()) generateLevelDbFile(dbFileName, indexFile) - glog.V(0).Infof("Finished Generating %s from %s", dbFileName, indexFile.Name()) + glog.V(1).Infof("Finished Generating %s from %s", dbFileName, indexFile.Name()) } if stat, err := indexFile.Stat(); err != nil { glog.Fatalf("stat file %s: %v", indexFile.Name(), err) } else { m.indexFileOffset = stat.Size() } - glog.V(0).Infof("Opening %s...", dbFileName) + glog.V(1).Infof("Opening %s...", dbFileName) if m.db, err = leveldb.OpenFile(dbFileName, opts); err != nil { if errors.IsCorrupted(err) { @@ -59,19 +54,18 @@ func NewLevelDbNeedleMap(dbFileName string, indexFile *os.File, opts *opt.Option return } } - glog.V(0).Infof("getMileStone %s : %d", dbFileName, getMileStone(m.db)) + glog.V(1).Infof("Loading %s... , milestone: %d", dbFileName, getMileStone(m.db)) m.recordNum = uint64(m.indexFileOffset / types.NeedleMapEntrySize) milestone := (m.recordNum / milestoneCnt) * milestoneCnt err = setMileStone(m.db, milestone) if err != nil { + glog.Fatalf("set milestone for %s error: %s\n", dbFileName, err) return } - glog.V(0).Infof("Loading %s... %d %d", indexFile.Name(), milestone, getMileStone(m.db)) mm, indexLoadError := newNeedleMapMetricFromIndexFile(indexFile) if indexLoadError != nil { return nil, indexLoadError } - glog.V(0).Infof("finish Loading %s...", indexFile.Name()) m.mapMetric = *mm return } @@ -106,14 +100,13 @@ func generateLevelDbFile(dbFileName string, indexFile *os.File) error { return err } else { if milestone*types.NeedleMapEntrySize > uint64(stat.Size()) { - glog.Warningf("wrong milestone %d for filesize %d, set milestone to 0", milestone, stat.Size()) - milestone = 0 + glog.Warningf("wrong milestone %d for filesize %d", milestone, stat.Size()) } glog.V(0).Infof("generateLevelDbFile %s, milestone %d, num of entries:%d", dbFileName, milestone, (uint64(stat.Size())-milestone*types.NeedleMapEntrySize)/types.NeedleMapEntrySize) } - return idx.WalkIndexFileIncrent(indexFile, milestone, func(key NeedleId, offset Offset, size Size) error { + return idx.WalkIndexFileIncrement(indexFile, milestone, func(key NeedleId, offset Offset, size Size) error { if !offset.IsZero() && size.IsValid() { - levelDbWrite(db, key, offset, size, 0) + levelDbWrite(db, key, offset, size, false, 0) } else { levelDbDelete(db, key) } @@ -144,16 +137,14 @@ func (m *LevelDbNeedleMap) Put(key NeedleId, offset Offset, size Size) error { if err := m.appendToIndexFile(key, offset, size); err != nil { return fmt.Errorf("cannot write to indexfile %s: %v", m.indexFile.Name(), err) } - //atomic.AddUint64(&m.recordNum, 1) - //milestone = atomic.LoadUint64(&m.recordNum) m.recordNum++ if m.recordNum%milestoneCnt != 0 { milestone = 0 } else { milestone = (m.recordNum / milestoneCnt) * milestoneCnt - glog.V(0).Infof("put cnt:%d milestone:%s %d", m.recordNum, m.dbFileName, milestone) + glog.V(1).Infof("put cnt:%d for %s,milestone: %d", m.recordNum, m.dbFileName, milestone) } - return levelDbWrite(m.db, key, offset, size, milestone) + return levelDbWrite(m.db, key, offset, size, milestone == 0, milestone) } func getMileStone(db *leveldb.DB) uint64 { @@ -175,7 +166,7 @@ func getMileStone(db *leveldb.DB) uint64 { } func setMileStone(db *leveldb.DB, milestone uint64) error { - glog.V(0).Infof("set milestone %d", milestone) + glog.V(1).Infof("set milestone %d", milestone) var mskBytes = make([]byte, 8) util.Uint64toBytes(mskBytes, milestoneKey) var msBytes = make([]byte, 8) @@ -183,11 +174,10 @@ func setMileStone(db *leveldb.DB, milestone uint64) error { if err := db.Put(mskBytes, msBytes, nil); err != nil { return fmt.Errorf("failed to setMileStone: %v", err) } - glog.V(0).Infof("ssset milestone %d, %d", milestone, getMileStone(db)) return nil } -func levelDbWrite(db *leveldb.DB, key NeedleId, offset Offset, size Size, milestone uint64) error { +func levelDbWrite(db *leveldb.DB, key NeedleId, offset Offset, size Size, upateMilstone bool, milestone uint64) error { bytes := needle_map.ToBytes(key, offset, size) @@ -195,8 +185,7 @@ func levelDbWrite(db *leveldb.DB, key NeedleId, offset Offset, size Size, milest return fmt.Errorf("failed to write leveldb: %v", err) } // set milestone - if milestone != 0 { - glog.V(0).Infof("actually set milestone %d", milestone) + if upateMilstone { return setMileStone(db, milestone) } return nil @@ -219,16 +208,13 @@ func (m *LevelDbNeedleMap) Delete(key NeedleId, offset Offset) error { if err := m.appendToIndexFile(key, offset, TombstoneFileSize); err != nil { return err } - //atomic.AddUint64(&m.recordNum, 1) - //milestone = atomic.LoadUint64(&m.recordNum) m.recordNum++ if m.recordNum%milestoneCnt != 0 { milestone = 0 } else { milestone = (m.recordNum / milestoneCnt) * milestoneCnt } - glog.V(0).Infof("delete cnt:%d milestone:%s %d", m.recordNum, m.dbFileName, milestone) - return levelDbWrite(m.db, key, oldNeedle.Offset, -oldNeedle.Size, milestone) + return levelDbWrite(m.db, key, oldNeedle.Offset, -oldNeedle.Size, milestone == 0, milestone) } func (m *LevelDbNeedleMap) Close() { From 91285bb51d3b95befb5aba0104f6ac83e5b07572 Mon Sep 17 00:00:00 2001 From: guol-fnst Date: Tue, 19 Jul 2022 20:00:44 +0800 Subject: [PATCH 16/25] remove uncessary code --- weed/storage/needle_map_leveldb.go | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/weed/storage/needle_map_leveldb.go b/weed/storage/needle_map_leveldb.go index c26856ba3..f41524a97 100644 --- a/weed/storage/needle_map_leveldb.go +++ b/weed/storage/needle_map_leveldb.go @@ -4,7 +4,6 @@ import ( "fmt" "os" "path/filepath" - "strings" "github.com/syndtr/goleveldb/leveldb/errors" "github.com/syndtr/goleveldb/leveldb/opt" @@ -153,13 +152,14 @@ func getMileStone(db *leveldb.DB) uint64 { data, err := db.Get(mskBytes, nil) if err != nil || len(data) != 8 { glog.Warningf("get milestone from db error: %v, %d", err, len(data)) - if !strings.Contains(strings.ToLower(err.Error()), "not found") { - err = setMileStone(db, 0) - if err != nil { - glog.Errorf("failed to set milestone: %v", err) + /* + if !strings.Contains(strings.ToLower(err.Error()), "not found") { + err = setMileStone(db, 0) + if err != nil { + glog.Errorf("failed to set milestone: %v", err) + } } - } - + */ return 0 } return util.BytesToUint64(data) From 2ae3f812f8bd2ce1aaae505a31cc33decf872f0b Mon Sep 17 00:00:00 2001 From: chrislu Date: Tue, 19 Jul 2022 11:43:31 -0700 Subject: [PATCH 17/25] minor --- weed/util/retry.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/weed/util/retry.go b/weed/util/retry.go index b7cd278b3..892341dc1 100644 --- a/weed/util/retry.go +++ b/weed/util/retry.go @@ -32,7 +32,7 @@ func Retry(name string, job func() error) (err error) { return err } -func RetryForever(name string, job func() error, onErrFn func(err error) bool) { +func RetryForever(name string, job func() error, onErrFn func(err error) (shouldContinue bool)) { waitTime := time.Second for { err := job() From 11e393dbe796ea3adf56e9fac9852dd8bb07ac2a Mon Sep 17 00:00:00 2001 From: Konstantin Lebedev <9497591+kmlebedev@users.noreply.github.com> Date: Wed, 20 Jul 2022 00:45:13 +0500 Subject: [PATCH 18/25] err msg with duplicated local subscription detected move to log level 1 https://github.com/chrislusf/seaweedfs/issues/3320 --- weed/filer/meta_aggregator.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/weed/filer/meta_aggregator.go b/weed/filer/meta_aggregator.go index c672ce342..5799e247e 100644 --- a/weed/filer/meta_aggregator.go +++ b/weed/filer/meta_aggregator.go @@ -7,6 +7,7 @@ import ( "github.com/chrislusf/seaweedfs/weed/pb/master_pb" "github.com/chrislusf/seaweedfs/weed/util" "io" + "strings" "sync" "time" @@ -99,7 +100,11 @@ func (ma *MetaAggregator) loopSubscribeToOneFiler(f *Filer, self pb.ServerAddres return } if err != nil { - glog.V(0).Infof("subscribing remote %s meta change: %v", peer, err) + errLvl := glog.Level(0) + if strings.Contains(err.Error(), "duplicated local subscription detected") { + errLvl = glog.Level(1) + } + glog.V(errLvl).Infof("subscribing remote %s meta change: %v", peer, err) } if lastTsNs < nextLastTsNs { lastTsNs = nextLastTsNs From d3f7c09c03168fc0c29dd19c90dd9d2c86b2bda3 Mon Sep 17 00:00:00 2001 From: Konstantin Lebedev <9497591+kmlebedev@users.noreply.github.com> Date: Wed, 20 Jul 2022 00:54:23 +0500 Subject: [PATCH 19/25] remove ticker update the topology before each file --- weed/shell/command_volume_server_evacuate.go | 26 +++++++------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/weed/shell/command_volume_server_evacuate.go b/weed/shell/command_volume_server_evacuate.go index c9df2c79a..f72d73230 100644 --- a/weed/shell/command_volume_server_evacuate.go +++ b/weed/shell/command_volume_server_evacuate.go @@ -11,11 +11,8 @@ import ( "golang.org/x/exp/slices" "io" "os" - "time" ) -const topologyInfoUpdateInterval = 5 * time.Minute - func init() { Commands = append(Commands, &commandVolumeServerEvacuate{}) } @@ -117,23 +114,18 @@ func (c *commandVolumeServerEvacuate) evacuateNormalVolumes(commandEnv *CommandE } // move away normal volumes - ticker := time.NewTicker(topologyInfoUpdateInterval) - defer ticker.Stop() for _, thisNode := range thisNodes { for _, diskInfo := range thisNode.info.DiskInfos { if applyChange { - select { - case <-ticker.C: - if topologyInfo, _, err := collectTopologyInfo(commandEnv, 0); err != nil { - fmt.Fprintf(writer, "update topologyInfo %v", err) - } else { - _, otherNodesNew := c.nodesOtherThan( - collectVolumeServersByDc(topologyInfo, ""), volumeServer) - if len(otherNodesNew) > 0 { - otherNodes = otherNodesNew - c.topologyInfo = topologyInfo - fmt.Fprintf(writer, "topologyInfo updated %v\n", len(otherNodes)) - } + if topologyInfo, _, err := collectTopologyInfo(commandEnv, 0); err != nil { + fmt.Fprintf(writer, "update topologyInfo %v", err) + } else { + _, otherNodesNew := c.nodesOtherThan( + collectVolumeServersByDc(topologyInfo, ""), volumeServer) + if len(otherNodesNew) > 0 { + otherNodes = otherNodesNew + c.topologyInfo = topologyInfo + fmt.Fprintf(writer, "topologyInfo updated %v\n", len(otherNodes)) } } } From ac694f0c8f2b5456bd175e1288023f0bdb5b7a14 Mon Sep 17 00:00:00 2001 From: guol-fnst Date: Wed, 20 Jul 2022 09:26:06 +0800 Subject: [PATCH 20/25] rename parameter and reuse functions rename milestone to watermark --- .../diff_volume_servers.go | 9 +- unmaintained/see_idx/see_idx.go | 5 +- weed/storage/erasure_coding/ec_encoder.go | 2 +- weed/storage/idx/walk.go | 38 +------- weed/storage/needle_map/memdb.go | 2 +- weed/storage/needle_map_leveldb.go | 93 +++++++++---------- weed/storage/needle_map_memory.go | 2 +- 7 files changed, 58 insertions(+), 93 deletions(-) diff --git a/unmaintained/diff_volume_servers/diff_volume_servers.go b/unmaintained/diff_volume_servers/diff_volume_servers.go index 0188d18d4..815eeae54 100644 --- a/unmaintained/diff_volume_servers/diff_volume_servers.go +++ b/unmaintained/diff_volume_servers/diff_volume_servers.go @@ -6,6 +6,10 @@ import ( "errors" "flag" "fmt" + "io" + "math" + "os" + "github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/operation" "github.com/chrislusf/seaweedfs/weed/pb" @@ -16,9 +20,6 @@ import ( "github.com/chrislusf/seaweedfs/weed/storage/types" "github.com/chrislusf/seaweedfs/weed/util" "google.golang.org/grpc" - "io" - "math" - "os" ) var ( @@ -155,7 +156,7 @@ func getVolumeFiles(v uint32, addr pb.ServerAddress) (map[types.NeedleId]needleS var maxOffset int64 files := map[types.NeedleId]needleState{} - err = idx.WalkIndexFile(idxFile, func(key types.NeedleId, offset types.Offset, size types.Size) error { + err = idx.WalkIndexFile(idxFile, 0, func(key types.NeedleId, offset types.Offset, size types.Size) error { if offset.IsZero() || size.IsDeleted() { files[key] = needleState{ state: stateDeleted, diff --git a/unmaintained/see_idx/see_idx.go b/unmaintained/see_idx/see_idx.go index 22c659351..616263b1c 100644 --- a/unmaintained/see_idx/see_idx.go +++ b/unmaintained/see_idx/see_idx.go @@ -3,11 +3,12 @@ package main import ( "flag" "fmt" - "github.com/chrislusf/seaweedfs/weed/util" "os" "path" "strconv" + "github.com/chrislusf/seaweedfs/weed/util" + "github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/storage/idx" "github.com/chrislusf/seaweedfs/weed/storage/types" @@ -36,7 +37,7 @@ func main() { } defer indexFile.Close() - idx.WalkIndexFile(indexFile, func(key types.NeedleId, offset types.Offset, size types.Size) error { + idx.WalkIndexFile(indexFile, 0, func(key types.NeedleId, offset types.Offset, size types.Size) error { fmt.Printf("key:%v offset:%v size:%v(%v)\n", key, offset, size, util.BytesToHumanReadable(uint64(size))) return nil }) diff --git a/weed/storage/erasure_coding/ec_encoder.go b/weed/storage/erasure_coding/ec_encoder.go index 157149865..ea331ca39 100644 --- a/weed/storage/erasure_coding/ec_encoder.go +++ b/weed/storage/erasure_coding/ec_encoder.go @@ -294,7 +294,7 @@ func readNeedleMap(baseFileName string) (*needle_map.MemDb, error) { defer indexFile.Close() cm := needle_map.NewMemDb() - err = idx.WalkIndexFile(indexFile, func(key types.NeedleId, offset types.Offset, size types.Size) error { + err = idx.WalkIndexFile(indexFile, 0, func(key types.NeedleId, offset types.Offset, size types.Size) error { if !offset.IsZero() && size != types.TombstoneFileSize { cm.Set(key, offset, size) } else { diff --git a/weed/storage/idx/walk.go b/weed/storage/idx/walk.go index 74cb83b45..70d3855ea 100644 --- a/weed/storage/idx/walk.go +++ b/weed/storage/idx/walk.go @@ -9,42 +9,8 @@ import ( // walks through the index file, calls fn function with each key, offset, size // stops with the error returned by the fn function -func WalkIndexFile(r io.ReaderAt, fn func(key types.NeedleId, offset types.Offset, size types.Size) error) error { - var readerOffset int64 - bytes := make([]byte, types.NeedleMapEntrySize*RowsToRead) - count, e := r.ReadAt(bytes, readerOffset) - if count == 0 && e == io.EOF { - return nil - } - glog.V(3).Infof("readerOffset %d count %d err: %v", readerOffset, count, e) - readerOffset += int64(count) - var ( - key types.NeedleId - offset types.Offset - size types.Size - i int - ) - - for count > 0 && e == nil || e == io.EOF { - for i = 0; i+types.NeedleMapEntrySize <= count; i += types.NeedleMapEntrySize { - key, offset, size = IdxFileEntry(bytes[i : i+types.NeedleMapEntrySize]) - if e = fn(key, offset, size); e != nil { - return e - } - } - if e == io.EOF { - return nil - } - count, e = r.ReadAt(bytes, readerOffset) - glog.V(3).Infof("readerOffset %d count %d err: %v", readerOffset, count, e) - readerOffset += int64(count) - } - return e -} - -//copied from WalkIndexFile, just init readerOffset from milestone -func WalkIndexFileIncrement(r io.ReaderAt, milestone uint64, fn func(key types.NeedleId, offset types.Offset, size types.Size) error) error { - var readerOffset = int64(milestone * types.NeedleMapEntrySize) +func WalkIndexFile(r io.ReaderAt, startFrom uint64, fn func(key types.NeedleId, offset types.Offset, size types.Size) error) error { + readerOffset := int64(startFrom * types.NeedleMapEntrySize) bytes := make([]byte, types.NeedleMapEntrySize*RowsToRead) count, e := r.ReadAt(bytes, readerOffset) if count == 0 && e == io.EOF { diff --git a/weed/storage/needle_map/memdb.go b/weed/storage/needle_map/memdb.go index ba1fd3d1e..a362a85ae 100644 --- a/weed/storage/needle_map/memdb.go +++ b/weed/storage/needle_map/memdb.go @@ -111,7 +111,7 @@ func (cm *MemDb) LoadFromIdx(idxName string) (ret error) { func (cm *MemDb) LoadFromReaderAt(readerAt io.ReaderAt) (ret error) { - return idx.WalkIndexFile(readerAt, func(key NeedleId, offset Offset, size Size) error { + return idx.WalkIndexFile(readerAt, 0, func(key NeedleId, offset Offset, size Size) error { if offset.IsZero() || size.IsDeleted() { return cm.Delete(key) } diff --git a/weed/storage/needle_map_leveldb.go b/weed/storage/needle_map_leveldb.go index f41524a97..a1934b8f1 100644 --- a/weed/storage/needle_map_leveldb.go +++ b/weed/storage/needle_map_leveldb.go @@ -19,15 +19,16 @@ import ( . "github.com/chrislusf/seaweedfs/weed/storage/types" ) -//mark it every milestoneCnt operations -const milestoneCnt = 10000 -const milestoneKey = 0xffffffffffffffff - 1 +//mark it every watermarkBatchSize operations +const watermarkBatchSize = 10000 + +var watermarkKey = []byte("idx_entry_watermark") type LevelDbNeedleMap struct { baseNeedleMapper - dbFileName string - db *leveldb.DB - recordNum uint64 + dbFileName string + db *leveldb.DB + recordCount uint64 } func NewLevelDbNeedleMap(dbFileName string, indexFile *os.File, opts *opt.Options) (m *LevelDbNeedleMap, err error) { @@ -53,12 +54,12 @@ func NewLevelDbNeedleMap(dbFileName string, indexFile *os.File, opts *opt.Option return } } - glog.V(1).Infof("Loading %s... , milestone: %d", dbFileName, getMileStone(m.db)) - m.recordNum = uint64(m.indexFileOffset / types.NeedleMapEntrySize) - milestone := (m.recordNum / milestoneCnt) * milestoneCnt - err = setMileStone(m.db, milestone) + glog.V(0).Infof("Loading %s... , watermark: %d", dbFileName, getWatermark(m.db)) + m.recordCount = uint64(m.indexFileOffset / types.NeedleMapEntrySize) + watermark := (m.recordCount / watermarkBatchSize) * watermarkBatchSize + err = setWatermark(m.db, watermark) if err != nil { - glog.Fatalf("set milestone for %s error: %s\n", dbFileName, err) + glog.Fatalf("set watermark for %s error: %s\n", dbFileName, err) return } mm, indexLoadError := newNeedleMapMetricFromIndexFile(indexFile) @@ -93,17 +94,17 @@ func generateLevelDbFile(dbFileName string, indexFile *os.File) error { } defer db.Close() - milestone := getMileStone(db) + watermark := getWatermark(db) if stat, err := indexFile.Stat(); err != nil { glog.Fatalf("stat file %s: %v", indexFile.Name(), err) return err } else { - if milestone*types.NeedleMapEntrySize > uint64(stat.Size()) { - glog.Warningf("wrong milestone %d for filesize %d", milestone, stat.Size()) + if watermark*types.NeedleMapEntrySize > uint64(stat.Size()) { + glog.Warningf("wrong watermark %d for filesize %d", watermark, stat.Size()) } - glog.V(0).Infof("generateLevelDbFile %s, milestone %d, num of entries:%d", dbFileName, milestone, (uint64(stat.Size())-milestone*types.NeedleMapEntrySize)/types.NeedleMapEntrySize) + glog.V(0).Infof("generateLevelDbFile %s, watermark %d, num of entries:%d", dbFileName, watermark, (uint64(stat.Size())-watermark*types.NeedleMapEntrySize)/types.NeedleMapEntrySize) } - return idx.WalkIndexFileIncrement(indexFile, milestone, func(key NeedleId, offset Offset, size Size) error { + return idx.WalkIndexFile(indexFile, watermark, func(key NeedleId, offset Offset, size Size) error { if !offset.IsZero() && size.IsValid() { levelDbWrite(db, key, offset, size, false, 0) } else { @@ -127,7 +128,7 @@ func (m *LevelDbNeedleMap) Get(key NeedleId) (element *needle_map.NeedleValue, o func (m *LevelDbNeedleMap) Put(key NeedleId, offset Offset, size Size) error { var oldSize Size - var milestone uint64 + var watermark uint64 if oldNeedle, ok := m.Get(key); ok { oldSize = oldNeedle.Size } @@ -136,27 +137,25 @@ func (m *LevelDbNeedleMap) Put(key NeedleId, offset Offset, size Size) error { if err := m.appendToIndexFile(key, offset, size); err != nil { return fmt.Errorf("cannot write to indexfile %s: %v", m.indexFile.Name(), err) } - m.recordNum++ - if m.recordNum%milestoneCnt != 0 { - milestone = 0 + m.recordCount++ + if m.recordCount%watermarkBatchSize != 0 { + watermark = 0 } else { - milestone = (m.recordNum / milestoneCnt) * milestoneCnt - glog.V(1).Infof("put cnt:%d for %s,milestone: %d", m.recordNum, m.dbFileName, milestone) + watermark = (m.recordCount / watermarkBatchSize) * watermarkBatchSize + glog.V(1).Infof("put cnt:%d for %s,watermark: %d", m.recordCount, m.dbFileName, watermark) } - return levelDbWrite(m.db, key, offset, size, milestone == 0, milestone) + return levelDbWrite(m.db, key, offset, size, watermark == 0, watermark) } -func getMileStone(db *leveldb.DB) uint64 { - var mskBytes = make([]byte, 8) - util.Uint64toBytes(mskBytes, milestoneKey) - data, err := db.Get(mskBytes, nil) +func getWatermark(db *leveldb.DB) uint64 { + data, err := db.Get(watermarkKey, nil) if err != nil || len(data) != 8 { - glog.Warningf("get milestone from db error: %v, %d", err, len(data)) + glog.Warningf("get watermark from db error: %v, %d", err, len(data)) /* if !strings.Contains(strings.ToLower(err.Error()), "not found") { - err = setMileStone(db, 0) + err = setWatermark(db, 0) if err != nil { - glog.Errorf("failed to set milestone: %v", err) + glog.Errorf("failed to set watermark: %v", err) } } */ @@ -165,28 +164,26 @@ func getMileStone(db *leveldb.DB) uint64 { return util.BytesToUint64(data) } -func setMileStone(db *leveldb.DB, milestone uint64) error { - glog.V(1).Infof("set milestone %d", milestone) - var mskBytes = make([]byte, 8) - util.Uint64toBytes(mskBytes, milestoneKey) - var msBytes = make([]byte, 8) - util.Uint64toBytes(msBytes, milestone) - if err := db.Put(mskBytes, msBytes, nil); err != nil { - return fmt.Errorf("failed to setMileStone: %v", err) +func setWatermark(db *leveldb.DB, watermark uint64) error { + glog.V(1).Infof("set watermark %d", watermark) + var wmBytes = make([]byte, 8) + util.Uint64toBytes(wmBytes, watermark) + if err := db.Put(watermarkKey, wmBytes, nil); err != nil { + return fmt.Errorf("failed to setWatermark: %v", err) } return nil } -func levelDbWrite(db *leveldb.DB, key NeedleId, offset Offset, size Size, upateMilstone bool, milestone uint64) error { +func levelDbWrite(db *leveldb.DB, key NeedleId, offset Offset, size Size, updateWatermark bool, watermark uint64) error { bytes := needle_map.ToBytes(key, offset, size) if err := db.Put(bytes[0:NeedleIdSize], bytes[NeedleIdSize:NeedleIdSize+OffsetSize+SizeSize], nil); err != nil { return fmt.Errorf("failed to write leveldb: %v", err) } - // set milestone - if upateMilstone { - return setMileStone(db, milestone) + // set watermark + if updateWatermark { + return setWatermark(db, watermark) } return nil } @@ -197,7 +194,7 @@ func levelDbDelete(db *leveldb.DB, key NeedleId) error { } func (m *LevelDbNeedleMap) Delete(key NeedleId, offset Offset) error { - var milestone uint64 + var watermark uint64 oldNeedle, found := m.Get(key) if !found || oldNeedle.Size.IsDeleted() { return nil @@ -208,13 +205,13 @@ func (m *LevelDbNeedleMap) Delete(key NeedleId, offset Offset) error { if err := m.appendToIndexFile(key, offset, TombstoneFileSize); err != nil { return err } - m.recordNum++ - if m.recordNum%milestoneCnt != 0 { - milestone = 0 + m.recordCount++ + if m.recordCount%watermarkBatchSize != 0 { + watermark = 0 } else { - milestone = (m.recordNum / milestoneCnt) * milestoneCnt + watermark = (m.recordCount / watermarkBatchSize) * watermarkBatchSize } - return levelDbWrite(m.db, key, oldNeedle.Offset, -oldNeedle.Size, milestone == 0, milestone) + return levelDbWrite(m.db, key, oldNeedle.Offset, -oldNeedle.Size, watermark == 0, watermark) } func (m *LevelDbNeedleMap) Close() { diff --git a/weed/storage/needle_map_memory.go b/weed/storage/needle_map_memory.go index 1b58708c6..4c7909dbd 100644 --- a/weed/storage/needle_map_memory.go +++ b/weed/storage/needle_map_memory.go @@ -33,7 +33,7 @@ func LoadCompactNeedleMap(file *os.File) (*NeedleMap, error) { } func doLoading(file *os.File, nm *NeedleMap) (*NeedleMap, error) { - e := idx.WalkIndexFile(file, func(key NeedleId, offset Offset, size Size) error { + e := idx.WalkIndexFile(file, 0, func(key NeedleId, offset Offset, size Size) error { nm.MaybeSetMaxFileKey(key) if !offset.IsZero() && size.IsValid() { nm.FileCounter++ From 312e13416b26fd6f1839c09ce82a6e6fe9fec8b9 Mon Sep 17 00:00:00 2001 From: Tuan Vuong Date: Wed, 20 Jul 2022 16:43:50 +0700 Subject: [PATCH 21/25] filter duplicated action --- weed/iamapi/iamapi_management_handlers.go | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/weed/iamapi/iamapi_management_handlers.go b/weed/iamapi/iamapi_management_handlers.go index e1f215bd3..8a42aa936 100644 --- a/weed/iamapi/iamapi_management_handlers.go +++ b/weed/iamapi/iamapi_management_handlers.go @@ -219,8 +219,16 @@ func (iama *IamApiServer) PutUserPolicy(s3cfg *iam_pb.S3ApiConfiguration, values if userName != ident.Name { continue } + + existedActions := make(map[string]bool, len(ident.Actions)) + for _, action := range ident.Actions { + existedActions[action] = true + } + for _, action := range actions { - ident.Actions = append(ident.Actions, action) + if !existedActions[action] { + ident.Actions = append(ident.Actions, action) + } } return resp, nil } @@ -349,7 +357,8 @@ func (iama *IamApiServer) CreateAccessKey(s3cfg *iam_pb.S3ApiConfiguration, valu } if !changed { s3cfg.Identities = append(s3cfg.Identities, - &iam_pb.Identity{Name: userName, + &iam_pb.Identity{ + Name: userName, Credentials: []*iam_pb.Credential{ { AccessKey: accessKeyId, From 6147b61b0a2ec576ade74c365884ac3bb51c56b6 Mon Sep 17 00:00:00 2001 From: chrislu Date: Thu, 21 Jul 2022 01:38:26 -0700 Subject: [PATCH 22/25] mount: fix concurrent map read and map write fix https://github.com/chrislusf/seaweedfs/issues/3344 --- weed/mount/weedfs_rename.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/weed/mount/weedfs_rename.go b/weed/mount/weedfs_rename.go index 0c7de0bbb..538cfead7 100644 --- a/weed/mount/weedfs_rename.go +++ b/weed/mount/weedfs_rename.go @@ -235,7 +235,7 @@ func (wfs *WFS) handleRenameResponse(ctx context.Context, resp *filer_pb.StreamR sourceInode, targetInode := wfs.inodeToPath.MovePath(oldPath, newPath) if sourceInode != 0 { - if fh, foundFh := wfs.fhmap.inode2fh[sourceInode]; foundFh && fh.entry != nil { + if fh, foundFh := wfs.fhmap.FindFileHandle(sourceInode); foundFh && fh.entry != nil { fh.entry.Name = newName } // invalidate attr and data From c93f7ffa44b294ceef242700dce3288eb8569ced Mon Sep 17 00:00:00 2001 From: chrislu Date: Thu, 21 Jul 2022 18:23:53 -0700 Subject: [PATCH 23/25] explicit bucket aware declaration --- weed/filer/abstract_sql/abstract_sql_store.go | 2 ++ weed/filer/mysql2/mysql2_store.go | 2 ++ weed/filer/postgres2/postgres2_store.go | 2 ++ weed/filer/ydb/ydb_store.go | 2 ++ 4 files changed, 8 insertions(+) diff --git a/weed/filer/abstract_sql/abstract_sql_store.go b/weed/filer/abstract_sql/abstract_sql_store.go index 13268b944..a159d5272 100644 --- a/weed/filer/abstract_sql/abstract_sql_store.go +++ b/weed/filer/abstract_sql/abstract_sql_store.go @@ -32,6 +32,8 @@ type AbstractSqlStore struct { dbsLock sync.Mutex } +var _ filer.BucketAware = (*AbstractSqlStore)(nil) + func (store *AbstractSqlStore) CanDropWholeBucket() bool { return store.SupportBucketTable } diff --git a/weed/filer/mysql2/mysql2_store.go b/weed/filer/mysql2/mysql2_store.go index e50480150..792c79e44 100644 --- a/weed/filer/mysql2/mysql2_store.go +++ b/weed/filer/mysql2/mysql2_store.go @@ -18,6 +18,8 @@ const ( CONNECTION_URL_PATTERN = "%s:%s@tcp(%s:%d)/%s?charset=utf8" ) +var _ filer.BucketAware = (*MysqlStore2)(nil) + func init() { filer.Stores = append(filer.Stores, &MysqlStore2{}) } diff --git a/weed/filer/postgres2/postgres2_store.go b/weed/filer/postgres2/postgres2_store.go index 0f573d8d0..3c57e4cb4 100644 --- a/weed/filer/postgres2/postgres2_store.go +++ b/weed/filer/postgres2/postgres2_store.go @@ -17,6 +17,8 @@ const ( CONNECTION_URL_PATTERN = "host=%s port=%d sslmode=%s connect_timeout=30" ) +var _ filer.BucketAware = (*PostgresStore2)(nil) + func init() { filer.Stores = append(filer.Stores, &PostgresStore2{}) } diff --git a/weed/filer/ydb/ydb_store.go b/weed/filer/ydb/ydb_store.go index 1e3a55a09..d5751bb5a 100644 --- a/weed/filer/ydb/ydb_store.go +++ b/weed/filer/ydb/ydb_store.go @@ -320,6 +320,8 @@ func (store *YdbStore) Shutdown() { _ = store.DB.Close(context.Background()) } +var _ filer.BucketAware = (*YdbStore)(nil) + func (store *YdbStore) CanDropWholeBucket() bool { return store.SupportBucketTable } From fc8241fb5e40c82ca0b645c4f5c7ddcea1362336 Mon Sep 17 00:00:00 2001 From: chrislu Date: Thu, 21 Jul 2022 18:48:51 -0700 Subject: [PATCH 24/25] leveldb3: add instant dropping bucket --- weed/filer/leveldb3/leveldb3_store.go | 16 ++++++++++---- weed/filer/leveldb3/leveldb3_store_bucket.go | 23 ++++++++++++++++++++ 2 files changed, 35 insertions(+), 4 deletions(-) create mode 100644 weed/filer/leveldb3/leveldb3_store_bucket.go diff --git a/weed/filer/leveldb3/leveldb3_store.go b/weed/filer/leveldb3/leveldb3_store.go index d21515bd4..8da4a9e7f 100644 --- a/weed/filer/leveldb3/leveldb3_store.go +++ b/weed/filer/leveldb3/leveldb3_store.go @@ -121,23 +121,31 @@ func (store *LevelDB3Store) findDB(fullpath weed_util.FullPath, isForChildren bo } store.dbsLock.RUnlock() - // upgrade to write lock + + db, err := store.createDB(bucket) + + return db, bucket, shortPath, err +} + +func (store *LevelDB3Store) createDB(bucket string) (*leveldb.DB, error) { + store.dbsLock.Lock() defer store.dbsLock.Unlock() // double check after getting the write lock if db, found := store.dbs[bucket]; found { - return db, bucket, shortPath, nil + return db, nil } // create db db, err := store.loadDB(bucket) if err != nil { - return nil, bucket, shortPath, err + return nil, err } + store.dbs[bucket] = db - return db, bucket, shortPath, nil + return db, nil } func (store *LevelDB3Store) closeDB(bucket string) { diff --git a/weed/filer/leveldb3/leveldb3_store_bucket.go b/weed/filer/leveldb3/leveldb3_store_bucket.go new file mode 100644 index 000000000..823fe363b --- /dev/null +++ b/weed/filer/leveldb3/leveldb3_store_bucket.go @@ -0,0 +1,23 @@ +package leveldb + +import ( + "github.com/chrislusf/seaweedfs/weed/filer" + "os" +) + +var _ filer.BucketAware = (*LevelDB3Store)(nil) + +func (store *LevelDB3Store) OnBucketCreation(bucket string) { + store.createDB(bucket) +} + +func (store *LevelDB3Store) OnBucketDeletion(bucket string) { + store.closeDB(bucket) + if bucket != "" { // just to make sure + os.RemoveAll(store.dir + "/" + bucket) + } +} + +func (store *LevelDB3Store) CanDropWholeBucket() bool { + return true +} From 7a6c559ab4a6b696bb574454b297ebefabec29ed Mon Sep 17 00:00:00 2001 From: chrislu Date: Thu, 21 Jul 2022 22:01:05 -0700 Subject: [PATCH 25/25] fix Change replication via volume.configure.replication by collection fix https://github.com/chrislusf/seaweedfs/issues/3346 --- .../command_volume_configure_replication.go | 46 ++++++++----------- 1 file changed, 20 insertions(+), 26 deletions(-) diff --git a/weed/shell/command_volume_configure_replication.go b/weed/shell/command_volume_configure_replication.go index 610986489..b07d58083 100644 --- a/weed/shell/command_volume_configure_replication.go +++ b/weed/shell/command_volume_configure_replication.go @@ -68,45 +68,39 @@ func (c *commandVolumeConfigureReplication) Do(args []string, commandEnv *Comman volumeFilter := getVolumeFilter(replicaPlacement, uint32(vid), *collectionPattern) // find all data nodes with volumes that needs replication change - var allLocations []location eachDataNode(topologyInfo, func(dc string, rack RackId, dn *master_pb.DataNodeInfo) { - loc := newLocation(dc, string(rack), dn) + var targetVolumeIds []uint32 for _, diskInfo := range dn.DiskInfos { for _, v := range diskInfo.VolumeInfos { if volumeFilter(v) { - allLocations = append(allLocations, loc) - continue + targetVolumeIds = append(targetVolumeIds, v.Id) } } } - }) - - if len(allLocations) == 0 { - return fmt.Errorf("no volume needs change") - } - - for _, dst := range allLocations { - err := operation.WithVolumeServerClient(false, pb.NewServerAddressFromDataNode(dst.dataNode), commandEnv.option.GrpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error { - resp, configureErr := volumeServerClient.VolumeConfigure(context.Background(), &volume_server_pb.VolumeConfigureRequest{ - VolumeId: uint32(vid), - Replication: replicaPlacement.String(), - }) - if configureErr != nil { - return configureErr - } - if resp.Error != "" { - return errors.New(resp.Error) + if len(targetVolumeIds) == 0 { + return + } + err = operation.WithVolumeServerClient(false, pb.NewServerAddressFromDataNode(dn), commandEnv.option.GrpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error { + for _, targetVolumeId := range targetVolumeIds { + resp, configureErr := volumeServerClient.VolumeConfigure(context.Background(), &volume_server_pb.VolumeConfigureRequest{ + VolumeId: targetVolumeId, + Replication: replicaPlacement.String(), + }) + if configureErr != nil { + return configureErr + } + if resp.Error != "" { + return errors.New(resp.Error) + } } return nil }) - if err != nil { - return err + return } + }) - } - - return nil + return err } func getVolumeFilter(replicaPlacement *super_block.ReplicaPlacement, volumeId uint32, collectionPattern string) func(message *master_pb.VolumeInformationMessage) bool {