diff --git a/weed/shell/command_volume_check_disk.go b/weed/shell/command_volume_check_disk.go index 64b510383..b53229dbd 100644 --- a/weed/shell/command_volume_check_disk.go +++ b/weed/shell/command_volume_check_disk.go @@ -49,6 +49,8 @@ func (c *commandVolumeCheckDisk) Do(args []string, commandEnv *CommandEnv, write slowMode := fsckCommand.Bool("slow", false, "slow mode checks all replicas even file counts are the same") verbose := fsckCommand.Bool("v", false, "verbose mode") applyChanges := fsckCommand.Bool("force", false, "apply the fix") + missingKeysThreshold := fsckCommand.Float64("missingKeysThreshold", 0.3, "repair when missing keys is not more than this limit") + if err = fsckCommand.Parse(args); err != nil { return nil } @@ -101,10 +103,10 @@ func (c *commandVolumeCheckDisk) Do(args []string, commandEnv *CommandEnv, write } // find and make up the differnces - if err := c.doVolumeCheckDisk(aDB, bDB, a, b, *verbose, writer, *applyChanges); err != nil { + if err := c.doVolumeCheckDisk(aDB, bDB, a, b, *verbose, writer, *applyChanges, *missingKeysThreshold); err != nil { return err } - if err := c.doVolumeCheckDisk(bDB, aDB, b, a, *verbose, writer, *applyChanges); err != nil { + if err := c.doVolumeCheckDisk(bDB, aDB, b, a, *verbose, writer, *applyChanges, *missingKeysThreshold); err != nil { return err } replicas = replicas[1:] @@ -114,7 +116,7 @@ func (c *commandVolumeCheckDisk) Do(args []string, commandEnv *CommandEnv, write return nil } -func (c *commandVolumeCheckDisk) doVolumeCheckDisk(subtrahend, minuend *needle_map.MemDb, source, target *VolumeReplica, verbose bool, writer io.Writer, applyChanges bool) error { +func (c *commandVolumeCheckDisk) doVolumeCheckDisk(subtrahend, minuend *needle_map.MemDb, source, target *VolumeReplica, verbose bool, writer io.Writer, applyChanges bool, missingKeysThreshold float64) error { // find missing keys // hash join, can be more efficient @@ -129,6 +131,12 @@ func (c *commandVolumeCheckDisk) doVolumeCheckDisk(subtrahend, minuend *needle_m }) fmt.Fprintf(writer, "volume %d %s has %d entries, %s missed %d entries\n", source.info.Id, source.location.dataNode.Id, counter, target.location.dataNode.Id, len(missingNeedles)) + missingNeedlesPercent := float64(len(missingNeedles)) / float64(counter) + if missingNeedlesPercent > missingKeysThreshold { + return fmt.Errorf( + "failed to start repair volume %d, percentage of missing keys is greater than the threshold: %.2f > %.2f", + source.info.Id, missingNeedlesPercent, missingKeysThreshold) + } for _, needleValue := range missingNeedles { @@ -151,7 +159,6 @@ func (c *commandVolumeCheckDisk) doVolumeCheckDisk(subtrahend, minuend *needle_m } - return nil } @@ -161,8 +168,8 @@ func (c *commandVolumeCheckDisk) readSourceNeedleBlob(sourceVolumeServer string, resp, err := client.ReadNeedleBlob(context.Background(), &volume_server_pb.ReadNeedleBlobRequest{ VolumeId: volumeId, NeedleId: uint64(needleValue.Key), - Offset: needleValue.Offset.ToActualOffset(), - Size: int32(needleValue.Size), + Offset: needleValue.Offset.ToActualOffset(), + Size: int32(needleValue.Size), }) if err != nil { return err @@ -177,9 +184,9 @@ func (c *commandVolumeCheckDisk) writeNeedleBlobToTarget(targetVolumeServer stri return operation.WithVolumeServerClient(targetVolumeServer, c.env.option.GrpcDialOption, func(client volume_server_pb.VolumeServerClient) error { _, err := client.WriteNeedleBlob(context.Background(), &volume_server_pb.WriteNeedleBlobRequest{ - VolumeId: volumeId, - NeedleId: uint64(needleValue.Key), - Size: int32(needleValue.Size), + VolumeId: volumeId, + NeedleId: uint64(needleValue.Key), + Size: int32(needleValue.Size), NeedleBlob: needleBlob, }) return err