fix(ec): volumes created by foreign collection due to bug in ec balance (#4864)

* fix(ec): ignore 0 byte data files

refers to parts of #4861

Signed-off-by: Tobias Gurtzick <magic@wizardtales.com>

* fix(ec): ignore volumes not from the current collection during balance

fixes #4861

Signed-off-by: Tobias Gurtzick <magic@wizardtales.com>

---------

Signed-off-by: Tobias Gurtzick <magic@wizardtales.com>
This commit is contained in:
Tobias Gurtzick 2023-09-25 19:35:43 +02:00 committed by GitHub
parent faabc695b6
commit 78dbac7702
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 21 additions and 8 deletions

View file

@ -3,12 +3,13 @@ package shell
import (
"flag"
"fmt"
"io"
"github.com/seaweedfs/seaweedfs/weed/pb"
"github.com/seaweedfs/seaweedfs/weed/storage/erasure_coding"
"github.com/seaweedfs/seaweedfs/weed/storage/needle"
"github.com/seaweedfs/seaweedfs/weed/storage/types"
"golang.org/x/exp/slices"
"io"
)
func init() {
@ -184,7 +185,7 @@ func balanceEcVolumes(commandEnv *CommandEnv, collection string, allEcNodes []*E
func deleteDuplicatedEcShards(commandEnv *CommandEnv, allEcNodes []*EcNode, collection string, applyBalancing bool) error {
// vid => []ecNode
vidLocations := collectVolumeIdToEcNodes(allEcNodes)
vidLocations := collectVolumeIdToEcNodes(allEcNodes, collection)
// deduplicate ec shards
for vid, locations := range vidLocations {
if err := doDeduplicateEcShards(commandEnv, collection, vid, locations, applyBalancing); err != nil {
@ -230,7 +231,7 @@ func doDeduplicateEcShards(commandEnv *CommandEnv, collection string, vid needle
func balanceEcShardsAcrossRacks(commandEnv *CommandEnv, allEcNodes []*EcNode, racks map[RackId]*EcRack, collection string, applyBalancing bool) error {
// collect vid => []ecNode, since previous steps can change the locations
vidLocations := collectVolumeIdToEcNodes(allEcNodes)
vidLocations := collectVolumeIdToEcNodes(allEcNodes, collection)
// spread the ec shards evenly
for vid, locations := range vidLocations {
if err := doBalanceEcShardsAcrossRacks(commandEnv, collection, vid, locations, racks, applyBalancing); err != nil {
@ -309,7 +310,7 @@ func pickOneRack(rackToEcNodes map[RackId]*EcRack, rackToShardCount map[string]i
func balanceEcShardsWithinRacks(commandEnv *CommandEnv, allEcNodes []*EcNode, racks map[RackId]*EcRack, collection string, applyBalancing bool) error {
// collect vid => []ecNode, since previous steps can change the locations
vidLocations := collectVolumeIdToEcNodes(allEcNodes)
vidLocations := collectVolumeIdToEcNodes(allEcNodes, collection)
// spread the ec shards evenly
for vid, locations := range vidLocations {
@ -520,7 +521,7 @@ func pickNEcShardsToMoveFrom(ecNodes []*EcNode, vid needle.VolumeId, n int) map[
return picked
}
func collectVolumeIdToEcNodes(allEcNodes []*EcNode) map[needle.VolumeId][]*EcNode {
func collectVolumeIdToEcNodes(allEcNodes []*EcNode, collection string) map[needle.VolumeId][]*EcNode {
vidLocations := make(map[needle.VolumeId][]*EcNode)
for _, ecNode := range allEcNodes {
diskInfo, found := ecNode.info.DiskInfos[string(types.HardDriveType)]
@ -528,7 +529,10 @@ func collectVolumeIdToEcNodes(allEcNodes []*EcNode) map[needle.VolumeId][]*EcNod
continue
}
for _, shardInfo := range diskInfo.EcShardInfos {
vidLocations[needle.VolumeId(shardInfo.Id)] = append(vidLocations[needle.VolumeId(shardInfo.Id)], ecNode)
// ignore if not in current collection
if shardInfo.Collection == collection {
vidLocations[needle.VolumeId(shardInfo.Id)] = append(vidLocations[needle.VolumeId(shardInfo.Id)], ecNode)
}
}
}
return vidLocations

View file

@ -2,13 +2,14 @@ package storage
import (
"fmt"
"golang.org/x/exp/slices"
"os"
"path"
"regexp"
"strconv"
"strings"
"golang.org/x/exp/slices"
"github.com/seaweedfs/seaweedfs/weed/storage/erasure_coding"
"github.com/seaweedfs/seaweedfs/weed/storage/needle"
)
@ -163,7 +164,15 @@ func (l *DiskLocation) loadAllEcShards() (err error) {
continue
}
if re.MatchString(ext) {
info, err := fileInfo.Info()
if err != nil {
continue
}
// 0-byte files should only appear erroneously for ec data files,
// so we ignore them
if re.MatchString(ext) && info.Size() > 0 {
if prevVolumeId == 0 || volumeId == prevVolumeId {
sameVolumeShards = append(sameVolumeShards, fileInfo.Name())
} else {