ahead of time volume assignment

qieqieplus 2021-05-06 18:46:14 +08:00
parent c48ef78670
commit c4d32f6937
10 changed files with 202 additions and 70 deletions
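This commit grows volumes ahead of demand: write requests no longer grow volumes under a mutex on the request path, but push a topology.VolumeGrowRequest onto the buffered ms.vgCh channel, which the new ProcessGrowRequest goroutine dedupes and serves. Growth is triggered early by marking a writable volume "crowded" once its size passes volumeSizeLimit * master.volume_growth.threshold (default 0.9), and shouldVolumeGrow asks for more volumes when every active volume is already crowded (active <= crowded). Below is a minimal sketch of that decision rule, assuming a standalone helper with illustrative names (shouldGrow, sizes), not the topology types in this commit:

package main

import "fmt"

// Sketch of the crowded/active rule behind shouldVolumeGrow in this commit.
// sizes are current volume sizes; sizeLimit and threshold stand in for
// volumeSizeLimit and master.volume_growth.threshold (assumed inputs).
func shouldGrow(sizes []uint64, sizeLimit uint64, threshold float64) bool {
    active, crowded := 0, 0
    for _, size := range sizes {
        if size >= sizeLimit {
            continue // full volumes are no longer writable
        }
        active++
        if float64(size) > float64(sizeLimit)*threshold {
            crowded++ // nearly full: grow before it actually fills up
        }
    }
    return active <= crowded
}

func main() {
    limit := uint64(30 << 30) // 30GB volume size limit
    // both writable volumes are above 90% of the limit, so grow now
    fmt.Println(shouldGrow([]uint64{29 << 30, 28 << 30}, limit, 0.9)) // true
}

Because active <= crowded also holds when nothing is writable yet, the same channel path covers both creating the first volumes and topping up nearly full ones.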

View file

@@ -4,15 +4,68 @@ import (
 	"context"
 	"fmt"
 	"github.com/chrislusf/raft"
-	"github.com/chrislusf/seaweedfs/weed/storage/types"
+	"reflect"
+	"sync"
+	"time"
+
+	"github.com/chrislusf/seaweedfs/weed/glog"
 	"github.com/chrislusf/seaweedfs/weed/pb/master_pb"
 	"github.com/chrislusf/seaweedfs/weed/security"
 	"github.com/chrislusf/seaweedfs/weed/storage/needle"
 	"github.com/chrislusf/seaweedfs/weed/storage/super_block"
+	"github.com/chrislusf/seaweedfs/weed/storage/types"
 	"github.com/chrislusf/seaweedfs/weed/topology"
 )
 
+func (ms *MasterServer) ProcessGrowRequest() {
+	go func() {
+		filter := sync.Map{}
+		for {
+			req, ok := <-ms.vgCh
+			if !ok {
+				break
+			}
+
+			if !ms.Topo.IsLeader() {
+				//discard buffered requests
+				time.Sleep(time.Second * 1)
+				continue
+			}
+
+			// filter out identical requests being processed
+			found := false
+			filter.Range(func(k, v interface{}) bool {
+				if reflect.DeepEqual(k, req) {
+					found = true
+				}
+				return !found
+			})
+
+			// not atomic but it's okay
+			if !found && ms.shouldVolumeGrow(req.Option) {
+				filter.Store(req, nil)
+				// we have lock called inside vg
+				go func() {
+					glog.V(1).Infoln("starting automatic volume grow")
+					start := time.Now()
+					_, err := ms.vg.AutomaticGrowByType(req.Option, ms.grpcDialOption, ms.Topo, req.Count)
+					glog.V(1).Infoln("finished automatic volume grow, cost ", time.Now().Sub(start))
+
+					if req.ErrCh != nil {
+						req.ErrCh <- err
+						close(req.ErrCh)
+					}
+
+					filter.Delete(req)
+				}()
+			} else {
+				glog.V(4).Infoln("discard volume grow request")
+			}
+		}
+	}()
+}
+
 func (ms *MasterServer) LookupVolume(ctx context.Context, req *master_pb.LookupVolumeRequest) (*master_pb.LookupVolumeResponse, error) {
 	if !ms.Topo.IsLeader() {
@@ -68,31 +121,32 @@ func (ms *MasterServer) Assign(ctx context.Context, req *master_pb.AssignRequest
 		ReplicaPlacement:   replicaPlacement,
 		Ttl:                ttl,
 		DiskType:           diskType,
-		Prealloacte:        ms.preallocateSize,
+		Preallocate:        ms.preallocateSize,
 		DataCenter:         req.DataCenter,
 		Rack:               req.Rack,
 		DataNode:           req.DataNode,
 		MemoryMapMaxSizeMb: req.MemoryMapMaxSizeMb,
 	}
 
-	if !ms.Topo.HasWritableVolume(option) {
+	if ms.shouldVolumeGrow(option) {
 		if ms.Topo.AvailableSpaceFor(option) <= 0 {
 			return nil, fmt.Errorf("no free volumes left for " + option.String())
 		}
-		ms.vgLock.Lock()
-		if !ms.Topo.HasWritableVolume(option) {
-			if _, err = ms.vg.AutomaticGrowByType(option, ms.grpcDialOption, ms.Topo, int(req.WritableVolumeCount)); err != nil {
-				ms.vgLock.Unlock()
-				return nil, fmt.Errorf("Cannot grow volume group! %v", err)
-			}
-		}
-		ms.vgLock.Unlock()
+		ms.vgCh <- &topology.VolumeGrowRequest{
+			Option: option,
+			Count:  int(req.WritableVolumeCount),
+		}
 	}
-	fid, count, dn, err := ms.Topo.PickForWrite(req.Count, option)
-	if err != nil {
-		return nil, fmt.Errorf("%v", err)
-	}
+
+	var (
+		lastErr    error
+		maxTimeout = time.Second * 10
+		startTime  = time.Now()
+	)
+
+	for time.Now().Sub(startTime) < maxTimeout {
+		fid, count, dn, err := ms.Topo.PickForWrite(req.Count, option)
+		if err == nil {
 			return &master_pb.AssignResponse{
 				Fid: fid,
 				Url: dn.Url(),
@@ -100,6 +154,12 @@ func (ms *MasterServer) Assign(ctx context.Context, req *master_pb.AssignRequest
 				Count: count,
 				Auth:  string(security.GenJwt(ms.guard.SigningKey, ms.guard.ExpiresAfterSec, fid)),
 			}, nil
+		}
+		//glog.V(4).Infoln("waiting for volume growing...")
+		lastErr = err
+		time.Sleep(200 * time.Millisecond)
+	}
+	return nil, lastErr
 }
 
 func (ms *MasterServer) Statistics(ctx context.Context, req *master_pb.StatisticsRequest) (*master_pb.StatisticsResponse, error) {

View file

@@ -53,7 +53,7 @@ type MasterServer struct {
 	Topo *topology.Topology
 	vg   *topology.VolumeGrowth
-	vgLock sync.Mutex
+	vgCh chan *topology.VolumeGrowRequest
 
 	boundedLeaderChan chan int
@@ -82,6 +82,12 @@ func NewMasterServer(r *mux.Router, option *MasterOption, peers []string) *Maste
 	v.SetDefault("master.replication.treat_replication_as_minimums", false)
 	replicationAsMin := v.GetBool("master.replication.treat_replication_as_minimums")
 
+	v.SetDefault("master.volume_growth.copy_1", 7)
+	v.SetDefault("master.volume_growth.copy_2", 6)
+	v.SetDefault("master.volume_growth.copy_3", 3)
+	v.SetDefault("master.volume_growth.copy_other", 1)
+	v.SetDefault("master.volume_growth.threshold", 0.9)
+
 	var preallocateSize int64
 	if option.VolumePreallocate {
 		preallocateSize = int64(option.VolumeSizeLimitMB) * (1 << 20)
@@ -91,6 +97,7 @@ func NewMasterServer(r *mux.Router, option *MasterOption, peers []string) *Maste
 	ms := &MasterServer{
 		option:          option,
 		preallocateSize: preallocateSize,
+		vgCh:            make(chan *topology.VolumeGrowRequest, 1<<6),
 		clientChans:     make(map[string]chan *master_pb.VolumeLocation),
 		grpcDialOption:  grpcDialOption,
 		MasterClient:    wdclient.NewMasterClient(grpcDialOption, "master", option.Host, 0, "", peers),
@@ -128,7 +135,14 @@ func NewMasterServer(r *mux.Router, option *MasterOption, peers []string) *Maste
 		r.HandleFunc("/{fileId}", ms.redirectHandler)
 	}
 
-	ms.Topo.StartRefreshWritableVolumes(ms.grpcDialOption, ms.option.GarbageThreshold, ms.preallocateSize)
+	ms.Topo.StartRefreshWritableVolumes(
+		ms.grpcDialOption,
+		ms.option.GarbageThreshold,
+		v.GetFloat64("master.volume_growth.threshold"),
+		ms.preallocateSize,
+	)
+	ms.ProcessGrowRequest()
 
 	ms.startAdminScripts()

View file

@@ -10,6 +10,7 @@ import (
 	"github.com/chrislusf/seaweedfs/weed/security"
 	"github.com/chrislusf/seaweedfs/weed/stats"
 	"github.com/chrislusf/seaweedfs/weed/storage/needle"
+	"github.com/chrislusf/seaweedfs/weed/topology"
 )
 
 func (ms *MasterServer) lookupVolumeId(vids []string, collection string) (volumeLocations map[string]operation.LookupResult) {
@@ -111,19 +112,20 @@ func (ms *MasterServer) dirAssignHandler(w http.ResponseWriter, r *http.Request)
 		return
 	}
 
-	if !ms.Topo.HasWritableVolume(option) {
+	if ms.shouldVolumeGrow(option) {
 		if ms.Topo.AvailableSpaceFor(option) <= 0 {
 			writeJsonQuiet(w, r, http.StatusNotFound, operation.AssignResult{Error: "No free volumes left for " + option.String()})
 			return
 		}
-		ms.vgLock.Lock()
-		defer ms.vgLock.Unlock()
-		if !ms.Topo.HasWritableVolume(option) {
-			if _, err = ms.vg.AutomaticGrowByType(option, ms.grpcDialOption, ms.Topo, writableVolumeCount); err != nil {
-				writeJsonError(w, r, http.StatusInternalServerError,
-					fmt.Errorf("Cannot grow volume group! %v", err))
-				return
-			}
+		errCh := make(chan error, 1)
+		ms.vgCh <- &topology.VolumeGrowRequest{
+			Option: option,
+			Count:  writableVolumeCount,
+			ErrCh:  errCh,
+		}
+		if err := <-errCh; err != nil {
+			writeJsonError(w, r, http.StatusInternalServerError, fmt.Errorf("cannot grow volume group! %v", err))
+			return
 		}
 	}
 	fid, count, dn, err := ms.Topo.PickForWrite(requestedCount, option)

View file

@@ -3,7 +3,6 @@ package weed_server
 import (
 	"context"
 	"fmt"
-	"github.com/chrislusf/seaweedfs/weed/storage/types"
 	"math/rand"
 	"net/http"
 	"strconv"
@@ -14,6 +13,7 @@ import (
 	"github.com/chrislusf/seaweedfs/weed/storage/backend/memory_map"
 	"github.com/chrislusf/seaweedfs/weed/storage/needle"
 	"github.com/chrislusf/seaweedfs/weed/storage/super_block"
+	"github.com/chrislusf/seaweedfs/weed/storage/types"
 	"github.com/chrislusf/seaweedfs/weed/topology"
 	"github.com/chrislusf/seaweedfs/weed/util"
 )
@@ -136,9 +136,11 @@ func (ms *MasterServer) submitFromMasterServerHandler(w http.ResponseWriter, r *
 	}
 }
 
-func (ms *MasterServer) HasWritableVolume(option *topology.VolumeGrowOption) bool {
+func (ms *MasterServer) shouldVolumeGrow(option *topology.VolumeGrowOption) bool {
 	vl := ms.Topo.GetVolumeLayout(option.Collection, option.ReplicaPlacement, option.Ttl, option.DiskType)
-	return vl.GetActiveVolumeCount(option) > 0
+	active, high := vl.GetActiveVolumeCount(option)
+	//glog.V(0).Infof("active volume: %d, high usage volume: %d\n", active, high)
+	return active <= high
 }
 
 func (ms *MasterServer) getVolumeGrowOption(r *http.Request) (*topology.VolumeGrowOption, error) {
@@ -172,7 +174,7 @@ func (ms *MasterServer) getVolumeGrowOption(r *http.Request) (*topology.VolumeGr
 		ReplicaPlacement: replicaPlacement,
 		Ttl:              ttl,
 		DiskType:         diskType,
-		Prealloacte:      preallocate,
+		Preallocate:      preallocate,
 		DataCenter:       r.FormValue("dataCenter"),
 		Rack:             r.FormValue("rack"),
 		DataNode:         r.FormValue("dataNode"),

View file

@@ -22,7 +22,7 @@ func AllocateVolume(dn *DataNode, grpcDialOption grpc.DialOption, vid needle.Vol
 		Collection:         option.Collection,
 		Replication:        option.ReplicaPlacement.String(),
 		Ttl:                option.Ttl.String(),
-		Preallocate:        option.Prealloacte,
+		Preallocate:        option.Preallocate,
 		MemoryMapMaxSizeMb: option.MemoryMapMaxSizeMb,
 		DiskType:           string(option.DiskType),
 	})

View file

@@ -25,7 +25,7 @@ type Node interface {
 	SetParent(Node)
 	LinkChildNode(node Node)
 	UnlinkChildNode(nodeId NodeId)
-	CollectDeadNodeAndFullVolumes(freshThreshHold int64, volumeSizeLimit uint64)
+	CollectDeadNodeAndFullVolumes(freshThreshHold int64, volumeSizeLimit uint64, growThreshold float64)
 
 	IsDataNode() bool
 	IsRack() bool
@@ -235,20 +235,22 @@ func (n *NodeImpl) UnlinkChildNode(nodeId NodeId) {
 	}
 }
 
-func (n *NodeImpl) CollectDeadNodeAndFullVolumes(freshThreshHold int64, volumeSizeLimit uint64) {
+func (n *NodeImpl) CollectDeadNodeAndFullVolumes(freshThreshHold int64, volumeSizeLimit uint64, growThreshold float64) {
 	if n.IsRack() {
 		for _, c := range n.Children() {
 			dn := c.(*DataNode) //can not cast n to DataNode
 			for _, v := range dn.GetVolumes() {
-				if uint64(v.Size) >= volumeSizeLimit {
+				if v.Size >= volumeSizeLimit {
 					//fmt.Println("volume",v.Id,"size",v.Size,">",volumeSizeLimit)
-					n.GetTopology().chanFullVolumes <- v
+					n.GetTopology().chanFullVolumes <- &v
+				} else if float64(v.Size) > float64(volumeSizeLimit)*growThreshold {
+					n.GetTopology().chanCrowdedVolumes <- &v
 				}
 			}
 		}
 	} else {
 		for _, c := range n.Children() {
-			c.CollectDeadNodeAndFullVolumes(freshThreshHold, volumeSizeLimit)
+			c.CollectDeadNodeAndFullVolumes(freshThreshHold, volumeSizeLimit, growThreshold)
 		}
 	}
 }

View file

@@ -34,7 +34,8 @@ type Topology struct {
 	Sequence sequence.Sequencer
 
-	chanFullVolumes chan storage.VolumeInfo
+	chanFullVolumes    chan *storage.VolumeInfo
+	chanCrowdedVolumes chan *storage.VolumeInfo
 
 	Configuration *Configuration
@@ -56,7 +57,8 @@ func NewTopology(id string, seq sequence.Sequencer, volumeSizeLimit uint64, puls
 	t.Sequence = seq
 
-	t.chanFullVolumes = make(chan storage.VolumeInfo)
+	t.chanFullVolumes = make(chan *storage.VolumeInfo)
+	t.chanCrowdedVolumes = make(chan *storage.VolumeInfo)
 
 	t.Configuration = &Configuration{}
@@ -122,9 +124,11 @@ func (t *Topology) NextVolumeId() (needle.VolumeId, error) {
 	return next, nil
 }
 
+// deprecated
 func (t *Topology) HasWritableVolume(option *VolumeGrowOption) bool {
 	vl := t.GetVolumeLayout(option.Collection, option.ReplicaPlacement, option.Ttl, option.DiskType)
-	return vl.GetActiveVolumeCount(option) > 0
+	active, _ := vl.GetActiveVolumeCount(option)
+	return active > 0
 }
 
 func (t *Topology) PickForWrite(count uint64, option *VolumeGrowOption) (string, uint64, *DataNode, error) {

View file

@@ -10,12 +10,12 @@ import (
 	"github.com/chrislusf/seaweedfs/weed/storage"
 )
 
-func (t *Topology) StartRefreshWritableVolumes(grpcDialOption grpc.DialOption, garbageThreshold float64, preallocate int64) {
+func (t *Topology) StartRefreshWritableVolumes(grpcDialOption grpc.DialOption, garbageThreshold float64, growThreshold float64, preallocate int64) {
 	go func() {
 		for {
 			if t.IsLeader() {
 				freshThreshHold := time.Now().Unix() - 3*t.pulse //3 times of sleep interval
-				t.CollectDeadNodeAndFullVolumes(freshThreshHold, t.volumeSizeLimit)
+				t.CollectDeadNodeAndFullVolumes(freshThreshHold, t.volumeSizeLimit, growThreshold)
 			}
 			time.Sleep(time.Duration(float32(t.pulse*1e3)*(1+rand.Float32())) * time.Millisecond)
 		}
@@ -31,13 +31,15 @@ func (t *Topology) StartRefreshWritableVolumes(grpcDialOption grpc.DialOption, g
 	go func() {
 		for {
 			select {
-			case v := <-t.chanFullVolumes:
-				t.SetVolumeCapacityFull(v)
+			case fv := <-t.chanFullVolumes:
+				t.SetVolumeCapacityFull(fv)
+			case cv := <-t.chanCrowdedVolumes:
+				t.SetVolumeCrowded(cv)
 			}
 		}
 	}()
 }
 
-func (t *Topology) SetVolumeCapacityFull(volumeInfo storage.VolumeInfo) bool {
+func (t *Topology) SetVolumeCapacityFull(volumeInfo *storage.VolumeInfo) bool {
 	diskType := types.ToDiskType(volumeInfo.DiskType)
 	vl := t.GetVolumeLayout(volumeInfo.Collection, volumeInfo.ReplicaPlacement, volumeInfo.Ttl, diskType)
 	if !vl.SetVolumeCapacityFull(volumeInfo.Id) {
@@ -60,6 +62,13 @@ func (t *Topology) SetVolumeCapacityFull(volumeInfo *storage.VolumeInfo) bool {
 	}
 	return true
 }
 
+func (t *Topology) SetVolumeCrowded(volumeInfo *storage.VolumeInfo) {
+	diskType := types.ToDiskType(volumeInfo.DiskType)
+	vl := t.GetVolumeLayout(volumeInfo.Collection, volumeInfo.ReplicaPlacement, volumeInfo.Ttl, diskType)
+	vl.SetVolumeCrowded(volumeInfo.Id)
+}
+
 func (t *Topology) UnRegisterDataNode(dn *DataNode) {
 	for _, v := range dn.GetVolumes() {
 		glog.V(0).Infoln("Removing Volume", v.Id, "from the dead volume server", dn.Id())

View file

@@ -3,18 +3,17 @@ package topology
 import (
 	"encoding/json"
 	"fmt"
-	"github.com/chrislusf/seaweedfs/weed/storage/types"
 	"math/rand"
 	"sync"
 
-	"github.com/chrislusf/seaweedfs/weed/storage/needle"
-	"github.com/chrislusf/seaweedfs/weed/storage/super_block"
-	"github.com/chrislusf/seaweedfs/weed/util"
 	"google.golang.org/grpc"
 
 	"github.com/chrislusf/seaweedfs/weed/glog"
 	"github.com/chrislusf/seaweedfs/weed/storage"
+	"github.com/chrislusf/seaweedfs/weed/storage/needle"
+	"github.com/chrislusf/seaweedfs/weed/storage/super_block"
+	"github.com/chrislusf/seaweedfs/weed/storage/types"
+	"github.com/chrislusf/seaweedfs/weed/util"
 )
 
 /*
@@ -25,12 +24,18 @@ This package is created to resolve these replica placement issues:
 4. volume allocation for each bucket
 */
 
+type VolumeGrowRequest struct {
+	Option *VolumeGrowOption
+	Count  int
+	ErrCh  chan error
+}
+
 type VolumeGrowOption struct {
 	Collection       string                        `json:"collection,omitempty"`
 	ReplicaPlacement *super_block.ReplicaPlacement `json:"replication,omitempty"`
 	Ttl              *needle.TTL                   `json:"ttl,omitempty"`
 	DiskType         types.DiskType                `json:"disk,omitempty"`
-	Prealloacte      int64                         `json:"prealloacte,omitempty"`
+	Preallocate      int64                         `json:"preallocate,omitempty"`
 	DataCenter       string                        `json:"dataCenter,omitempty"`
 	Rack             string                        `json:"rack,omitempty"`
 	DataNode         string                        `json:"dataNode,omitempty"`
@@ -46,6 +51,11 @@ func (o *VolumeGrowOption) String() string {
 	return string(blob)
 }
 
+func (o *VolumeGrowOption) Threshold() float64 {
+	v := util.GetViper()
+	return v.GetFloat64("master.volume_growth.threshold")
+}
+
 func NewDefaultVolumeGrowth() *VolumeGrowth {
 	return &VolumeGrowth{}
 }
@@ -54,10 +64,6 @@ func NewDefaultVolumeGrowth() *VolumeGrowth {
 // given copyCount, how many logical volumes to create
 func (vg *VolumeGrowth) findVolumeCount(copyCount int) (count int) {
 	v := util.GetViper()
-	v.SetDefault("master.volume_growth.copy_1", 7)
-	v.SetDefault("master.volume_growth.copy_2", 6)
-	v.SetDefault("master.volume_growth.copy_3", 3)
-	v.SetDefault("master.volume_growth.copy_other", 1)
 	switch copyCount {
 	case 1:
 		count = v.GetInt("master.volume_growth.copy_1")

View file

@@ -27,6 +27,7 @@ type volumeState string
 const (
 	readOnlyState  volumeState = "ReadOnly"
 	oversizedState             = "Oversized"
+	crowdedState               = "Crowded"
 )
 
 type stateIndicator func(copyState) bool
@@ -107,6 +108,7 @@ type VolumeLayout struct {
 	diskType         types.DiskType
 	vid2location     map[needle.VolumeId]*VolumeLocationList
 	writables        []needle.VolumeId // transient array of writable volume id
+	crowded          map[needle.VolumeId]interface{}
 	readonlyVolumes  *volumesBinaryState // readonly volumes
 	oversizedVolumes *volumesBinaryState // oversized volumes
 	volumeSizeLimit  uint64
@@ -127,6 +129,7 @@ func NewVolumeLayout(rp *super_block.ReplicaPlacement, ttl *needle.TTL, diskType
 		diskType:         diskType,
 		vid2location:     make(map[needle.VolumeId]*VolumeLocationList),
 		writables:        *new([]needle.VolumeId),
+		crowded:          make(map[needle.VolumeId]interface{}),
 		readonlyVolumes:  NewVolumesBinaryState(readOnlyState, rp, ExistCopies()),
 		oversizedVolumes: NewVolumesBinaryState(oversizedState, rp, ExistCopies()),
 		volumeSizeLimit:  volumeSizeLimit,
@@ -273,7 +276,7 @@ func (vl *VolumeLayout) PickForWrite(count uint64, option *VolumeGrowOption) (*n
 	lenWriters := len(vl.writables)
 	if lenWriters <= 0 {
-		glog.V(0).Infoln("No more writable volumes!")
+		//glog.V(0).Infoln("No more writable volumes!")
 		return nil, 0, nil, errors.New("No more writable volumes!")
 	}
 	if option.DataCenter == "" {
@@ -307,14 +310,13 @@ func (vl *VolumeLayout) PickForWrite(count uint64, option *VolumeGrowOption) (*n
 	return &vid, count, locationList, nil
 }
 
-func (vl *VolumeLayout) GetActiveVolumeCount(option *VolumeGrowOption) int {
+func (vl *VolumeLayout) GetActiveVolumeCount(option *VolumeGrowOption) (active, crowded int) {
 	vl.accessLock.RLock()
 	defer vl.accessLock.RUnlock()
 	if option.DataCenter == "" {
-		return len(vl.writables)
+		return len(vl.writables), len(vl.crowded)
 	}
-	counter := 0
 	for _, v := range vl.writables {
 		for _, dn := range vl.vid2location[v].list {
 			if dn.GetDataCenter().Id() == NodeId(option.DataCenter) {
@@ -324,11 +326,15 @@ func (vl *VolumeLayout) GetActiveVolumeCount(option *VolumeGrowOption) int {
 				if option.DataNode != "" && dn.Id() != NodeId(option.DataNode) {
 					continue
 				}
-				counter++
+				active++
+				info, _ := dn.GetVolumesById(v)
+				if float64(info.Size) > float64(vl.volumeSizeLimit)*option.Threshold() {
+					crowded++
+				}
 			}
 		}
 	}
-	return counter
+	return
 }
 
 func (vl *VolumeLayout) removeFromWritable(vid needle.VolumeId) bool {
@@ -342,6 +348,7 @@ func (vl *VolumeLayout) removeFromWritable(vid needle.VolumeId) bool {
 	if toDeleteIndex >= 0 {
 		glog.V(0).Infoln("Volume", vid, "becomes unwritable")
 		vl.writables = append(vl.writables[0:toDeleteIndex], vl.writables[toDeleteIndex+1:]...)
+		vl.removeFromCrowded(vid)
 		return true
 	}
 	return false
@@ -408,6 +415,32 @@ func (vl *VolumeLayout) SetVolumeCapacityFull(vid needle.VolumeId) bool {
 	return vl.removeFromWritable(vid)
 }
 
+func (vl *VolumeLayout) removeFromCrowded(vid needle.VolumeId) {
+	delete(vl.crowded, vid)
+}
+
+func (vl *VolumeLayout) setVolumeCrowded(vid needle.VolumeId) {
+	if _, ok := vl.crowded[vid]; !ok {
+		vl.crowded[vid] = nil
+		glog.V(0).Infoln("Volume", vid, "becomes crowded")
+	}
+}
+
+func (vl *VolumeLayout) SetVolumeCrowded(vid needle.VolumeId) {
+	// since delete is guarded by accessLock.Lock(),
+	// and is always called in sequential order,
+	// RLock() should be safe enough
+	vl.accessLock.RLock()
+	defer vl.accessLock.RUnlock()
+
+	for _, v := range vl.writables {
+		if v == vid {
+			vl.setVolumeCrowded(vid)
+			break
+		}
+	}
+}
+
 func (vl *VolumeLayout) ToMap() map[string]interface{} {
 	m := make(map[string]interface{})
 	m["replication"] = vl.rp.String()