seaweedfs/weed/replication/volume_growth.go
2013-02-10 03:31:40 -08:00

196 lines
6 KiB
Go

package replication
import (
"errors"
"fmt"
"math/rand"
"code.google.com/p/weed-fs/weed/operation"
"code.google.com/p/weed-fs/weed/storage"
"code.google.com/p/weed-fs/weed/topology"
"sync"
)
/*
This package addresses the following replica placement issues:
1. growth factor for each replica level, e.g., add 10 volumes for 1 copy, 20 volumes for 2 copies, 30 volumes for 3 copies
2. how to reduce the replica level when storage is tight
3. optimizing placement so that hot data goes on faster disks and cold data on cheaper storage
4. volume allocation for each bucket
*/
// VolumeGrowth carries the per-replication-level growth factors and the lock
// that serializes volume growth requests.
type VolumeGrowth struct {
	// Number of volumes requested per GrowByType call: copy1factor is used for
	// Copy000, copy2factor for Copy001/Copy010/Copy100, and copy3factor for
	// Copy110/Copy200.
	copy1factor, copy2factor, copy3factor int
	// copyAll is not read or written anywhere in this file.
	copyAll int
	// accessLock is held for the whole duration of GrowByCountAndType.
	accessLock sync.Mutex
}
// NewDefaultVolumeGrowth returns a VolumeGrowth configured with the default
// growth factors: 7 volumes for single-copy, 6 for two-copy and 3 for
// three-copy replication types. copyAll is left at its zero value.
func NewDefaultVolumeGrowth() *VolumeGrowth {
	vg := new(VolumeGrowth)
	vg.copy1factor = 7
	vg.copy2factor = 6
	vg.copy3factor = 3
	return vg
}
// GrowByType grows volumes for repType using the growth factor configured for
// that replication level, delegating the actual work to GrowByCountAndType.
//
// It returns the number of volumes created and any error reported by
// GrowByCountAndType. An unrecognized repType yields (0, error).
func (vg *VolumeGrowth) GrowByType(repType storage.ReplicationType, topo *topology.Topology) (int, error) {
	var factor int
	switch repType {
	case storage.Copy000:
		// A single copy.
		factor = vg.copy1factor
	case storage.Copy001, storage.Copy010, storage.Copy100:
		// Two copies.
		factor = vg.copy2factor
	case storage.Copy110, storage.Copy200:
		// Three copies.
		factor = vg.copy3factor
	default:
		return 0, errors.New("Unknown Replication Type!")
	}
	return vg.GrowByCountAndType(factor, repType, topo)
}
// GrowByCountAndType makes up to count attempts to create a new volume with
// the given replication type, holding vg.accessLock for the whole call.
//
// counter is the number of attempts for which grow succeeded. err is the
// result of the most recent grow call only: it is nil when the last attempted
// grow succeeded or when no grow was attempted at all (for example because no
// server could be reserved), and an earlier failure is overwritten by a later
// success. An unrecognized repType results in (0, nil).
//
// Attempts that cannot reserve every required server are skipped; servers
// already reserved during such an attempt are not released here.
func (vg *VolumeGrowth) GrowByCountAndType(count int, repType storage.ReplicationType, topo *topology.Topology) (counter int, err error) {
	vg.accessLock.Lock()
	defer vg.accessLock.Unlock()

	for i := 0; i < count; i++ {
		switch repType {
		case storage.Copy000:
			// One copy: any randomly reserved server will do.
			ok, server, vid := topo.RandomlyReserveOneVolume()
			if !ok {
				continue
			}
			if err = vg.grow(topo, *vid, repType, server); err == nil {
				counter++
			}

		case storage.Copy001:
			// Two copies: randomly pick one server, then choose a different
			// server under the same parent (the rack).
			ok, first, vid := topo.RandomlyReserveOneVolume()
			if !ok {
				continue
			}
			rack := first.Parent()
			excluded := map[string]topology.Node{first.String(): first}
			candidates := topology.NewNodeList(rack.Children(), excluded)
			if candidates.FreeSpace() <= 0 {
				continue
			}
			found, second := candidates.ReserveOneVolume(rand.Intn(candidates.FreeSpace()), *vid)
			if !found {
				continue
			}
			if err = vg.grow(topo, *vid, repType, first, second); err == nil {
				counter++
			}

		case storage.Copy010:
			// Two copies: randomly pick one server, then choose a server from
			// a different rack under the same grandparent (the data center).
			ok, first, vid := topo.RandomlyReserveOneVolume()
			if !ok {
				continue
			}
			rack := first.Parent()
			dc := rack.Parent()
			excluded := map[string]topology.Node{rack.String(): rack}
			candidates := topology.NewNodeList(dc.Children(), excluded)
			if candidates.FreeSpace() <= 0 {
				continue
			}
			found, second := candidates.ReserveOneVolume(rand.Intn(candidates.FreeSpace()), *vid)
			if !found {
				continue
			}
			if err = vg.grow(topo, *vid, repType, first, second); err == nil {
				counter++
			}

		case storage.Copy100:
			// Two copies: one server from each of two picked children of the
			// topology root (presumably data centers — confirm against
			// NodeList.RandomlyPickN).
			roots := topology.NewNodeList(topo.Children(), nil)
			picked, pickedOK := roots.RandomlyPickN(2, 1)
			// The volume id is taken before the pick result is checked.
			vid := topo.NextVolumeId()
			if !pickedOK {
				continue
			}
			var servers []*topology.DataNode
			for _, n := range picked {
				if n.FreeSpace() <= 0 {
					continue
				}
				if ok, server := n.ReserveOneVolume(rand.Intn(n.FreeSpace()), vid); ok {
					servers = append(servers, server)
				}
			}
			if len(servers) != 2 {
				continue
			}
			if err = vg.grow(topo, vid, repType, servers...); err == nil {
				counter++
			}

		case storage.Copy110:
			// Three copies: two servers on different racks of one picked node
			// and one server on the other picked node.
			roots := topology.NewNodeList(topo.Children(), nil)
			picked, pickedOK := roots.RandomlyPickN(2, 2)
			// The volume id is taken before the pick result is checked.
			vid := topo.NextVolumeId()
			if !pickedOK {
				continue
			}
			var servers []*topology.DataNode
			// The node with more free space hosts two of the three copies.
			primary, secondary := picked[0], picked[1]
			if secondary.FreeSpace() > primary.FreeSpace() {
				primary, secondary = secondary, primary
			}
			if primary.FreeSpace() > 0 {
				if ok, first := primary.ReserveOneVolume(rand.Intn(primary.FreeSpace()), vid); ok {
					servers = append(servers, first)
					// The second copy must not share a rack with the first.
					rack := first.Parent()
					excluded := map[string]topology.Node{rack.String(): rack}
					otherRacks := topology.NewNodeList(primary.Children(), excluded)
					if otherRacks.FreeSpace() > 0 {
						if found, second := otherRacks.ReserveOneVolume(rand.Intn(otherRacks.FreeSpace()), vid); found {
							servers = append(servers, second)
						}
					}
				}
			}
			if secondary.FreeSpace() > 0 {
				if ok, third := secondary.ReserveOneVolume(rand.Intn(secondary.FreeSpace()), vid); ok {
					servers = append(servers, third)
				}
			}
			if len(servers) != 3 {
				continue
			}
			if err = vg.grow(topo, vid, repType, servers...); err == nil {
				counter++
			}

		case storage.Copy200:
			// Three copies: one server from each of three picked children of
			// the topology root (presumably data centers — confirm against
			// NodeList.RandomlyPickN).
			roots := topology.NewNodeList(topo.Children(), nil)
			picked, pickedOK := roots.RandomlyPickN(3, 1)
			// The volume id is taken before the pick result is checked.
			vid := topo.NextVolumeId()
			if !pickedOK {
				continue
			}
			var servers []*topology.DataNode
			for _, n := range picked {
				if n.FreeSpace() <= 0 {
					continue
				}
				if ok, server := n.ReserveOneVolume(rand.Intn(n.FreeSpace()), vid); ok {
					servers = append(servers, server)
				}
			}
			if len(servers) != 3 {
				continue
			}
			if err = vg.grow(topo, vid, repType, servers...); err == nil {
				counter++
			}
		}
	}
	return
}
// grow allocates volume vid with replication type repType on each of the
// given servers, in order. After every successful allocation the volume is
// recorded on the server and registered in the topology's volume layout.
//
// It stops at the first server whose allocation fails and returns an error
// that includes the underlying cause. It returns nil once every server has
// been processed.
//
// NOTE(review): servers processed before a failure keep their allocated and
// registered volume; nothing is rolled back here.
func (vg *VolumeGrowth) grow(topo *topology.Topology, vid storage.VolumeId, repType storage.ReplicationType, servers ...*topology.DataNode) error {
	for _, server := range servers {
		if err := operation.AllocateVolume(server, vid, repType); err != nil {
			// Surface the cause of the failure instead of discarding it.
			fmt.Println("Failed to assign", vid, "to", servers, "error", err)
			return fmt.Errorf("Failed to assign %s: %v", vid.String(), err)
		}
		vi := storage.VolumeInfo{Id: vid, Size: 0, RepType: repType, Version: storage.CurrentVersion}
		server.AddOrUpdateVolume(vi)
		topo.RegisterVolumeLayout(&vi, server)
		fmt.Println("Created Volume", vid, "on", server)
	}
	return nil
}