2012-08-27 20:52:02 +00:00
|
|
|
package replication
|
|
|
|
|
|
|
|
import (
|
2013-09-02 06:58:21 +00:00
|
|
|
"code.google.com/p/weed-fs/go/glog"
|
2013-02-10 11:49:51 +00:00
|
|
|
"code.google.com/p/weed-fs/go/operation"
|
|
|
|
"code.google.com/p/weed-fs/go/storage"
|
|
|
|
"code.google.com/p/weed-fs/go/topology"
|
2013-02-27 06:54:22 +00:00
|
|
|
"errors"
|
|
|
|
"fmt"
|
|
|
|
"math/rand"
|
2013-01-17 08:56:56 +00:00
|
|
|
"sync"
|
2012-08-27 20:52:02 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
/*
|
|
|
|
This package is created to resolve these replica placement issues:
|
|
|
|
1. growth factor for each replica level, e.g., add 10 volumes for 1 copy, 20 volumes for 2 copies, 30 volumes for 3 copies
|
|
|
|
2. in time of tight storage, how to reduce replica level
|
|
|
|
3. optimizing for hot data on faster disk, cold data on cheaper storage
|
|
|
|
4. volume allocation for each bucket
|
|
|
|
*/
|
|
|
|
|
|
|
|
// VolumeGrowth holds the per-replication-level growth counts and the lock
// that serializes volume growth.
type VolumeGrowth struct {
	// Number of volumes AutomaticGrowByType asks for, by replication level:
	// copy1factor for Copy000, copy2factor for Copy001/Copy010/Copy100,
	// copy3factor for Copy110/Copy200.
	copy1factor, copy2factor, copy3factor int

	// copyAll is not read by any code in this file.
	copyAll int

	// accessLock is held for the whole of GrowByCountAndType.
	accessLock sync.Mutex
}
|
|
|
|
|
2012-09-17 00:31:15 +00:00
|
|
|
func NewDefaultVolumeGrowth() *VolumeGrowth {
|
|
|
|
return &VolumeGrowth{copy1factor: 7, copy2factor: 6, copy3factor: 3}
|
|
|
|
}
|
|
|
|
|
2013-11-12 10:21:22 +00:00
|
|
|
func (vg *VolumeGrowth) AutomaticGrowByType(collection string, repType storage.ReplicationType, dataCenter string, topo *topology.Topology) (count int, err error) {
|
2013-07-24 17:31:51 +00:00
|
|
|
factor := 1
|
2012-09-17 00:31:15 +00:00
|
|
|
switch repType {
|
2012-09-30 09:20:33 +00:00
|
|
|
case storage.Copy000:
|
2013-07-24 17:31:51 +00:00
|
|
|
factor = 1
|
2013-11-12 10:21:22 +00:00
|
|
|
count, err = vg.GrowByCountAndType(vg.copy1factor, collection, repType, dataCenter, topo)
|
2012-09-30 09:20:33 +00:00
|
|
|
case storage.Copy001:
|
2013-09-02 06:58:21 +00:00
|
|
|
factor = 2
|
2013-11-12 10:21:22 +00:00
|
|
|
count, err = vg.GrowByCountAndType(vg.copy2factor, collection, repType, dataCenter, topo)
|
2012-09-30 09:20:33 +00:00
|
|
|
case storage.Copy010:
|
2013-09-02 06:58:21 +00:00
|
|
|
factor = 2
|
2013-11-12 10:21:22 +00:00
|
|
|
count, err = vg.GrowByCountAndType(vg.copy2factor, collection, repType, dataCenter, topo)
|
2012-09-30 09:20:33 +00:00
|
|
|
case storage.Copy100:
|
2013-09-02 06:58:21 +00:00
|
|
|
factor = 2
|
2013-11-12 10:21:22 +00:00
|
|
|
count, err = vg.GrowByCountAndType(vg.copy2factor, collection, repType, dataCenter, topo)
|
2012-09-30 09:20:33 +00:00
|
|
|
case storage.Copy110:
|
2013-07-24 17:31:51 +00:00
|
|
|
factor = 3
|
2013-11-12 10:21:22 +00:00
|
|
|
count, err = vg.GrowByCountAndType(vg.copy3factor, collection, repType, dataCenter, topo)
|
2012-09-30 09:20:33 +00:00
|
|
|
case storage.Copy200:
|
2013-07-24 17:31:51 +00:00
|
|
|
factor = 3
|
2013-11-12 10:21:22 +00:00
|
|
|
count, err = vg.GrowByCountAndType(vg.copy3factor, collection, repType, dataCenter, topo)
|
2013-07-24 17:31:51 +00:00
|
|
|
default:
|
|
|
|
err = errors.New("Unknown Replication Type!")
|
2012-09-17 00:31:15 +00:00
|
|
|
}
|
2013-07-24 17:31:51 +00:00
|
|
|
if count > 0 && count%factor == 0 {
|
|
|
|
return count, nil
|
|
|
|
}
|
|
|
|
return count, err
|
2012-09-17 00:31:15 +00:00
|
|
|
}
|
2013-11-12 10:21:22 +00:00
|
|
|
func (vg *VolumeGrowth) GrowByCountAndType(count int, collection string, repType storage.ReplicationType, dataCenter string, topo *topology.Topology) (counter int, err error) {
|
2013-01-17 08:56:56 +00:00
|
|
|
vg.accessLock.Lock()
|
|
|
|
defer vg.accessLock.Unlock()
|
2012-11-13 20:13:40 +00:00
|
|
|
|
2012-09-17 06:18:47 +00:00
|
|
|
counter = 0
|
2012-09-17 00:31:15 +00:00
|
|
|
switch repType {
|
2012-09-30 09:20:33 +00:00
|
|
|
case storage.Copy000:
|
2012-09-17 00:31:15 +00:00
|
|
|
for i := 0; i < count; i++ {
|
2013-06-20 01:10:38 +00:00
|
|
|
if ok, server, vid := topo.RandomlyReserveOneVolume(dataCenter); ok {
|
2013-11-12 10:21:22 +00:00
|
|
|
if err = vg.grow(topo, *vid, collection, repType, server); err == nil {
|
2012-09-17 06:18:47 +00:00
|
|
|
counter++
|
2013-06-20 01:10:38 +00:00
|
|
|
} else {
|
|
|
|
return counter, err
|
2012-09-17 06:18:47 +00:00
|
|
|
}
|
2013-06-20 01:10:38 +00:00
|
|
|
} else {
|
|
|
|
return counter, fmt.Errorf("Failed to grown volume for data center %s", dataCenter)
|
2012-09-03 08:50:04 +00:00
|
|
|
}
|
|
|
|
}
|
2012-09-30 09:20:33 +00:00
|
|
|
case storage.Copy001:
|
|
|
|
for i := 0; i < count; i++ {
|
2013-06-20 01:10:38 +00:00
|
|
|
//randomly pick one server from the datacenter, and then choose from the same rack
|
|
|
|
if ok, server1, vid := topo.RandomlyReserveOneVolume(dataCenter); ok {
|
2012-09-30 09:20:33 +00:00
|
|
|
rack := server1.Parent()
|
|
|
|
exclusion := make(map[string]topology.Node)
|
|
|
|
exclusion[server1.String()] = server1
|
|
|
|
newNodeList := topology.NewNodeList(rack.Children(), exclusion)
|
|
|
|
if newNodeList.FreeSpace() > 0 {
|
|
|
|
if ok2, server2 := newNodeList.ReserveOneVolume(rand.Intn(newNodeList.FreeSpace()), *vid); ok2 {
|
2013-11-12 10:21:22 +00:00
|
|
|
if err = vg.grow(topo, *vid, collection, repType, server1, server2); err == nil {
|
2012-09-30 09:20:33 +00:00
|
|
|
counter++
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
case storage.Copy010:
|
|
|
|
for i := 0; i < count; i++ {
|
2013-06-20 01:10:38 +00:00
|
|
|
//randomly pick one server from the datacenter, and then choose from the a different rack
|
|
|
|
if ok, server1, vid := topo.RandomlyReserveOneVolume(dataCenter); ok {
|
2012-09-30 09:20:33 +00:00
|
|
|
rack := server1.Parent()
|
|
|
|
dc := rack.Parent()
|
|
|
|
exclusion := make(map[string]topology.Node)
|
|
|
|
exclusion[rack.String()] = rack
|
|
|
|
newNodeList := topology.NewNodeList(dc.Children(), exclusion)
|
|
|
|
if newNodeList.FreeSpace() > 0 {
|
|
|
|
if ok2, server2 := newNodeList.ReserveOneVolume(rand.Intn(newNodeList.FreeSpace()), *vid); ok2 {
|
2013-11-12 10:21:22 +00:00
|
|
|
if err = vg.grow(topo, *vid, collection, repType, server1, server2); err == nil {
|
2012-09-30 09:20:33 +00:00
|
|
|
counter++
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
case storage.Copy100:
|
2012-09-17 00:31:15 +00:00
|
|
|
for i := 0; i < count; i++ {
|
2012-09-03 08:50:04 +00:00
|
|
|
nl := topology.NewNodeList(topo.Children(), nil)
|
2013-06-20 01:10:38 +00:00
|
|
|
picked, ret := nl.RandomlyPickN(2, 1, dataCenter)
|
2012-09-03 08:50:04 +00:00
|
|
|
vid := topo.NextVolumeId()
|
|
|
|
if ret {
|
2012-09-08 23:25:44 +00:00
|
|
|
var servers []*topology.DataNode
|
2012-09-03 08:50:04 +00:00
|
|
|
for _, n := range picked {
|
2012-09-17 02:18:37 +00:00
|
|
|
if n.FreeSpace() > 0 {
|
2013-06-20 01:10:38 +00:00
|
|
|
if ok, server := n.ReserveOneVolume(rand.Intn(n.FreeSpace()), vid, ""); ok {
|
2012-09-17 02:18:37 +00:00
|
|
|
servers = append(servers, server)
|
|
|
|
}
|
2012-09-03 08:50:04 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
if len(servers) == 2 {
|
2013-11-12 10:21:22 +00:00
|
|
|
if err = vg.grow(topo, vid, collection, repType, servers...); err == nil {
|
2012-09-17 06:18:47 +00:00
|
|
|
counter++
|
|
|
|
}
|
2012-09-03 08:50:04 +00:00
|
|
|
}
|
2013-06-20 01:10:38 +00:00
|
|
|
} else {
|
|
|
|
return counter, fmt.Errorf("Failed to grown volume on data center %s and another data center", dataCenter)
|
2012-09-03 08:50:04 +00:00
|
|
|
}
|
|
|
|
}
|
2012-09-30 09:20:33 +00:00
|
|
|
case storage.Copy110:
|
2012-09-17 00:31:15 +00:00
|
|
|
for i := 0; i < count; i++ {
|
2012-09-03 08:50:04 +00:00
|
|
|
nl := topology.NewNodeList(topo.Children(), nil)
|
2013-06-20 01:10:38 +00:00
|
|
|
picked, ret := nl.RandomlyPickN(2, 2, dataCenter)
|
2012-09-03 08:50:04 +00:00
|
|
|
vid := topo.NextVolumeId()
|
|
|
|
if ret {
|
2012-09-08 23:25:44 +00:00
|
|
|
var servers []*topology.DataNode
|
2012-09-30 09:20:33 +00:00
|
|
|
dc1, dc2 := picked[0], picked[1]
|
|
|
|
if dc2.FreeSpace() > dc1.FreeSpace() {
|
|
|
|
dc1, dc2 = dc2, dc1
|
|
|
|
}
|
|
|
|
if dc1.FreeSpace() > 0 {
|
2013-06-20 01:10:38 +00:00
|
|
|
if ok, server1 := dc1.ReserveOneVolume(rand.Intn(dc1.FreeSpace()), vid, ""); ok {
|
2012-09-30 09:20:33 +00:00
|
|
|
servers = append(servers, server1)
|
|
|
|
rack := server1.Parent()
|
|
|
|
exclusion := make(map[string]topology.Node)
|
|
|
|
exclusion[rack.String()] = rack
|
|
|
|
newNodeList := topology.NewNodeList(dc1.Children(), exclusion)
|
|
|
|
if newNodeList.FreeSpace() > 0 {
|
|
|
|
if ok2, server2 := newNodeList.ReserveOneVolume(rand.Intn(newNodeList.FreeSpace()), vid); ok2 {
|
|
|
|
servers = append(servers, server2)
|
|
|
|
}
|
2012-09-17 02:18:37 +00:00
|
|
|
}
|
2012-09-03 08:50:04 +00:00
|
|
|
}
|
|
|
|
}
|
2012-09-30 09:20:33 +00:00
|
|
|
if dc2.FreeSpace() > 0 {
|
2013-06-20 01:10:38 +00:00
|
|
|
if ok, server := dc2.ReserveOneVolume(rand.Intn(dc2.FreeSpace()), vid, ""); ok {
|
2012-09-30 09:20:33 +00:00
|
|
|
servers = append(servers, server)
|
|
|
|
}
|
|
|
|
}
|
2012-09-03 08:50:04 +00:00
|
|
|
if len(servers) == 3 {
|
2013-11-12 10:21:22 +00:00
|
|
|
if err = vg.grow(topo, vid, collection, repType, servers...); err == nil {
|
2012-09-17 06:18:47 +00:00
|
|
|
counter++
|
|
|
|
}
|
2012-09-03 08:50:04 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2012-09-30 09:20:33 +00:00
|
|
|
case storage.Copy200:
|
2012-09-17 00:31:15 +00:00
|
|
|
for i := 0; i < count; i++ {
|
2012-09-30 09:20:33 +00:00
|
|
|
nl := topology.NewNodeList(topo.Children(), nil)
|
2013-06-20 01:10:38 +00:00
|
|
|
picked, ret := nl.RandomlyPickN(3, 1, dataCenter)
|
2012-09-30 09:20:33 +00:00
|
|
|
vid := topo.NextVolumeId()
|
|
|
|
if ret {
|
|
|
|
var servers []*topology.DataNode
|
|
|
|
for _, n := range picked {
|
|
|
|
if n.FreeSpace() > 0 {
|
2013-06-20 01:10:38 +00:00
|
|
|
if ok, server := n.ReserveOneVolume(rand.Intn(n.FreeSpace()), vid, ""); ok {
|
2012-09-30 09:20:33 +00:00
|
|
|
servers = append(servers, server)
|
2012-09-17 06:18:47 +00:00
|
|
|
}
|
2012-09-17 00:31:15 +00:00
|
|
|
}
|
2012-09-03 08:50:04 +00:00
|
|
|
}
|
2012-09-30 09:20:33 +00:00
|
|
|
if len(servers) == 3 {
|
2013-11-12 10:21:22 +00:00
|
|
|
if err = vg.grow(topo, vid, collection, repType, servers...); err == nil {
|
2012-09-30 09:20:33 +00:00
|
|
|
counter++
|
|
|
|
}
|
|
|
|
}
|
2012-09-03 08:50:04 +00:00
|
|
|
}
|
|
|
|
}
|
2012-08-27 20:52:02 +00:00
|
|
|
}
|
2012-09-17 06:18:47 +00:00
|
|
|
return
|
2012-08-27 20:52:02 +00:00
|
|
|
}
|
2013-11-12 10:21:22 +00:00
|
|
|
func (vg *VolumeGrowth) grow(topo *topology.Topology, vid storage.VolumeId, collection string, repType storage.ReplicationType, servers ...*topology.DataNode) error {
|
2012-09-03 08:50:04 +00:00
|
|
|
for _, server := range servers {
|
2013-11-12 10:21:22 +00:00
|
|
|
if err := operation.AllocateVolume(server, vid, collection, repType); err == nil {
|
|
|
|
vi := storage.VolumeInfo{Id: vid, Size: 0, Collection: collection, RepType: repType, Version: storage.CurrentVersion}
|
2012-09-17 00:31:15 +00:00
|
|
|
server.AddOrUpdateVolume(vi)
|
2012-09-19 08:45:30 +00:00
|
|
|
topo.RegisterVolumeLayout(&vi, server)
|
2013-08-09 06:57:22 +00:00
|
|
|
glog.V(0).Infoln("Created Volume", vid, "on", server)
|
2012-09-17 00:31:15 +00:00
|
|
|
} else {
|
2013-08-09 06:57:22 +00:00
|
|
|
glog.V(0).Infoln("Failed to assign", vid, "to", servers, "error", err)
|
2013-08-13 06:48:10 +00:00
|
|
|
return errors.New("Failed to assign " + vid.String() + ", " + err.Error())
|
2012-09-17 00:31:15 +00:00
|
|
|
}
|
2012-09-03 08:50:04 +00:00
|
|
|
}
|
2012-09-17 06:18:47 +00:00
|
|
|
return nil
|
2012-09-03 08:50:04 +00:00
|
|
|
}
|