seaweedfs/go/replication/volume_growth.go

215 lines
6.8 KiB
Go
Raw Normal View History

package replication
import (
"code.google.com/p/weed-fs/go/operation"
"code.google.com/p/weed-fs/go/storage"
"code.google.com/p/weed-fs/go/topology"
2013-02-27 14:54:22 +08:00
"errors"
"fmt"
"code.google.com/p/weed-fs/go/glog"
2013-02-27 14:54:22 +08:00
"math/rand"
2013-01-17 16:56:56 +08:00
"sync"
)
/*
This package addresses these replica placement concerns:
1. growth factor for each replica level, e.g., add 10 volumes for 1 copy, 20 volumes for 2 copies, 30 volumes for 3 copies
2. how to reduce the replica level when storage is tight
3. optimizing for hot data on faster disks and cold data on cheaper storage
4. volume allocation for each bucket
*/
// VolumeGrowth holds the per-round growth counts used when new volumes are
// created, plus the lock that serializes growth requests.
type VolumeGrowth struct {
	// copy1factor is the number of volumes AutomaticGrowByType requests for
	// storage.Copy000.
	copy1factor int
	// copy2factor is the number of volumes AutomaticGrowByType requests for
	// storage.Copy001, storage.Copy010 and storage.Copy100.
	copy2factor int
	// copy3factor is the number of volumes AutomaticGrowByType requests for
	// storage.Copy110 and storage.Copy200.
	copy3factor int
	// copyAll is not read or written anywhere in this file.
	// NOTE(review): purpose unknown from here; confirm before relying on it.
	copyAll int

	// accessLock is held for the whole of GrowByCountAndType so that only one
	// growth round runs at a time on a given VolumeGrowth.
	accessLock sync.Mutex
}
// NewDefaultVolumeGrowth returns a VolumeGrowth whose growth counts are set
// to 7 (copy1factor), 6 (copy2factor) and 3 (copy3factor). All other fields
// keep their zero values.
func NewDefaultVolumeGrowth() *VolumeGrowth {
	vg := new(VolumeGrowth)
	vg.copy1factor = 7
	vg.copy2factor = 6
	vg.copy3factor = 3
	return vg
}
func (vg *VolumeGrowth) AutomaticGrowByType(repType storage.ReplicationType, dataCenter string, topo *topology.Topology) (count int, err error) {
factor := 1
switch repType {
2012-09-30 17:20:33 +08:00
case storage.Copy000:
factor = 1
count, err = vg.GrowByCountAndType(vg.copy1factor, repType, dataCenter, topo)
2012-09-30 17:20:33 +08:00
case storage.Copy001:
factor = 2
count, err = vg.GrowByCountAndType(vg.copy2factor, repType, dataCenter, topo)
2012-09-30 17:20:33 +08:00
case storage.Copy010:
factor = 2
count, err = vg.GrowByCountAndType(vg.copy2factor, repType, dataCenter, topo)
2012-09-30 17:20:33 +08:00
case storage.Copy100:
factor = 2
count, err = vg.GrowByCountAndType(vg.copy2factor, repType, dataCenter, topo)
2012-09-30 17:20:33 +08:00
case storage.Copy110:
factor = 3
count, err = vg.GrowByCountAndType(vg.copy3factor, repType, dataCenter, topo)
2012-09-30 17:20:33 +08:00
case storage.Copy200:
factor = 3
count, err = vg.GrowByCountAndType(vg.copy3factor, repType, dataCenter, topo)
default:
err = errors.New("Unknown Replication Type!")
}
if count > 0 && count%factor == 0 {
return count, nil
}
return count, err
}
func (vg *VolumeGrowth) GrowByCountAndType(count int, repType storage.ReplicationType, dataCenter string, topo *topology.Topology) (counter int, err error) {
2013-01-17 16:56:56 +08:00
vg.accessLock.Lock()
defer vg.accessLock.Unlock()
2012-11-14 04:13:40 +08:00
counter = 0
switch repType {
2012-09-30 17:20:33 +08:00
case storage.Copy000:
for i := 0; i < count; i++ {
if ok, server, vid := topo.RandomlyReserveOneVolume(dataCenter); ok {
if err = vg.grow(topo, *vid, repType, server); err == nil {
counter++
} else {
return counter, err
}
} else {
return counter, fmt.Errorf("Failed to grown volume for data center %s", dataCenter)
2012-09-03 16:50:04 +08:00
}
}
2012-09-30 17:20:33 +08:00
case storage.Copy001:
for i := 0; i < count; i++ {
//randomly pick one server from the datacenter, and then choose from the same rack
if ok, server1, vid := topo.RandomlyReserveOneVolume(dataCenter); ok {
2012-09-30 17:20:33 +08:00
rack := server1.Parent()
exclusion := make(map[string]topology.Node)
exclusion[server1.String()] = server1
newNodeList := topology.NewNodeList(rack.Children(), exclusion)
if newNodeList.FreeSpace() > 0 {
if ok2, server2 := newNodeList.ReserveOneVolume(rand.Intn(newNodeList.FreeSpace()), *vid); ok2 {
if err = vg.grow(topo, *vid, repType, server1, server2); err == nil {
counter++
}
}
}
}
}
case storage.Copy010:
for i := 0; i < count; i++ {
//randomly pick one server from the datacenter, and then choose from the a different rack
if ok, server1, vid := topo.RandomlyReserveOneVolume(dataCenter); ok {
2012-09-30 17:20:33 +08:00
rack := server1.Parent()
dc := rack.Parent()
exclusion := make(map[string]topology.Node)
exclusion[rack.String()] = rack
newNodeList := topology.NewNodeList(dc.Children(), exclusion)
if newNodeList.FreeSpace() > 0 {
if ok2, server2 := newNodeList.ReserveOneVolume(rand.Intn(newNodeList.FreeSpace()), *vid); ok2 {
if err = vg.grow(topo, *vid, repType, server1, server2); err == nil {
counter++
}
}
}
}
}
case storage.Copy100:
for i := 0; i < count; i++ {
2012-09-03 16:50:04 +08:00
nl := topology.NewNodeList(topo.Children(), nil)
picked, ret := nl.RandomlyPickN(2, 1, dataCenter)
2012-09-03 16:50:04 +08:00
vid := topo.NextVolumeId()
if ret {
2012-09-09 07:25:44 +08:00
var servers []*topology.DataNode
2012-09-03 16:50:04 +08:00
for _, n := range picked {
if n.FreeSpace() > 0 {
if ok, server := n.ReserveOneVolume(rand.Intn(n.FreeSpace()), vid, ""); ok {
servers = append(servers, server)
}
2012-09-03 16:50:04 +08:00
}
}
if len(servers) == 2 {
2012-09-30 17:20:33 +08:00
if err = vg.grow(topo, vid, repType, servers...); err == nil {
counter++
}
2012-09-03 16:50:04 +08:00
}
} else {
return counter, fmt.Errorf("Failed to grown volume on data center %s and another data center", dataCenter)
2012-09-03 16:50:04 +08:00
}
}
2012-09-30 17:20:33 +08:00
case storage.Copy110:
for i := 0; i < count; i++ {
2012-09-03 16:50:04 +08:00
nl := topology.NewNodeList(topo.Children(), nil)
picked, ret := nl.RandomlyPickN(2, 2, dataCenter)
2012-09-03 16:50:04 +08:00
vid := topo.NextVolumeId()
if ret {
2012-09-09 07:25:44 +08:00
var servers []*topology.DataNode
2012-09-30 17:20:33 +08:00
dc1, dc2 := picked[0], picked[1]
if dc2.FreeSpace() > dc1.FreeSpace() {
dc1, dc2 = dc2, dc1
}
if dc1.FreeSpace() > 0 {
if ok, server1 := dc1.ReserveOneVolume(rand.Intn(dc1.FreeSpace()), vid, ""); ok {
2012-09-30 17:20:33 +08:00
servers = append(servers, server1)
rack := server1.Parent()
exclusion := make(map[string]topology.Node)
exclusion[rack.String()] = rack
newNodeList := topology.NewNodeList(dc1.Children(), exclusion)
if newNodeList.FreeSpace() > 0 {
if ok2, server2 := newNodeList.ReserveOneVolume(rand.Intn(newNodeList.FreeSpace()), vid); ok2 {
servers = append(servers, server2)
}
}
2012-09-03 16:50:04 +08:00
}
}
2012-09-30 17:20:33 +08:00
if dc2.FreeSpace() > 0 {
if ok, server := dc2.ReserveOneVolume(rand.Intn(dc2.FreeSpace()), vid, ""); ok {
2012-09-30 17:20:33 +08:00
servers = append(servers, server)
}
}
2012-09-03 16:50:04 +08:00
if len(servers) == 3 {
2012-09-30 17:20:33 +08:00
if err = vg.grow(topo, vid, repType, servers...); err == nil {
counter++
}
2012-09-03 16:50:04 +08:00
}
}
}
2012-09-30 17:20:33 +08:00
case storage.Copy200:
for i := 0; i < count; i++ {
2012-09-30 17:20:33 +08:00
nl := topology.NewNodeList(topo.Children(), nil)
picked, ret := nl.RandomlyPickN(3, 1, dataCenter)
2012-09-30 17:20:33 +08:00
vid := topo.NextVolumeId()
if ret {
var servers []*topology.DataNode
for _, n := range picked {
if n.FreeSpace() > 0 {
if ok, server := n.ReserveOneVolume(rand.Intn(n.FreeSpace()), vid, ""); ok {
2012-09-30 17:20:33 +08:00
servers = append(servers, server)
}
}
2012-09-03 16:50:04 +08:00
}
2012-09-30 17:20:33 +08:00
if len(servers) == 3 {
if err = vg.grow(topo, vid, repType, servers...); err == nil {
counter++
}
}
2012-09-03 16:50:04 +08:00
}
}
}
return
}
func (vg *VolumeGrowth) grow(topo *topology.Topology, vid storage.VolumeId, repType storage.ReplicationType, servers ...*topology.DataNode) error {
2012-09-03 16:50:04 +08:00
for _, server := range servers {
2012-09-21 09:02:56 +08:00
if err := operation.AllocateVolume(server, vid, repType); err == nil {
2013-01-17 16:56:56 +08:00
vi := storage.VolumeInfo{Id: vid, Size: 0, RepType: repType, Version: storage.CurrentVersion}
server.AddOrUpdateVolume(vi)
topo.RegisterVolumeLayout(&vi, server)
glog.V(0).Infoln("Created Volume", vid, "on", server)
} else {
glog.V(0).Infoln("Failed to assign", vid, "to", servers, "error", err)
2013-08-13 14:48:10 +08:00
return errors.New("Failed to assign " + vid.String() + ", " + err.Error())
}
2012-09-03 16:50:04 +08:00
}
return nil
2012-09-03 16:50:04 +08:00
}