2014-04-13 16:29:52 +08:00
|
|
|
package topology
|
2012-08-28 04:52:02 +08:00
|
|
|
|
|
|
|
import (
|
2014-03-14 03:13:39 +08:00
|
|
|
"fmt"
|
2013-02-27 14:54:22 +08:00
|
|
|
"math/rand"
|
2013-01-17 16:56:56 +08:00
|
|
|
"sync"
|
2014-10-27 02:34:55 +08:00
|
|
|
|
2016-06-03 09:09:14 +08:00
|
|
|
"github.com/chrislusf/seaweedfs/weed/glog"
|
|
|
|
"github.com/chrislusf/seaweedfs/weed/storage"
|
2012-08-28 04:52:02 +08:00
|
|
|
)
|
|
|
|
|
|
|
|
/*
This package was created to resolve these replica placement issues:
 1. growth factor for each replica level, e.g., add 10 volumes for 1 copy, 20 volumes for 2 copies, 30 volumes for 3 copies
 2. how to reduce the replica level when storage is tight
 3. optimizing for hot data on faster disks and cold data on cheaper storage
 4. volume allocation for each bucket
*/
|
|
|
|
|
2014-04-13 16:29:52 +08:00
|
|
|
type VolumeGrowOption struct {
|
|
|
|
Collection string
|
|
|
|
ReplicaPlacement *storage.ReplicaPlacement
|
2014-09-21 03:38:59 +08:00
|
|
|
Ttl *storage.TTL
|
2014-04-13 16:29:52 +08:00
|
|
|
DataCenter string
|
|
|
|
Rack string
|
|
|
|
DataNode string
|
|
|
|
}
|
|
|
|
|
2012-08-28 04:52:02 +08:00
|
|
|
// VolumeGrowth creates new volumes on a topology. The zero value is ready to
// use; because it embeds a mutex it must be shared by pointer, never copied.
type VolumeGrowth struct {
	// accessLock is held for the whole of GrowByCountAndType so that growth
	// requests run one at a time.
	accessLock sync.Mutex
}
|
|
|
|
|
2015-01-08 15:54:50 +08:00
|
|
|
func (o *VolumeGrowOption) String() string {
|
|
|
|
return fmt.Sprintf("Collection:%s, ReplicaPlacement:%v, Ttl:%v, DataCenter:%s, Rack:%s, DataNode:%s", o.Collection, o.ReplicaPlacement, o.Ttl, o.DataCenter, o.Rack, o.DataNode)
|
|
|
|
}
|
|
|
|
|
2012-09-17 08:31:15 +08:00
|
|
|
func NewDefaultVolumeGrowth() *VolumeGrowth {
|
2014-03-03 14:16:54 +08:00
|
|
|
return &VolumeGrowth{}
|
2012-09-17 08:31:15 +08:00
|
|
|
}
|
|
|
|
|
2014-03-03 14:16:54 +08:00
|
|
|
// one replication type may need rp.GetCopyCount() actual volumes
|
|
|
|
// given copyCount, how many logical volumes to create
|
|
|
|
func (vg *VolumeGrowth) findVolumeCount(copyCount int) (count int) {
|
|
|
|
switch copyCount {
|
|
|
|
case 1:
|
|
|
|
count = 7
|
|
|
|
case 2:
|
|
|
|
count = 6
|
|
|
|
case 3:
|
|
|
|
count = 3
|
2013-07-25 01:31:51 +08:00
|
|
|
default:
|
2014-03-03 14:16:54 +08:00
|
|
|
count = 1
|
2012-09-17 08:31:15 +08:00
|
|
|
}
|
2014-03-03 14:16:54 +08:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2014-04-13 16:29:52 +08:00
|
|
|
func (vg *VolumeGrowth) AutomaticGrowByType(option *VolumeGrowOption, topo *Topology) (count int, err error) {
|
|
|
|
count, err = vg.GrowByCountAndType(vg.findVolumeCount(option.ReplicaPlacement.GetCopyCount()), option, topo)
|
|
|
|
if count > 0 && count%option.ReplicaPlacement.GetCopyCount() == 0 {
|
2013-07-25 01:31:51 +08:00
|
|
|
return count, nil
|
|
|
|
}
|
|
|
|
return count, err
|
2012-09-17 08:31:15 +08:00
|
|
|
}
|
2014-04-13 16:29:52 +08:00
|
|
|
func (vg *VolumeGrowth) GrowByCountAndType(targetCount int, option *VolumeGrowOption, topo *Topology) (counter int, err error) {
|
2013-01-17 16:56:56 +08:00
|
|
|
vg.accessLock.Lock()
|
|
|
|
defer vg.accessLock.Unlock()
|
2012-11-14 04:13:40 +08:00
|
|
|
|
2014-03-03 14:16:54 +08:00
|
|
|
for i := 0; i < targetCount; i++ {
|
2014-04-13 16:29:52 +08:00
|
|
|
if c, e := vg.findAndGrow(topo, option); e == nil {
|
2014-03-03 14:16:54 +08:00
|
|
|
counter += c
|
|
|
|
} else {
|
|
|
|
return counter, e
|
2012-09-30 17:20:33 +08:00
|
|
|
}
|
2014-03-03 14:16:54 +08:00
|
|
|
}
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2014-04-13 16:29:52 +08:00
|
|
|
func (vg *VolumeGrowth) findAndGrow(topo *Topology, option *VolumeGrowOption) (int, error) {
|
|
|
|
servers, e := vg.findEmptySlotsForOneVolume(topo, option)
|
2014-03-03 14:16:54 +08:00
|
|
|
if e != nil {
|
|
|
|
return 0, e
|
|
|
|
}
|
|
|
|
vid := topo.NextVolumeId()
|
2014-04-13 16:29:52 +08:00
|
|
|
err := vg.grow(topo, vid, option, servers...)
|
2014-03-03 14:16:54 +08:00
|
|
|
return len(servers), err
|
|
|
|
}
|
|
|
|
|
2014-04-13 18:06:58 +08:00
|
|
|
// 1. find the main data node
|
|
|
|
// 1.1 collect all data nodes that have 1 slots
|
|
|
|
// 2.2 collect all racks that have rp.SameRackCount+1
|
|
|
|
// 2.2 collect all data centers that have DiffRackCount+rp.SameRackCount+1
|
|
|
|
// 2. find rest data nodes
|
2014-04-13 16:29:52 +08:00
|
|
|
func (vg *VolumeGrowth) findEmptySlotsForOneVolume(topo *Topology, option *VolumeGrowOption) (servers []*DataNode, err error) {
|
2014-03-03 14:16:54 +08:00
|
|
|
//find main datacenter and other data centers
|
2014-04-13 16:29:52 +08:00
|
|
|
rp := option.ReplicaPlacement
|
|
|
|
mainDataCenter, otherDataCenters, dc_err := topo.RandomlyPickNodes(rp.DiffDataCenterCount+1, func(node Node) error {
|
|
|
|
if option.DataCenter != "" && node.IsDataCenter() && node.Id() != NodeId(option.DataCenter) {
|
|
|
|
return fmt.Errorf("Not matching preferred data center:%s", option.DataCenter)
|
2012-09-03 16:50:04 +08:00
|
|
|
}
|
2014-04-13 17:16:45 +08:00
|
|
|
if len(node.Children()) < rp.DiffRackCount+1 {
|
|
|
|
return fmt.Errorf("Only has %d racks, not enough for %d.", len(node.Children()), rp.DiffRackCount+1)
|
|
|
|
}
|
2014-03-14 03:13:39 +08:00
|
|
|
if node.FreeSpace() < rp.DiffRackCount+rp.SameRackCount+1 {
|
|
|
|
return fmt.Errorf("Free:%d < Expected:%d", node.FreeSpace(), rp.DiffRackCount+rp.SameRackCount+1)
|
|
|
|
}
|
2014-04-13 18:06:58 +08:00
|
|
|
possibleRacksCount := 0
|
|
|
|
for _, rack := range node.Children() {
|
|
|
|
possibleDataNodesCount := 0
|
|
|
|
for _, n := range rack.Children() {
|
|
|
|
if n.FreeSpace() >= 1 {
|
|
|
|
possibleDataNodesCount++
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if possibleDataNodesCount >= rp.SameRackCount+1 {
|
|
|
|
possibleRacksCount++
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if possibleRacksCount < rp.DiffRackCount+1 {
|
|
|
|
return fmt.Errorf("Only has %d racks with more than %d free data nodes, not enough for %d.", possibleRacksCount, rp.SameRackCount+1, rp.DiffRackCount+1)
|
|
|
|
}
|
2014-03-14 03:13:39 +08:00
|
|
|
return nil
|
2014-03-03 14:16:54 +08:00
|
|
|
})
|
|
|
|
if dc_err != nil {
|
|
|
|
return nil, dc_err
|
|
|
|
}
|
|
|
|
|
|
|
|
//find main rack and other racks
|
2014-04-13 16:29:52 +08:00
|
|
|
mainRack, otherRacks, rack_err := mainDataCenter.(*DataCenter).RandomlyPickNodes(rp.DiffRackCount+1, func(node Node) error {
|
|
|
|
if option.Rack != "" && node.IsRack() && node.Id() != NodeId(option.Rack) {
|
|
|
|
return fmt.Errorf("Not matching preferred rack:%s", option.Rack)
|
|
|
|
}
|
2014-04-13 17:26:22 +08:00
|
|
|
if node.FreeSpace() < rp.SameRackCount+1 {
|
|
|
|
return fmt.Errorf("Free:%d < Expected:%d", node.FreeSpace(), rp.SameRackCount+1)
|
|
|
|
}
|
2014-04-13 17:16:45 +08:00
|
|
|
if len(node.Children()) < rp.SameRackCount+1 {
|
2014-04-13 18:06:58 +08:00
|
|
|
// a bit faster way to test free racks
|
2014-04-13 17:16:45 +08:00
|
|
|
return fmt.Errorf("Only has %d data nodes, not enough for %d.", len(node.Children()), rp.SameRackCount+1)
|
|
|
|
}
|
2014-04-13 18:06:58 +08:00
|
|
|
possibleDataNodesCount := 0
|
2014-04-13 17:26:22 +08:00
|
|
|
for _, n := range node.Children() {
|
|
|
|
if n.FreeSpace() >= 1 {
|
2014-04-13 18:06:58 +08:00
|
|
|
possibleDataNodesCount++
|
2014-04-13 17:26:22 +08:00
|
|
|
}
|
|
|
|
}
|
2014-04-13 18:06:58 +08:00
|
|
|
if possibleDataNodesCount < rp.SameRackCount+1 {
|
|
|
|
return fmt.Errorf("Only has %d data nodes with a slot, not enough for %d.", possibleDataNodesCount, rp.SameRackCount+1)
|
2014-03-14 03:13:39 +08:00
|
|
|
}
|
|
|
|
return nil
|
2014-03-03 14:16:54 +08:00
|
|
|
})
|
|
|
|
if rack_err != nil {
|
|
|
|
return nil, rack_err
|
|
|
|
}
|
|
|
|
|
|
|
|
//find main rack and other racks
|
2014-04-13 16:29:52 +08:00
|
|
|
mainServer, otherServers, server_err := mainRack.(*Rack).RandomlyPickNodes(rp.SameRackCount+1, func(node Node) error {
|
|
|
|
if option.DataNode != "" && node.IsDataNode() && node.Id() != NodeId(option.DataNode) {
|
|
|
|
return fmt.Errorf("Not matching preferred data node:%s", option.DataNode)
|
|
|
|
}
|
2014-03-14 03:13:39 +08:00
|
|
|
if node.FreeSpace() < 1 {
|
|
|
|
return fmt.Errorf("Free:%d < Expected:%d", node.FreeSpace(), 1)
|
|
|
|
}
|
|
|
|
return nil
|
2014-03-03 14:16:54 +08:00
|
|
|
})
|
|
|
|
if server_err != nil {
|
|
|
|
return nil, server_err
|
|
|
|
}
|
|
|
|
|
2014-04-13 16:29:52 +08:00
|
|
|
servers = append(servers, mainServer.(*DataNode))
|
2014-03-03 14:16:54 +08:00
|
|
|
for _, server := range otherServers {
|
2014-04-13 16:29:52 +08:00
|
|
|
servers = append(servers, server.(*DataNode))
|
2014-03-03 14:16:54 +08:00
|
|
|
}
|
|
|
|
for _, rack := range otherRacks {
|
|
|
|
r := rand.Intn(rack.FreeSpace())
|
|
|
|
if server, e := rack.ReserveOneVolume(r); e == nil {
|
|
|
|
servers = append(servers, server)
|
|
|
|
} else {
|
|
|
|
return servers, e
|
2012-09-03 16:50:04 +08:00
|
|
|
}
|
2014-03-03 14:16:54 +08:00
|
|
|
}
|
|
|
|
for _, datacenter := range otherDataCenters {
|
|
|
|
r := rand.Intn(datacenter.FreeSpace())
|
|
|
|
if server, e := datacenter.ReserveOneVolume(r); e == nil {
|
|
|
|
servers = append(servers, server)
|
|
|
|
} else {
|
|
|
|
return servers, e
|
2012-09-03 16:50:04 +08:00
|
|
|
}
|
2012-08-28 04:52:02 +08:00
|
|
|
}
|
2012-09-17 14:18:47 +08:00
|
|
|
return
|
2012-08-28 04:52:02 +08:00
|
|
|
}
|
2014-03-03 14:16:54 +08:00
|
|
|
|
2014-04-13 16:29:52 +08:00
|
|
|
func (vg *VolumeGrowth) grow(topo *Topology, vid storage.VolumeId, option *VolumeGrowOption, servers ...*DataNode) error {
|
2012-09-03 16:50:04 +08:00
|
|
|
for _, server := range servers {
|
2014-09-21 03:38:59 +08:00
|
|
|
if err := AllocateVolume(server, vid, option); err == nil {
|
|
|
|
vi := storage.VolumeInfo{
|
|
|
|
Id: vid,
|
|
|
|
Size: 0,
|
|
|
|
Collection: option.Collection,
|
|
|
|
ReplicaPlacement: option.ReplicaPlacement,
|
|
|
|
Ttl: option.Ttl,
|
|
|
|
Version: storage.CurrentVersion,
|
|
|
|
}
|
2012-09-17 08:31:15 +08:00
|
|
|
server.AddOrUpdateVolume(vi)
|
2014-03-19 19:48:13 +08:00
|
|
|
topo.RegisterVolumeLayout(vi, server)
|
2015-01-19 09:03:38 +08:00
|
|
|
glog.V(0).Infoln("Created Volume", vid, "on", server.NodeImpl.String())
|
2012-09-17 08:31:15 +08:00
|
|
|
} else {
|
2015-02-26 13:29:12 +08:00
|
|
|
glog.V(0).Infoln("Failed to assign volume", vid, "to", servers, "error", err)
|
2015-01-14 09:04:41 +08:00
|
|
|
return fmt.Errorf("Failed to assign %d: %v", vid, err)
|
2012-09-17 08:31:15 +08:00
|
|
|
}
|
2012-09-03 16:50:04 +08:00
|
|
|
}
|
2012-09-17 14:18:47 +08:00
|
|
|
return nil
|
2012-09-03 16:50:04 +08:00
|
|
|
}
|