2012-08-24 13:33:37 +08:00
package topology
import (
2014-10-27 02:34:55 +08:00
"errors"
2019-04-22 01:14:17 +08:00
"fmt"
2021-09-13 13:47:52 +08:00
"github.com/chrislusf/seaweedfs/weed/pb"
2021-02-16 18:47:02 +08:00
"github.com/chrislusf/seaweedfs/weed/storage/types"
2014-10-27 02:34:55 +08:00
"math/rand"
2019-05-24 14:34:29 +08:00
"sync"
2020-08-10 16:37:47 +08:00
"time"
2014-10-27 02:34:55 +08:00
2015-05-04 03:37:49 +08:00
"github.com/chrislusf/raft"
2019-12-24 04:48:20 +08:00
2016-06-03 09:09:14 +08:00
"github.com/chrislusf/seaweedfs/weed/glog"
2018-07-11 17:01:33 +08:00
"github.com/chrislusf/seaweedfs/weed/pb/master_pb"
2016-06-03 09:09:14 +08:00
"github.com/chrislusf/seaweedfs/weed/sequence"
"github.com/chrislusf/seaweedfs/weed/storage"
2019-04-19 12:43:36 +08:00
"github.com/chrislusf/seaweedfs/weed/storage/needle"
2019-12-24 04:48:20 +08:00
"github.com/chrislusf/seaweedfs/weed/storage/super_block"
2016-06-03 09:09:14 +08:00
"github.com/chrislusf/seaweedfs/weed/util"
2012-08-24 13:33:37 +08:00
)
type Topology struct {
2019-07-22 12:49:10 +08:00
vacuumLockCounter int64
2012-09-03 05:33:48 +08:00
NodeImpl
2012-09-10 15:18:07 +08:00
2019-05-24 14:34:29 +08:00
collectionMap * util . ConcurrentReadMap
ecShardMap map [ needle . VolumeId ] * EcShardLocations
ecShardMapLock sync . RWMutex
2012-09-10 15:18:07 +08:00
pulse int64
2020-04-02 03:18:40 +08:00
volumeSizeLimit uint64
replicationAsMin bool
2012-09-10 15:18:07 +08:00
2014-04-17 14:43:27 +08:00
Sequence sequence . Sequencer
2012-09-19 05:05:12 +08:00
2021-05-12 01:05:31 +08:00
chanFullVolumes chan storage . VolumeInfo
chanCrowdedVolumes chan storage . VolumeInfo
2012-09-24 17:01:53 +08:00
2021-06-22 13:56:07 +08:00
Configuration * Configuration
2014-03-16 14:03:49 +08:00
RaftServer raft . Server
2012-08-31 16:35:11 +08:00
}
2012-08-28 16:04:39 +08:00
2020-04-02 03:18:40 +08:00
func NewTopology ( id string , seq sequence . Sequencer , volumeSizeLimit uint64 , pulse int , replicationAsMin bool ) * Topology {
2012-09-03 05:33:48 +08:00
t := & Topology { }
t . id = NodeId ( id )
t . nodeType = "Topology"
2012-09-19 16:45:30 +08:00
t . NodeImpl . value = t
2021-02-16 18:47:02 +08:00
t . diskUsages = newDiskUsages ( )
2012-09-03 05:33:48 +08:00
t . children = make ( map [ NodeId ] Node )
2014-12-09 12:29:25 +08:00
t . collectionMap = util . NewConcurrentReadMap ( )
2019-05-24 14:34:29 +08:00
t . ecShardMap = make ( map [ needle . VolumeId ] * EcShardLocations )
2012-09-10 15:18:07 +08:00
t . pulse = int64 ( pulse )
t . volumeSizeLimit = volumeSizeLimit
2020-04-02 03:18:40 +08:00
t . replicationAsMin = replicationAsMin
2012-09-19 16:45:30 +08:00
2014-04-17 14:43:27 +08:00
t . Sequence = seq
2012-09-19 16:45:30 +08:00
2021-05-12 01:05:31 +08:00
t . chanFullVolumes = make ( chan storage . VolumeInfo )
t . chanCrowdedVolumes = make ( chan storage . VolumeInfo )
2012-09-19 16:45:30 +08:00
2017-02-13 13:58:44 +08:00
t . Configuration = & Configuration { }
2012-09-24 17:01:53 +08:00
2017-02-13 13:58:44 +08:00
return t
2012-09-03 05:33:48 +08:00
}
2012-09-17 08:31:15 +08:00
2014-03-16 14:03:49 +08:00
func ( t * Topology ) IsLeader ( ) bool {
2019-02-15 16:09:48 +08:00
if t . RaftServer != nil {
2020-01-10 16:37:44 +08:00
if t . RaftServer . State ( ) == raft . Leader {
return true
}
2021-06-22 13:56:07 +08:00
if leader , err := t . Leader ( ) ; err == nil {
2021-09-13 13:47:52 +08:00
if pb . ServerAddress ( t . RaftServer . Name ( ) ) == leader {
2021-06-22 13:56:07 +08:00
return true
}
}
2019-01-29 02:36:16 +08:00
}
2014-04-12 07:23:58 +08:00
return false
2014-03-16 14:03:49 +08:00
}
2021-09-13 13:47:52 +08:00
func ( t * Topology ) Leader ( ) ( pb . ServerAddress , error ) {
var l pb . ServerAddress
2020-08-11 11:42:27 +08:00
for count := 0 ; count < 3 ; count ++ {
2020-08-10 16:37:47 +08:00
if t . RaftServer != nil {
2021-09-13 13:47:52 +08:00
l = pb . ServerAddress ( t . RaftServer . Leader ( ) )
2020-08-10 16:37:47 +08:00
} else {
return "" , errors . New ( "Raft Server not ready yet!" )
}
if l != "" {
break
} else {
2020-08-11 11:42:27 +08:00
time . Sleep ( time . Duration ( 5 + count ) * time . Second )
2020-08-10 16:37:47 +08:00
}
2014-03-16 14:03:49 +08:00
}
2014-04-12 07:23:58 +08:00
return l , nil
2014-03-16 14:03:49 +08:00
}
2019-06-06 14:20:26 +08:00
func ( t * Topology ) Lookup ( collection string , vid needle . VolumeId ) ( dataNodes [ ] * DataNode ) {
2020-08-11 11:42:27 +08:00
// maybe an issue if lots of collections?
2013-11-12 18:21:22 +08:00
if collection == "" {
2016-05-31 03:30:26 +08:00
for _ , c := range t . collectionMap . Items ( ) {
2014-12-09 12:29:25 +08:00
if list := c . ( * Collection ) . Lookup ( vid ) ; list != nil {
2012-09-24 17:01:53 +08:00
return list
}
}
2013-11-12 18:21:22 +08:00
} else {
2016-05-31 03:30:26 +08:00
if c , ok := t . collectionMap . Find ( collection ) ; ok {
2014-12-09 12:29:25 +08:00
return c . ( * Collection ) . Lookup ( vid )
2013-11-12 18:21:22 +08:00
}
2012-09-24 17:01:53 +08:00
}
2019-06-06 14:20:26 +08:00
if locations , found := t . LookupEcShards ( vid ) ; found {
for _ , loc := range locations . Locations {
dataNodes = append ( dataNodes , loc ... )
}
return dataNodes
}
2012-09-24 17:01:53 +08:00
return nil
2012-09-24 06:45:26 +08:00
}
2019-04-19 12:43:36 +08:00
func ( t * Topology ) NextVolumeId ( ) ( needle . VolumeId , error ) {
2012-09-03 05:33:48 +08:00
vid := t . GetMaxVolumeId ( )
2014-03-16 14:03:49 +08:00
next := vid . Next ( )
2019-02-25 10:47:41 +08:00
if _ , err := t . RaftServer . Do ( NewMaxVolumeIdCommand ( next ) ) ; err != nil {
return 0 , err
}
return next , nil
2012-08-28 16:04:39 +08:00
}
2012-09-10 15:18:07 +08:00
2021-05-06 18:46:14 +08:00
// deprecated
2014-10-26 14:45:31 +08:00
func ( t * Topology ) HasWritableVolume ( option * VolumeGrowOption ) bool {
2020-12-14 03:59:32 +08:00
vl := t . GetVolumeLayout ( option . Collection , option . ReplicaPlacement , option . Ttl , option . DiskType )
2021-05-06 18:46:14 +08:00
active , _ := vl . GetActiveVolumeCount ( option )
return active > 0
2014-04-13 16:29:52 +08:00
}
2021-09-06 14:17:15 +08:00
func ( t * Topology ) PickForWrite ( count uint64 , option * VolumeGrowOption ) ( string , uint64 , * VolumeLocationList , error ) {
2020-12-14 03:59:32 +08:00
vid , count , datanodes , err := t . GetVolumeLayout ( option . Collection , option . ReplicaPlacement , option . Ttl , option . DiskType ) . PickForWrite ( count , option )
2019-04-22 01:14:17 +08:00
if err != nil {
2019-10-29 21:28:28 +08:00
return "" , 0 , nil , fmt . Errorf ( "failed to find writable volumes for collection:%s replication:%s ttl:%s error: %v" , option . Collection , option . ReplicaPlacement . String ( ) , option . Ttl . String ( ) , err )
2019-04-22 01:14:17 +08:00
}
if datanodes . Length ( ) == 0 {
2019-10-29 21:28:28 +08:00
return "" , 0 , nil , fmt . Errorf ( "no writable volumes available for collection:%s replication:%s ttl:%s" , option . Collection , option . ReplicaPlacement . String ( ) , option . Ttl . String ( ) )
2012-09-17 08:31:15 +08:00
}
2019-10-30 15:49:58 +08:00
fileId := t . Sequence . NextFileId ( count )
2021-09-06 14:17:15 +08:00
return needle . NewFileId ( * vid , fileId , rand . Uint32 ( ) ) . String ( ) , count , datanodes , nil
2012-09-17 08:31:15 +08:00
}
2021-02-16 18:47:02 +08:00
func ( t * Topology ) GetVolumeLayout ( collectionName string , rp * super_block . ReplicaPlacement , ttl * needle . TTL , diskType types . DiskType ) * VolumeLayout {
2014-12-09 12:29:25 +08:00
return t . collectionMap . Get ( collectionName , func ( ) interface { } {
2020-04-02 03:18:40 +08:00
return NewCollection ( collectionName , t . volumeSizeLimit , t . replicationAsMin )
2020-12-14 03:59:32 +08:00
} ) . ( * Collection ) . GetOrCreateVolumeLayout ( rp , ttl , diskType )
2012-09-14 16:17:13 +08:00
}
2019-05-31 00:27:23 +08:00
func ( t * Topology ) ListCollections ( includeNormalVolumes , includeEcVolumes bool ) ( ret [ ] string ) {
2019-05-31 00:17:58 +08:00
mapOfCollections := make ( map [ string ] bool )
2019-03-17 04:43:16 +08:00
for _ , c := range t . collectionMap . Items ( ) {
2019-05-31 00:17:58 +08:00
mapOfCollections [ c . ( * Collection ) . Name ] = true
}
2019-05-31 00:27:23 +08:00
if includeEcVolumes {
t . ecShardMapLock . RLock ( )
for _ , ecVolumeLocation := range t . ecShardMap {
mapOfCollections [ ecVolumeLocation . Collection ] = true
}
t . ecShardMapLock . RUnlock ( )
2019-05-31 00:17:58 +08:00
}
2020-01-10 16:37:44 +08:00
for k := range mapOfCollections {
2019-05-31 00:17:58 +08:00
ret = append ( ret , k )
2019-03-17 04:43:16 +08:00
}
return ret
}
2016-05-31 03:30:26 +08:00
func ( t * Topology ) FindCollection ( collectionName string ) ( * Collection , bool ) {
c , hasCollection := t . collectionMap . Find ( collectionName )
2017-07-14 20:04:33 +08:00
if ! hasCollection {
return nil , false
}
2014-12-09 12:29:25 +08:00
return c . ( * Collection ) , hasCollection
2014-03-11 02:43:54 +08:00
}
func ( t * Topology ) DeleteCollection ( collectionName string ) {
2016-05-31 03:30:26 +08:00
t . collectionMap . Delete ( collectionName )
2014-03-11 02:43:54 +08:00
}
2021-02-16 18:47:02 +08:00
func ( t * Topology ) DeleteLayout ( collectionName string , rp * super_block . ReplicaPlacement , ttl * needle . TTL , diskType types . DiskType ) {
2020-12-13 19:11:24 +08:00
collection , found := t . FindCollection ( collectionName )
if ! found {
return
}
2020-12-14 03:59:32 +08:00
collection . DeleteVolumeLayout ( rp , ttl , diskType )
2020-12-13 20:14:50 +08:00
if len ( collection . storageType2VolumeLayout . Items ( ) ) == 0 {
t . DeleteCollection ( collectionName )
}
2020-12-13 19:11:24 +08:00
}
2014-03-19 19:48:13 +08:00
func ( t * Topology ) RegisterVolumeLayout ( v storage . VolumeInfo , dn * DataNode ) {
2021-02-16 18:47:02 +08:00
diskType := types . ToDiskType ( v . DiskType )
2020-12-14 03:59:32 +08:00
vl := t . GetVolumeLayout ( v . Collection , v . ReplicaPlacement , v . Ttl , diskType )
2020-11-23 09:15:59 +08:00
vl . RegisterVolume ( & v , dn )
vl . EnsureCorrectWritables ( & v )
2014-09-21 03:38:59 +08:00
}
func ( t * Topology ) UnRegisterVolumeLayout ( v storage . VolumeInfo , dn * DataNode ) {
2020-12-13 16:58:58 +08:00
glog . Infof ( "removing volume info: %+v" , v )
2021-02-16 18:47:02 +08:00
diskType := types . ToDiskType ( v . DiskType )
2020-12-14 03:59:32 +08:00
volumeLayout := t . GetVolumeLayout ( v . Collection , v . ReplicaPlacement , v . Ttl , diskType )
2018-07-11 17:01:33 +08:00
volumeLayout . UnRegisterVolume ( & v , dn )
if volumeLayout . isEmpty ( ) {
2020-12-14 03:59:32 +08:00
t . DeleteLayout ( v . Collection , v . ReplicaPlacement , v . Ttl , diskType )
2018-07-11 17:01:33 +08:00
}
2012-09-17 08:31:15 +08:00
}
2012-09-23 11:46:31 +08:00
func ( t * Topology ) GetOrCreateDataCenter ( dcName string ) * DataCenter {
2012-09-17 08:31:15 +08:00
for _ , c := range t . Children ( ) {
dc := c . ( * DataCenter )
2012-09-23 11:46:31 +08:00
if string ( dc . Id ( ) ) == dcName {
2012-09-17 08:31:15 +08:00
return dc
}
}
2012-09-23 11:46:31 +08:00
dc := NewDataCenter ( dcName )
2012-09-17 08:31:15 +08:00
t . LinkChildNode ( dc )
return dc
}
2018-06-25 15:01:53 +08:00
2018-07-28 14:09:55 +08:00
func ( t * Topology ) SyncDataNodeRegistration ( volumes [ ] * master_pb . VolumeInformationMessage , dn * DataNode ) ( newVolumes , deletedVolumes [ ] storage . VolumeInfo ) {
2019-05-23 15:04:24 +08:00
// convert into in memory struct storage.VolumeInfo
2018-06-25 15:01:53 +08:00
var volumeInfos [ ] storage . VolumeInfo
for _ , v := range volumes {
if vi , err := storage . NewVolumeInfo ( v ) ; err == nil {
volumeInfos = append ( volumeInfos , vi )
} else {
glog . V ( 0 ) . Infof ( "Fail to convert joined volume information: %v" , err )
}
}
2019-05-23 15:04:24 +08:00
// find out the delta volumes
2020-06-05 23:18:15 +08:00
var changedVolumes [ ] storage . VolumeInfo
newVolumes , deletedVolumes , changedVolumes = dn . UpdateVolumes ( volumeInfos )
2019-04-21 02:35:20 +08:00
for _ , v := range newVolumes {
2018-06-25 15:01:53 +08:00
t . RegisterVolumeLayout ( v , dn )
}
for _ , v := range deletedVolumes {
t . UnRegisterVolumeLayout ( v , dn )
}
2020-06-05 23:18:15 +08:00
for _ , v := range changedVolumes {
2021-02-16 18:47:02 +08:00
diskType := types . ToDiskType ( v . DiskType )
2020-12-14 03:59:32 +08:00
vl := t . GetVolumeLayout ( v . Collection , v . ReplicaPlacement , v . Ttl , diskType )
adding locking to avoid nil VolumeLocationList
fix panic: runtime error: invalid memory address or nil pointer dereference
Oct 22 00:53:44 bedb-master1 weed[8055]: [signal SIGSEGV: segmentation violation code=0x1 addr=0x8 pc=0x17658da]
Oct 22 00:53:44 bedb-master1 weed[8055]: goroutine 310 [running]:
Oct 22 00:53:44 bedb-master1 weed[8055]: github.com/chrislusf/seaweedfs/weed/topology.(*VolumeLocationList).Length(...)
Oct 22 00:53:44 bedb-master1 weed[8055]: #011/root/seaweedfs/weed/topology/volume_location_list.go:35
Oct 22 00:53:44 bedb-master1 weed[8055]: github.com/chrislusf/seaweedfs/weed/topology.(*VolumeLayout).enoughCopies(...)
Oct 22 00:53:44 bedb-master1 weed[8055]: #011/root/seaweedfs/weed/topology/volume_layout.go:376
Oct 22 00:53:44 bedb-master1 weed[8055]: github.com/chrislusf/seaweedfs/weed/topology.(*VolumeLayout).ensureCorrectWritables(0xc000111d50, 0xc000b55438)
Oct 22 00:53:44 bedb-master1 weed[8055]: #011/root/seaweedfs/weed/topology/volume_layout.go:202 +0x5a
Oct 22 00:53:44 bedb-master1 weed[8055]: github.com/chrislusf/seaweedfs/weed/topology.(*Topology).SyncDataNodeRegistration(0xc00042ac60, 0xc001454d30, 0x1, 0x1, 0xc0005fc000, 0xc00135de40, 0x4, 0xc00135de50, 0x10, 0x10d, ...)
Oct 22 00:53:44 bedb-master1 weed[8055]: #011/root/seaweedfs/weed/topology/topology.go:224 +0x616
Oct 22 00:53:44 bedb-master1 weed[8055]: github.com/chrislusf/seaweedfs/weed/server.(*MasterServer).SendHeartbeat(0xc000162700, 0x23b97c0, 0xc000ae2c90, 0x0, 0x0)
Oct 22 00:53:44 bedb-master1 weed[8055]: #011/root/seaweedfs/weed/server/master_grpc_server.go:106 +0x325
Oct 22 00:53:44 bedb-master1 weed[8055]: github.com/chrislusf/seaweedfs/weed/pb/master_pb._Seaweed_SendHeartbeat_Handler(0x1f8e7c0, 0xc000162700, 0x23b0a60, 0xc00024b440, 0x3172c38, 0xc000ab7100)
Oct 22 00:53:44 bedb-master1 weed[8055]: #011/root/seaweedfs/weed/pb/master_pb/master.pb.go:4250 +0xad
Oct 22 00:53:44 bedb-master1 weed[8055]: google.golang.org/grpc.(*Server).processStreamingRPC(0xc0001f31e0, 0x23bb800, 0xc000ac5500, 0xc000ab7100, 0xc0001fea80, 0x311fec0, 0x0, 0x0, 0x0)
Oct 22 00:53:44 bedb-master1 weed[8055]: #011/root/go/pkg/mod/google.golang.org/grpc@v1.29.1/server.go:1329 +0xcd8
Oct 22 00:53:44 bedb-master1 weed[8055]: google.golang.org/grpc.(*Server).handleStream(0xc0001f31e0, 0x23bb800, 0xc000ac5500, 0xc000ab7100, 0x0)
Oct 22 00:53:44 bedb-master1 weed[8055]: #011/root/go/pkg/mod/google.golang.org/grpc@v1.29.1/server.go:1409 +0xc5c
Oct 22 00:53:44 bedb-master1 weed[8055]: google.golang.org/grpc.(*Server).serveStreams.func1.1(0xc0001ce8b0, 0xc0001f31e0, 0x23bb800, 0xc000ac5500, 0xc000ab7100)
Oct 22 00:53:44 bedb-master1 weed[8055]: #011/root/go/pkg/mod/google.golang.org/grpc@v1.29.1/server.go:746 +0xa5
Oct 22 00:53:44 bedb-master1 weed[8055]: created by google.golang.org/grpc.(*Server).serveStreams.func1
Oct 22 00:53:44 bedb-master1 weed[8055]: #011/root/go/pkg/mod/google.golang.org/grpc@v1.29.1/server.go:744 +0xa5
Oct 22 00:53:44 bedb-master1 systemd[1]: weedmaster.service: Main process exited, code=exited, status=2/INVALIDARGUMENT
Oct 22 00:53:44 bedb-master1 systemd[1]: weedmaster.service: Failed with result 'exit-code'.
2020-10-22 14:15:48 +08:00
vl . EnsureCorrectWritables ( & v )
2020-06-05 23:18:15 +08:00
}
2018-07-28 14:09:55 +08:00
return
2018-06-25 15:01:53 +08:00
}
2019-04-21 02:35:20 +08:00
func ( t * Topology ) IncrementalSyncDataNodeRegistration ( newVolumes , deletedVolumes [ ] * master_pb . VolumeShortInformationMessage , dn * DataNode ) {
var newVis , oldVis [ ] storage . VolumeInfo
for _ , v := range newVolumes {
vi , err := storage . NewVolumeInfoFromShort ( v )
if err != nil {
glog . V ( 0 ) . Infof ( "NewVolumeInfoFromShort %v: %v" , v , err )
continue
}
newVis = append ( newVis , vi )
}
for _ , v := range deletedVolumes {
vi , err := storage . NewVolumeInfoFromShort ( v )
if err != nil {
glog . V ( 0 ) . Infof ( "NewVolumeInfoFromShort %v: %v" , v , err )
continue
}
oldVis = append ( oldVis , vi )
}
dn . DeltaUpdateVolumes ( newVis , oldVis )
2019-04-21 14:53:37 +08:00
for _ , vi := range newVis {
t . RegisterVolumeLayout ( vi , dn )
}
for _ , vi := range oldVis {
t . UnRegisterVolumeLayout ( vi , dn )
}
2019-04-21 02:35:20 +08:00
return
}