2020-09-10 02:21:23 +08:00
package command
import (
	"context"
	"errors"
	"fmt"
	"os"
	"strings"
	"sync/atomic"
	"time"

	"github.com/chrislusf/seaweedfs/weed/glog"
	"github.com/chrislusf/seaweedfs/weed/pb"
	"github.com/chrislusf/seaweedfs/weed/pb/filer_pb"
	"github.com/chrislusf/seaweedfs/weed/replication"
	"github.com/chrislusf/seaweedfs/weed/replication/sink"
	"github.com/chrislusf/seaweedfs/weed/replication/sink/filersink"
	"github.com/chrislusf/seaweedfs/weed/replication/source"
	"github.com/chrislusf/seaweedfs/weed/security"
	statsCollect "github.com/chrislusf/seaweedfs/weed/stats"
	"github.com/chrislusf/seaweedfs/weed/util"
	"github.com/chrislusf/seaweedfs/weed/util/grace"
	"google.golang.org/grpc"
)
// SyncOptions holds the settings of the filer.sync command. The pointer
// fields are bound to command line flags in init; settings prefixed with
// "a" apply to filer A and settings prefixed with "b" apply to filer B.
type SyncOptions struct {
	// isActivePassive selects one directional follow from A to B.
	isActivePassive *bool

	// filerA and filerB are the addresses of the two filers.
	filerA, filerB *string
	// aPath and bPath are the directories to sync on each filer.
	aPath, bPath *string
	// aReplication and bReplication are the replication settings on each filer.
	aReplication, bReplication *string
	// aCollection and bCollection are the collections on each filer.
	aCollection, bCollection *string
	// aTtlSec and bTtlSec are the TTLs, in seconds, on each filer.
	aTtlSec, bTtlSec *int
	// aDiskType and bDiskType are the disk types (hdd, ssd or a tag) on each filer.
	aDiskType, bDiskType *string
	// aDebug and bDebug turn on printing of the files received by each filer.
	aDebug, bDebug *bool
	// aFromTsMs and bFromTsMs are the synchronization start timestamps, in milliseconds.
	aFromTsMs, bFromTsMs *int64
	// aProxyByFiler and bProxyByFiler read and write file chunks through
	// the filer instead of the volume servers.
	aProxyByFiler, bProxyByFiler *bool

	// metricsHttpPort is the metrics listen port.
	metricsHttpPort *int

	// clientId is a random id assigned once in init.
	clientId int32
	// clientEpoch is incremented before every subscription attempt.
	clientEpoch int32
}
var (
2020-09-12 19:08:03 +08:00
syncOptions SyncOptions
2020-09-10 02:33:52 +08:00
syncCpuProfile * string
syncMemProfile * string
2020-09-10 02:21:23 +08:00
)
func init ( ) {
cmdFilerSynchronize . Run = runFilerSynchronize // break init cycle
2021-01-24 16:01:44 +08:00
syncOptions . isActivePassive = cmdFilerSynchronize . Flag . Bool ( "isActivePassive" , false , "one directional follow from A to B if true" )
2020-09-10 02:21:23 +08:00
syncOptions . filerA = cmdFilerSynchronize . Flag . String ( "a" , "" , "filer A in one SeaweedFS cluster" )
syncOptions . filerB = cmdFilerSynchronize . Flag . String ( "b" , "" , "filer B in the other SeaweedFS cluster" )
syncOptions . aPath = cmdFilerSynchronize . Flag . String ( "a.path" , "/" , "directory to sync on filer A" )
syncOptions . bPath = cmdFilerSynchronize . Flag . String ( "b.path" , "/" , "directory to sync on filer B" )
syncOptions . aReplication = cmdFilerSynchronize . Flag . String ( "a.replication" , "" , "replication on filer A" )
syncOptions . bReplication = cmdFilerSynchronize . Flag . String ( "b.replication" , "" , "replication on filer B" )
syncOptions . aCollection = cmdFilerSynchronize . Flag . String ( "a.collection" , "" , "collection on filer A" )
syncOptions . bCollection = cmdFilerSynchronize . Flag . String ( "b.collection" , "" , "collection on filer B" )
syncOptions . aTtlSec = cmdFilerSynchronize . Flag . Int ( "a.ttlSec" , 0 , "ttl in seconds on filer A" )
syncOptions . bTtlSec = cmdFilerSynchronize . Flag . Int ( "b.ttlSec" , 0 , "ttl in seconds on filer B" )
2021-02-22 18:03:12 +08:00
syncOptions . aDiskType = cmdFilerSynchronize . Flag . String ( "a.disk" , "" , "[hdd|ssd|<tag>] hard drive or solid state drive or any tag on filer A" )
syncOptions . bDiskType = cmdFilerSynchronize . Flag . String ( "b.disk" , "" , "[hdd|ssd|<tag>] hard drive or solid state drive or any tag on filer B" )
2021-01-26 02:14:42 +08:00
syncOptions . aProxyByFiler = cmdFilerSynchronize . Flag . Bool ( "a.filerProxy" , false , "read and write file chunks by filer A instead of volume servers" )
syncOptions . bProxyByFiler = cmdFilerSynchronize . Flag . Bool ( "b.filerProxy" , false , "read and write file chunks by filer B instead of volume servers" )
2020-09-10 02:21:23 +08:00
syncOptions . aDebug = cmdFilerSynchronize . Flag . Bool ( "a.debug" , false , "debug mode to print out filer A received files" )
syncOptions . bDebug = cmdFilerSynchronize . Flag . Bool ( "b.debug" , false , "debug mode to print out filer B received files" )
2022-06-09 10:53:19 +08:00
syncOptions . aFromTsMs = cmdFilerSynchronize . Flag . Int64 ( "a.fromTsMs" , 0 , "synchronization from timestamp on filer A. The unit is millisecond" )
syncOptions . bFromTsMs = cmdFilerSynchronize . Flag . Int64 ( "b.fromTsMs" , 0 , "synchronization from timestamp on filer B. The unit is millisecond" )
2020-09-10 02:33:52 +08:00
syncCpuProfile = cmdFilerSynchronize . Flag . String ( "cpuprofile" , "" , "cpu profile output file" )
syncMemProfile = cmdFilerSynchronize . Flag . String ( "memprofile" , "" , "memory profile output file" )
2022-06-15 11:33:18 +08:00
syncOptions . metricsHttpPort = cmdFilerSynchronize . Flag . Int ( "metricsPort" , 0 , "metrics listen port" )
2021-12-30 16:23:57 +08:00
syncOptions . clientId = util . RandomInt32 ( )
2020-09-10 02:21:23 +08:00
}
var cmdFilerSynchronize = & Command {
UsageLine : "filer.sync -a=<oneFilerHost>:<oneFilerPort> -b=<otherFilerHost>:<otherFilerPort>" ,
2021-08-09 13:30:12 +08:00
Short : "resumable continuous synchronization between two active-active or active-passive SeaweedFS clusters" ,
Long : ` resumable continuous synchronization for file changes between two active - active or active - passive filers
2020-09-10 02:21:23 +08:00
filer . sync listens on filer notifications . If any file is updated , it will fetch the updated content ,
and write to the other destination . Different from filer . replicate :
* filer . sync only works between two filers .
* filer . sync does not need any special message queue setup .
* filer . sync supports both active - active and active - passive modes .
If restarted , the synchronization will resume from the previous checkpoints , persisted every minute .
2020-09-15 15:40:38 +08:00
A fresh sync will start from the earliest metadata logs .
2020-09-10 02:21:23 +08:00
` ,
}
func runFilerSynchronize ( cmd * Command , args [ ] string ) bool {
2021-08-03 16:30:35 +08:00
util . LoadConfiguration ( "security" , false )
2020-09-10 02:21:23 +08:00
grpcDialOption := security . LoadClientTLS ( util . GetViper ( ) , "grpc.client" )
2020-09-10 02:33:52 +08:00
grace . SetupProfiling ( * syncCpuProfile , * syncMemProfile )
2021-09-13 13:47:52 +08:00
filerA := pb . ServerAddress ( * syncOptions . filerA )
filerB := pb . ServerAddress ( * syncOptions . filerB )
2022-06-09 10:53:19 +08:00
2022-06-15 11:33:18 +08:00
// start filer.sync metrics server
go statsCollect . StartMetricsServer ( * syncOptions . metricsHttpPort )
2022-06-09 10:53:19 +08:00
// read a filer signature
aFilerSignature , aFilerErr := replication . ReadFilerSignature ( grpcDialOption , filerA )
if aFilerErr != nil {
glog . Errorf ( "get filer 'a' signature %d error from %s to %s: %v" , aFilerSignature , * syncOptions . filerA , * syncOptions . filerB , aFilerErr )
return true
}
// read b filer signature
bFilerSignature , bFilerErr := replication . ReadFilerSignature ( grpcDialOption , filerB )
if bFilerErr != nil {
glog . Errorf ( "get filer 'b' signature %d error from %s to %s: %v" , bFilerSignature , * syncOptions . filerA , * syncOptions . filerB , bFilerErr )
return true
}
2020-09-10 02:21:23 +08:00
go func ( ) {
2022-06-09 10:53:19 +08:00
// a->b
// set synchronization start timestamp to offset
initOffsetError := initOffsetFromTsMs ( grpcDialOption , filerB , aFilerSignature , * syncOptions . bFromTsMs )
if initOffsetError != nil {
glog . Errorf ( "init offset from timestamp %d error from %s to %s: %v" , * syncOptions . bFromTsMs , * syncOptions . filerA , * syncOptions . filerB , initOffsetError )
os . Exit ( 2 )
}
2020-09-10 02:21:23 +08:00
for {
2022-07-24 01:50:28 +08:00
syncOptions . clientEpoch ++
err := doSubscribeFilerMetaChanges ( syncOptions . clientId , syncOptions . clientEpoch , grpcDialOption , filerA , * syncOptions . aPath , * syncOptions . aProxyByFiler , filerB ,
2022-06-09 10:53:19 +08:00
* syncOptions . bPath , * syncOptions . bReplication , * syncOptions . bCollection , * syncOptions . bTtlSec , * syncOptions . bProxyByFiler , * syncOptions . bDiskType ,
* syncOptions . bDebug , aFilerSignature , bFilerSignature )
2020-09-10 02:21:23 +08:00
if err != nil {
glog . Errorf ( "sync from %s to %s: %v" , * syncOptions . filerA , * syncOptions . filerB , err )
time . Sleep ( 1747 * time . Millisecond )
}
}
} ( )
if ! * syncOptions . isActivePassive {
2022-06-09 10:53:19 +08:00
// b->a
// set synchronization start timestamp to offset
initOffsetError := initOffsetFromTsMs ( grpcDialOption , filerA , bFilerSignature , * syncOptions . aFromTsMs )
if initOffsetError != nil {
glog . Errorf ( "init offset from timestamp %d error from %s to %s: %v" , * syncOptions . aFromTsMs , * syncOptions . filerB , * syncOptions . filerA , initOffsetError )
os . Exit ( 2 )
}
2020-09-10 02:21:23 +08:00
go func ( ) {
for {
2022-07-24 01:50:28 +08:00
syncOptions . clientEpoch ++
err := doSubscribeFilerMetaChanges ( syncOptions . clientId , syncOptions . clientEpoch , grpcDialOption , filerB , * syncOptions . bPath , * syncOptions . bProxyByFiler , filerA ,
2022-06-09 10:53:19 +08:00
* syncOptions . aPath , * syncOptions . aReplication , * syncOptions . aCollection , * syncOptions . aTtlSec , * syncOptions . aProxyByFiler , * syncOptions . aDiskType ,
* syncOptions . aDebug , bFilerSignature , aFilerSignature )
2020-09-10 02:21:23 +08:00
if err != nil {
glog . Errorf ( "sync from %s to %s: %v" , * syncOptions . filerB , * syncOptions . filerA , err )
time . Sleep ( 2147 * time . Millisecond )
}
}
} ( )
}
select { }
return true
}
2022-06-09 10:53:19 +08:00
// initOffsetFromTsMs Initialize offset
func initOffsetFromTsMs ( grpcDialOption grpc . DialOption , targetFiler pb . ServerAddress , sourceFilerSignature int32 , fromTsMs int64 ) error {
if fromTsMs <= 0 {
return nil
2020-09-10 02:21:23 +08:00
}
2022-06-09 10:53:19 +08:00
// convert to nanosecond
fromTsNs := fromTsMs * 1000_000
// If not successful, exit the program.
setOffsetErr := setOffset ( grpcDialOption , targetFiler , SyncKeyPrefix , sourceFilerSignature , fromTsNs )
if setOffsetErr != nil {
return setOffsetErr
2020-09-10 02:21:23 +08:00
}
2022-06-09 10:53:19 +08:00
glog . Infof ( "setOffset from timestamp ms success! start offset: %d from %s to %s" , fromTsNs , * syncOptions . filerA , * syncOptions . filerB )
return nil
}
2022-07-24 01:50:28 +08:00
func doSubscribeFilerMetaChanges ( clientId int32 , clientEpoch int32 , grpcDialOption grpc . DialOption , sourceFiler pb . ServerAddress , sourcePath string , sourceReadChunkFromFiler bool , targetFiler pb . ServerAddress , targetPath string ,
2022-06-09 10:53:19 +08:00
replicationStr , collection string , ttlSec int , sinkWriteChunkByFiler bool , diskType string , debug bool , sourceFilerSignature int32 , targetFilerSignature int32 ) error {
2020-09-10 02:21:23 +08:00
// if first time, start from now
// if has previously synced, resume from that point of time
2022-06-14 19:46:02 +08:00
sourceFilerOffsetTsNs , err := getOffset ( grpcDialOption , targetFiler , getSignaturePrefixByPath ( sourcePath ) , sourceFilerSignature )
2020-09-10 02:21:23 +08:00
if err != nil {
return err
}
glog . V ( 0 ) . Infof ( "start sync %s(%d) => %s(%d) from %v(%d)" , sourceFiler , sourceFilerSignature , targetFiler , targetFilerSignature , time . Unix ( 0 , sourceFilerOffsetTsNs ) , sourceFilerOffsetTsNs )
// create filer sink
filerSource := & source . FilerSource { }
2021-09-13 13:47:52 +08:00
filerSource . DoInitialize ( sourceFiler . ToHttpAddress ( ) , sourceFiler . ToGrpcAddress ( ) , sourcePath , sourceReadChunkFromFiler )
2020-09-10 02:21:23 +08:00
filerSink := & filersink . FilerSink { }
2021-09-13 13:47:52 +08:00
filerSink . DoInitialize ( targetFiler . ToHttpAddress ( ) , targetFiler . ToGrpcAddress ( ) , targetPath , replicationStr , collection , ttlSec , diskType , grpcDialOption , sinkWriteChunkByFiler )
2020-09-10 02:21:23 +08:00
filerSink . SetSourceFiler ( filerSource )
2021-03-01 08:22:27 +08:00
persistEventFn := genProcessFunction ( sourcePath , targetPath , filerSink , debug )
2020-09-10 02:21:23 +08:00
processEventFn := func ( resp * filer_pb . SubscribeMetadataResponse ) error {
message := resp . EventNotification
for _ , sig := range message . Signatures {
if sig == targetFilerSignature && targetFilerSignature != 0 {
fmt . Printf ( "%s skipping %s change to %v\n" , targetFiler , sourceFiler , message )
return nil
}
}
2021-03-01 08:22:27 +08:00
return persistEventFn ( resp )
2020-09-10 02:21:23 +08:00
}
2022-06-28 03:40:47 +08:00
var lastLogTsNs = time . Now ( ) . UnixNano ( )
2022-06-15 13:33:20 +08:00
var clientName = fmt . Sprintf ( "syncFrom_%s_To_%s" , string ( sourceFiler ) , string ( targetFiler ) )
2021-08-09 13:30:36 +08:00
processEventFnWithOffset := pb . AddOffsetFunc ( processEventFn , 3 * time . Second , func ( counter int64 , lastTsNs int64 ) error {
2022-06-28 03:40:47 +08:00
now := time . Now ( ) . UnixNano ( )
2022-01-10 17:00:11 +08:00
glog . V ( 0 ) . Infof ( "sync %s to %s progressed to %v %0.2f/sec" , sourceFiler , targetFiler , time . Unix ( 0 , lastTsNs ) , float64 ( counter ) / ( float64 ( now - lastLogTsNs ) / 1e9 ) )
lastLogTsNs = now
2022-06-15 11:33:18 +08:00
// collect synchronous offset
statsCollect . FilerSyncOffsetGauge . WithLabelValues ( sourceFiler . String ( ) , targetFiler . String ( ) , clientName , sourcePath ) . Set ( float64 ( lastTsNs ) )
2022-06-14 19:46:02 +08:00
return setOffset ( grpcDialOption , targetFiler , getSignaturePrefixByPath ( sourcePath ) , sourceFilerSignature , lastTsNs )
2020-09-10 02:21:23 +08:00
} )
2022-07-24 01:50:28 +08:00
return pb . FollowMetadata ( sourceFiler , grpcDialOption , clientName , clientId , clientEpoch ,
2022-06-02 01:28:47 +08:00
sourcePath , nil , sourceFilerOffsetTsNs , 0 , targetFilerSignature , processEventFnWithOffset , pb . RetryForeverOnError )
2021-08-05 07:25:46 +08:00
2020-09-10 02:21:23 +08:00
}
// SyncKeyPrefix is the prefix of the keys under which the synchronization
// offsets are stored.
const SyncKeyPrefix = "sync."
2022-06-14 19:46:02 +08:00
// getSignaturePrefixByPath returns the offset key prefix for a synced path,
// so that offsets of different paths are maintained separately. The root
// path "/" maps to the bare SyncKeyPrefix, which keeps it compatible with
// historical versions; any other path is appended to SyncKeyPrefix.
func getSignaturePrefixByPath(path string) string {
	if path != "/" {
		return SyncKeyPrefix + path
	}
	return SyncKeyPrefix
}
2021-09-13 13:47:52 +08:00
func getOffset ( grpcDialOption grpc . DialOption , filer pb . ServerAddress , signaturePrefix string , signature int32 ) ( lastOffsetTsNs int64 , readErr error ) {
2020-09-10 02:21:23 +08:00
2021-12-26 16:15:03 +08:00
readErr = pb . WithFilerClient ( false , filer , grpcDialOption , func ( client filer_pb . SeaweedFilerClient ) error {
2021-03-01 08:22:27 +08:00
syncKey := [ ] byte ( signaturePrefix + "____" )
util . Uint32toBytes ( syncKey [ len ( signaturePrefix ) : len ( signaturePrefix ) + 4 ] , uint32 ( signature ) )
2020-09-10 02:21:23 +08:00
resp , err := client . KvGet ( context . Background ( ) , & filer_pb . KvGetRequest { Key : syncKey } )
if err != nil {
return err
}
if len ( resp . Error ) != 0 {
return errors . New ( resp . Error )
}
if len ( resp . Value ) < 8 {
return nil
}
lastOffsetTsNs = int64 ( util . BytesToUint64 ( resp . Value ) )
return nil
} )
return
}
2021-09-13 13:47:52 +08:00
func setOffset ( grpcDialOption grpc . DialOption , filer pb . ServerAddress , signaturePrefix string , signature int32 , offsetTsNs int64 ) error {
2021-12-26 16:15:03 +08:00
return pb . WithFilerClient ( false , filer , grpcDialOption , func ( client filer_pb . SeaweedFilerClient ) error {
2020-09-10 02:21:23 +08:00
2021-03-01 08:22:27 +08:00
syncKey := [ ] byte ( signaturePrefix + "____" )
util . Uint32toBytes ( syncKey [ len ( signaturePrefix ) : len ( signaturePrefix ) + 4 ] , uint32 ( signature ) )
2020-09-10 02:21:23 +08:00
valueBuf := make ( [ ] byte , 8 )
util . Uint64toBytes ( valueBuf , uint64 ( offsetTsNs ) )
resp , err := client . KvPut ( context . Background ( ) , & filer_pb . KvPutRequest {
Key : syncKey ,
Value : valueBuf ,
} )
if err != nil {
return err
}
if len ( resp . Error ) != 0 {
return errors . New ( resp . Error )
}
return nil
} )
}
2021-03-01 08:22:27 +08:00
func genProcessFunction ( sourcePath string , targetPath string , dataSink sink . ReplicationSink , debug bool ) func ( resp * filer_pb . SubscribeMetadataResponse ) error {
// process function
processEventFn := func ( resp * filer_pb . SubscribeMetadataResponse ) error {
message := resp . EventNotification
var sourceOldKey , sourceNewKey util . FullPath
if message . OldEntry != nil {
sourceOldKey = util . FullPath ( resp . Directory ) . Child ( message . OldEntry . Name )
}
if message . NewEntry != nil {
sourceNewKey = util . FullPath ( message . NewParentPath ) . Child ( message . NewEntry . Name )
}
if debug {
glog . V ( 0 ) . Infof ( "received %v" , resp )
}
if ! strings . HasPrefix ( resp . Directory , sourcePath ) {
return nil
}
// handle deletions
2022-02-25 17:17:26 +08:00
if filer_pb . IsDelete ( resp ) {
2021-03-01 08:22:27 +08:00
if ! strings . HasPrefix ( string ( sourceOldKey ) , sourcePath ) {
return nil
}
key := buildKey ( dataSink , message , targetPath , sourceOldKey , sourcePath )
2022-04-15 04:35:01 +08:00
if ! dataSink . IsIncremental ( ) {
return dataSink . DeleteEntry ( key , message . OldEntry . IsDirectory , message . DeleteChunks , message . Signatures )
}
return nil
2021-03-01 08:22:27 +08:00
}
// handle new entries
2022-02-25 17:17:26 +08:00
if filer_pb . IsCreate ( resp ) {
2021-03-01 08:22:27 +08:00
if ! strings . HasPrefix ( string ( sourceNewKey ) , sourcePath ) {
return nil
}
key := buildKey ( dataSink , message , targetPath , sourceNewKey , sourcePath )
return dataSink . CreateEntry ( key , message . NewEntry , message . Signatures )
}
// this is something special?
2022-02-25 17:17:26 +08:00
if filer_pb . IsEmpty ( resp ) {
2021-03-01 08:22:27 +08:00
return nil
}
// handle updates
if strings . HasPrefix ( string ( sourceOldKey ) , sourcePath ) {
// old key is in the watched directory
if strings . HasPrefix ( string ( sourceNewKey ) , sourcePath ) {
// new key is also in the watched directory
if ! dataSink . IsIncremental ( ) {
oldKey := util . Join ( targetPath , string ( sourceOldKey ) [ len ( sourcePath ) : ] )
message . NewParentPath = util . Join ( targetPath , message . NewParentPath [ len ( sourcePath ) : ] )
foundExisting , err := dataSink . UpdateEntry ( string ( oldKey ) , message . OldEntry , message . NewParentPath , message . NewEntry , message . DeleteChunks , message . Signatures )
if foundExisting {
return err
}
// not able to find old entry
if err = dataSink . DeleteEntry ( string ( oldKey ) , message . OldEntry . IsDirectory , false , message . Signatures ) ; err != nil {
return fmt . Errorf ( "delete old entry %v: %v" , oldKey , err )
}
}
// create the new entry
newKey := buildKey ( dataSink , message , targetPath , sourceNewKey , sourcePath )
return dataSink . CreateEntry ( newKey , message . NewEntry , message . Signatures )
} else {
// new key is outside of the watched directory
if ! dataSink . IsIncremental ( ) {
key := buildKey ( dataSink , message , targetPath , sourceOldKey , sourcePath )
return dataSink . DeleteEntry ( key , message . OldEntry . IsDirectory , message . DeleteChunks , message . Signatures )
}
}
} else {
// old key is outside of the watched directory
if strings . HasPrefix ( string ( sourceNewKey ) , sourcePath ) {
// new key is in the watched directory
key := buildKey ( dataSink , message , targetPath , sourceNewKey , sourcePath )
return dataSink . CreateEntry ( key , message . NewEntry , message . Signatures )
} else {
// new key is also outside of the watched directory
// skip
}
}
return nil
}
return processEventFn
}
2021-05-29 21:45:23 +08:00
func buildKey ( dataSink sink . ReplicationSink , message * filer_pb . EventNotification , targetPath string , sourceKey util . FullPath , sourcePath string ) ( key string ) {
2021-03-01 08:22:27 +08:00
if ! dataSink . IsIncremental ( ) {
2021-05-29 21:45:23 +08:00
key = util . Join ( targetPath , string ( sourceKey ) [ len ( sourcePath ) : ] )
} else {
var mTime int64
if message . NewEntry != nil {
mTime = message . NewEntry . Attributes . Mtime
} else if message . OldEntry != nil {
mTime = message . OldEntry . Attributes . Mtime
}
dateKey := time . Unix ( mTime , 0 ) . Format ( "2006-01-02" )
key = util . Join ( targetPath , dateKey , string ( sourceKey ) [ len ( sourcePath ) : ] )
2021-03-01 08:22:27 +08:00
}
2021-05-29 21:45:23 +08:00
return escapeKey ( key )
2021-03-01 08:22:27 +08:00
}