2020-09-10 02:21:23 +08:00
package command
import (
"context"
"errors"
"fmt"
2022-07-29 15:17:28 +08:00
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/pb"
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
"github.com/seaweedfs/seaweedfs/weed/replication"
"github.com/seaweedfs/seaweedfs/weed/replication/sink"
"github.com/seaweedfs/seaweedfs/weed/replication/sink/filersink"
"github.com/seaweedfs/seaweedfs/weed/replication/source"
"github.com/seaweedfs/seaweedfs/weed/security"
statsCollect "github.com/seaweedfs/seaweedfs/weed/stats"
"github.com/seaweedfs/seaweedfs/weed/util"
"github.com/seaweedfs/seaweedfs/weed/util/grace"
2020-09-10 02:21:23 +08:00
"google.golang.org/grpc"
2022-06-09 10:53:19 +08:00
"os"
2023-11-13 20:32:37 +08:00
"regexp"
2020-09-10 02:21:23 +08:00
"strings"
2024-01-13 02:57:18 +08:00
"sync/atomic"
2020-09-10 02:21:23 +08:00
"time"
)
// SyncOptions collects the settings of the filer.sync command.
//
// The pointer fields are bound to command-line flags in init; fields prefixed
// with "a" apply to filer A and fields prefixed with "b" apply to filer B.
type SyncOptions struct {
	// one directional follow from A to B if true
	isActivePassive *bool

	// addresses of the two filers
	filerA, filerB *string

	// directory to sync, and the directories excluded from the sync, per side
	aPath, aExcludePaths *string
	bPath, bExcludePaths *string

	// write settings used when the given filer is the sync target
	aReplication, bReplication *string
	aCollection, bCollection   *string
	aTtlSec, bTtlSec           *int
	aDiskType, bDiskType       *string

	// debug mode to print out received files
	aDebug, bDebug *bool

	// explicit synchronization start timestamps, in milliseconds
	aFromTsMs, bFromTsMs *int64

	// read and write file chunks by the filer instead of volume servers
	aProxyByFiler, bProxyByFiler *bool

	// metrics listener address
	metricsHttpIp   *string
	metricsHttpPort *int

	// maximum number of files synced concurrently
	concurrency *int

	// delete and update files when synchronizing
	aDoDeleteFiles, bDoDeleteFiles *bool

	// clientId is assigned once in init; clientEpoch is incremented before
	// every subscription attempt.
	clientId    int32
	clientEpoch atomic.Int32
}
2022-09-03 14:03:23 +08:00
// SyncKeyPrefix is the leading part of the key under which sync offsets are stored.
const SyncKeyPrefix = "sync."

// DefaultConcurrencyLimit is the default value of the "concurrency" flag and the
// fallback used when an invalid concurrency is supplied.
const DefaultConcurrencyLimit = 32
2020-09-10 02:21:23 +08:00
var (
2020-09-12 19:08:03 +08:00
syncOptions SyncOptions
2020-09-10 02:33:52 +08:00
syncCpuProfile * string
syncMemProfile * string
2020-09-10 02:21:23 +08:00
)
func init ( ) {
cmdFilerSynchronize . Run = runFilerSynchronize // break init cycle
2021-01-24 16:01:44 +08:00
syncOptions . isActivePassive = cmdFilerSynchronize . Flag . Bool ( "isActivePassive" , false , "one directional follow from A to B if true" )
2020-09-10 02:21:23 +08:00
syncOptions . filerA = cmdFilerSynchronize . Flag . String ( "a" , "" , "filer A in one SeaweedFS cluster" )
syncOptions . filerB = cmdFilerSynchronize . Flag . String ( "b" , "" , "filer B in the other SeaweedFS cluster" )
syncOptions . aPath = cmdFilerSynchronize . Flag . String ( "a.path" , "/" , "directory to sync on filer A" )
2022-07-27 22:22:57 +08:00
syncOptions . aExcludePaths = cmdFilerSynchronize . Flag . String ( "a.excludePaths" , "" , "exclude directories to sync on filer A" )
2020-09-10 02:21:23 +08:00
syncOptions . bPath = cmdFilerSynchronize . Flag . String ( "b.path" , "/" , "directory to sync on filer B" )
2022-07-27 22:22:57 +08:00
syncOptions . bExcludePaths = cmdFilerSynchronize . Flag . String ( "b.excludePaths" , "" , "exclude directories to sync on filer B" )
2020-09-10 02:21:23 +08:00
syncOptions . aReplication = cmdFilerSynchronize . Flag . String ( "a.replication" , "" , "replication on filer A" )
syncOptions . bReplication = cmdFilerSynchronize . Flag . String ( "b.replication" , "" , "replication on filer B" )
syncOptions . aCollection = cmdFilerSynchronize . Flag . String ( "a.collection" , "" , "collection on filer A" )
syncOptions . bCollection = cmdFilerSynchronize . Flag . String ( "b.collection" , "" , "collection on filer B" )
syncOptions . aTtlSec = cmdFilerSynchronize . Flag . Int ( "a.ttlSec" , 0 , "ttl in seconds on filer A" )
syncOptions . bTtlSec = cmdFilerSynchronize . Flag . Int ( "b.ttlSec" , 0 , "ttl in seconds on filer B" )
2021-02-22 18:03:12 +08:00
syncOptions . aDiskType = cmdFilerSynchronize . Flag . String ( "a.disk" , "" , "[hdd|ssd|<tag>] hard drive or solid state drive or any tag on filer A" )
syncOptions . bDiskType = cmdFilerSynchronize . Flag . String ( "b.disk" , "" , "[hdd|ssd|<tag>] hard drive or solid state drive or any tag on filer B" )
2021-01-26 02:14:42 +08:00
syncOptions . aProxyByFiler = cmdFilerSynchronize . Flag . Bool ( "a.filerProxy" , false , "read and write file chunks by filer A instead of volume servers" )
syncOptions . bProxyByFiler = cmdFilerSynchronize . Flag . Bool ( "b.filerProxy" , false , "read and write file chunks by filer B instead of volume servers" )
2020-09-10 02:21:23 +08:00
syncOptions . aDebug = cmdFilerSynchronize . Flag . Bool ( "a.debug" , false , "debug mode to print out filer A received files" )
syncOptions . bDebug = cmdFilerSynchronize . Flag . Bool ( "b.debug" , false , "debug mode to print out filer B received files" )
2022-06-09 10:53:19 +08:00
syncOptions . aFromTsMs = cmdFilerSynchronize . Flag . Int64 ( "a.fromTsMs" , 0 , "synchronization from timestamp on filer A. The unit is millisecond" )
syncOptions . bFromTsMs = cmdFilerSynchronize . Flag . Int64 ( "b.fromTsMs" , 0 , "synchronization from timestamp on filer B. The unit is millisecond" )
2022-09-14 21:30:32 +08:00
syncOptions . concurrency = cmdFilerSynchronize . Flag . Int ( "concurrency" , DefaultConcurrencyLimit , "The maximum number of files that will be synced concurrently." )
2020-09-10 02:33:52 +08:00
syncCpuProfile = cmdFilerSynchronize . Flag . String ( "cpuprofile" , "" , "cpu profile output file" )
syncMemProfile = cmdFilerSynchronize . Flag . String ( "memprofile" , "" , "memory profile output file" )
2022-11-25 02:22:59 +08:00
syncOptions . metricsHttpIp = cmdFilerSynchronize . Flag . String ( "metricsIp" , "" , "metrics listen ip" )
2022-06-15 11:33:18 +08:00
syncOptions . metricsHttpPort = cmdFilerSynchronize . Flag . Int ( "metricsPort" , 0 , "metrics listen port" )
2024-01-07 02:02:16 +08:00
syncOptions . aDoDeleteFiles = cmdFilerSynchronize . Flag . Bool ( "a.doDeleteFiles" , true , "delete and update files when synchronizing on filer A" )
syncOptions . bDoDeleteFiles = cmdFilerSynchronize . Flag . Bool ( "b.doDeleteFiles" , true , "delete and update files when synchronizing on filer B" )
2021-12-30 16:23:57 +08:00
syncOptions . clientId = util . RandomInt32 ( )
2020-09-10 02:21:23 +08:00
}
var cmdFilerSynchronize = & Command {
UsageLine : "filer.sync -a=<oneFilerHost>:<oneFilerPort> -b=<otherFilerHost>:<otherFilerPort>" ,
2021-08-09 13:30:12 +08:00
Short : "resumable continuous synchronization between two active-active or active-passive SeaweedFS clusters" ,
Long : ` resumable continuous synchronization for file changes between two active - active or active - passive filers
2020-09-10 02:21:23 +08:00
filer . sync listens on filer notifications . If any file is updated , it will fetch the updated content ,
and write to the other destination . Different from filer . replicate :
* filer . sync only works between two filers .
* filer . sync does not need any special message queue setup .
* filer . sync supports both active - active and active - passive modes .
If restarted , the synchronization will resume from the previous checkpoints , persisted every minute .
2020-09-15 15:40:38 +08:00
A fresh sync will start from the earliest metadata logs .
2020-09-10 02:21:23 +08:00
` ,
}
func runFilerSynchronize ( cmd * Command , args [ ] string ) bool {
2021-08-03 16:30:35 +08:00
util . LoadConfiguration ( "security" , false )
2020-09-10 02:21:23 +08:00
grpcDialOption := security . LoadClientTLS ( util . GetViper ( ) , "grpc.client" )
2020-09-10 02:33:52 +08:00
grace . SetupProfiling ( * syncCpuProfile , * syncMemProfile )
2021-09-13 13:47:52 +08:00
filerA := pb . ServerAddress ( * syncOptions . filerA )
filerB := pb . ServerAddress ( * syncOptions . filerB )
2022-06-09 10:53:19 +08:00
2022-06-15 11:33:18 +08:00
// start filer.sync metrics server
2022-11-25 02:22:59 +08:00
go statsCollect . StartMetricsServer ( * syncOptions . metricsHttpIp , * syncOptions . metricsHttpPort )
2022-06-15 11:33:18 +08:00
2022-06-09 10:53:19 +08:00
// read a filer signature
aFilerSignature , aFilerErr := replication . ReadFilerSignature ( grpcDialOption , filerA )
if aFilerErr != nil {
glog . Errorf ( "get filer 'a' signature %d error from %s to %s: %v" , aFilerSignature , * syncOptions . filerA , * syncOptions . filerB , aFilerErr )
return true
}
// read b filer signature
bFilerSignature , bFilerErr := replication . ReadFilerSignature ( grpcDialOption , filerB )
if bFilerErr != nil {
glog . Errorf ( "get filer 'b' signature %d error from %s to %s: %v" , bFilerSignature , * syncOptions . filerA , * syncOptions . filerB , bFilerErr )
return true
}
2020-09-10 02:21:23 +08:00
go func ( ) {
2022-06-09 10:53:19 +08:00
// a->b
// set synchronization start timestamp to offset
2022-08-16 12:56:47 +08:00
initOffsetError := initOffsetFromTsMs ( grpcDialOption , filerB , aFilerSignature , * syncOptions . bFromTsMs , getSignaturePrefixByPath ( * syncOptions . aPath ) )
2022-06-09 10:53:19 +08:00
if initOffsetError != nil {
glog . Errorf ( "init offset from timestamp %d error from %s to %s: %v" , * syncOptions . bFromTsMs , * syncOptions . filerA , * syncOptions . filerB , initOffsetError )
os . Exit ( 2 )
}
2020-09-10 02:21:23 +08:00
for {
2024-01-13 02:57:18 +08:00
syncOptions . clientEpoch . Add ( 1 )
2022-07-27 22:22:57 +08:00
err := doSubscribeFilerMetaChanges (
syncOptions . clientId ,
2024-01-13 02:57:18 +08:00
syncOptions . clientEpoch . Load ( ) ,
2022-07-27 22:22:57 +08:00
grpcDialOption ,
filerA ,
* syncOptions . aPath ,
2022-08-07 16:34:32 +08:00
util . StringSplit ( * syncOptions . aExcludePaths , "," ) ,
2022-07-27 22:22:57 +08:00
* syncOptions . aProxyByFiler ,
filerB ,
* syncOptions . bPath ,
* syncOptions . bReplication ,
* syncOptions . bCollection ,
* syncOptions . bTtlSec ,
* syncOptions . bProxyByFiler ,
* syncOptions . bDiskType ,
* syncOptions . bDebug ,
2022-09-03 14:03:23 +08:00
* syncOptions . concurrency ,
2024-01-07 02:02:16 +08:00
* syncOptions . bDoDeleteFiles ,
2022-07-27 22:22:57 +08:00
aFilerSignature ,
bFilerSignature )
2020-09-10 02:21:23 +08:00
if err != nil {
glog . Errorf ( "sync from %s to %s: %v" , * syncOptions . filerA , * syncOptions . filerB , err )
time . Sleep ( 1747 * time . Millisecond )
}
}
} ( )
if ! * syncOptions . isActivePassive {
2022-06-09 10:53:19 +08:00
// b->a
// set synchronization start timestamp to offset
2022-08-16 12:56:47 +08:00
initOffsetError := initOffsetFromTsMs ( grpcDialOption , filerA , bFilerSignature , * syncOptions . aFromTsMs , getSignaturePrefixByPath ( * syncOptions . bPath ) )
2022-06-09 10:53:19 +08:00
if initOffsetError != nil {
glog . Errorf ( "init offset from timestamp %d error from %s to %s: %v" , * syncOptions . aFromTsMs , * syncOptions . filerB , * syncOptions . filerA , initOffsetError )
os . Exit ( 2 )
}
2020-09-10 02:21:23 +08:00
go func ( ) {
for {
2024-01-13 02:57:18 +08:00
syncOptions . clientEpoch . Add ( 1 )
2022-07-27 22:22:57 +08:00
err := doSubscribeFilerMetaChanges (
syncOptions . clientId ,
2024-01-13 02:57:18 +08:00
syncOptions . clientEpoch . Load ( ) ,
2022-07-27 22:22:57 +08:00
grpcDialOption ,
filerB ,
* syncOptions . bPath ,
2022-08-07 16:34:32 +08:00
util . StringSplit ( * syncOptions . bExcludePaths , "," ) ,
2022-07-27 22:22:57 +08:00
* syncOptions . bProxyByFiler ,
filerA ,
* syncOptions . aPath ,
* syncOptions . aReplication ,
* syncOptions . aCollection ,
* syncOptions . aTtlSec ,
* syncOptions . aProxyByFiler ,
* syncOptions . aDiskType ,
* syncOptions . aDebug ,
2022-09-03 14:03:23 +08:00
* syncOptions . concurrency ,
2024-01-07 02:02:16 +08:00
* syncOptions . aDoDeleteFiles ,
2022-07-27 22:22:57 +08:00
bFilerSignature ,
aFilerSignature )
2020-09-10 02:21:23 +08:00
if err != nil {
glog . Errorf ( "sync from %s to %s: %v" , * syncOptions . filerB , * syncOptions . filerA , err )
time . Sleep ( 2147 * time . Millisecond )
}
}
} ( )
}
select { }
return true
}
2022-06-09 10:53:19 +08:00
// initOffsetFromTsMs Initialize offset
2022-08-16 12:56:47 +08:00
func initOffsetFromTsMs ( grpcDialOption grpc . DialOption , targetFiler pb . ServerAddress , sourceFilerSignature int32 , fromTsMs int64 , signaturePrefix string ) error {
2022-06-09 10:53:19 +08:00
if fromTsMs <= 0 {
return nil
2020-09-10 02:21:23 +08:00
}
2022-06-09 10:53:19 +08:00
// convert to nanosecond
fromTsNs := fromTsMs * 1000_000
// If not successful, exit the program.
2022-08-16 12:56:47 +08:00
setOffsetErr := setOffset ( grpcDialOption , targetFiler , signaturePrefix , sourceFilerSignature , fromTsNs )
2022-06-09 10:53:19 +08:00
if setOffsetErr != nil {
return setOffsetErr
2020-09-10 02:21:23 +08:00
}
2022-06-09 10:53:19 +08:00
glog . Infof ( "setOffset from timestamp ms success! start offset: %d from %s to %s" , fromTsNs , * syncOptions . filerA , * syncOptions . filerB )
return nil
}
2022-07-27 22:22:57 +08:00
func doSubscribeFilerMetaChanges ( clientId int32 , clientEpoch int32 , grpcDialOption grpc . DialOption , sourceFiler pb . ServerAddress , sourcePath string , sourceExcludePaths [ ] string , sourceReadChunkFromFiler bool , targetFiler pb . ServerAddress , targetPath string ,
2024-01-07 02:02:16 +08:00
replicationStr , collection string , ttlSec int , sinkWriteChunkByFiler bool , diskType string , debug bool , concurrency int , doDeleteFiles bool , sourceFilerSignature int32 , targetFilerSignature int32 ) error {
2020-09-10 02:21:23 +08:00
// if first time, start from now
// if has previously synced, resume from that point of time
2022-06-14 19:46:02 +08:00
sourceFilerOffsetTsNs , err := getOffset ( grpcDialOption , targetFiler , getSignaturePrefixByPath ( sourcePath ) , sourceFilerSignature )
2020-09-10 02:21:23 +08:00
if err != nil {
return err
}
glog . V ( 0 ) . Infof ( "start sync %s(%d) => %s(%d) from %v(%d)" , sourceFiler , sourceFilerSignature , targetFiler , targetFilerSignature , time . Unix ( 0 , sourceFilerOffsetTsNs ) , sourceFilerOffsetTsNs )
// create filer sink
filerSource := & source . FilerSource { }
2021-09-13 13:47:52 +08:00
filerSource . DoInitialize ( sourceFiler . ToHttpAddress ( ) , sourceFiler . ToGrpcAddress ( ) , sourcePath , sourceReadChunkFromFiler )
2020-09-10 02:21:23 +08:00
filerSink := & filersink . FilerSink { }
2021-09-13 13:47:52 +08:00
filerSink . DoInitialize ( targetFiler . ToHttpAddress ( ) , targetFiler . ToGrpcAddress ( ) , targetPath , replicationStr , collection , ttlSec , diskType , grpcDialOption , sinkWriteChunkByFiler )
2020-09-10 02:21:23 +08:00
filerSink . SetSourceFiler ( filerSource )
2024-01-07 02:02:16 +08:00
persistEventFn := genProcessFunction ( sourcePath , targetPath , sourceExcludePaths , nil , filerSink , doDeleteFiles , debug )
2021-03-01 08:22:27 +08:00
2020-09-10 02:21:23 +08:00
processEventFn := func ( resp * filer_pb . SubscribeMetadataResponse ) error {
message := resp . EventNotification
for _ , sig := range message . Signatures {
if sig == targetFilerSignature && targetFilerSignature != 0 {
fmt . Printf ( "%s skipping %s change to %v\n" , targetFiler , sourceFiler , message )
return nil
}
}
2021-03-01 08:22:27 +08:00
return persistEventFn ( resp )
2020-09-10 02:21:23 +08:00
}
2022-09-03 14:03:23 +08:00
if concurrency < 0 || concurrency > 1024 {
2022-09-14 21:30:32 +08:00
glog . Warningf ( "invalid concurrency value, using default: %d" , DefaultConcurrencyLimit )
concurrency = DefaultConcurrencyLimit
2022-09-03 14:03:23 +08:00
}
2024-01-13 02:57:18 +08:00
processor := NewMetadataProcessor ( processEventFn , concurrency , sourceFilerOffsetTsNs )
2020-09-10 02:21:23 +08:00
2022-06-28 03:40:47 +08:00
var lastLogTsNs = time . Now ( ) . UnixNano ( )
2022-06-15 13:33:20 +08:00
var clientName = fmt . Sprintf ( "syncFrom_%s_To_%s" , string ( sourceFiler ) , string ( targetFiler ) )
2022-08-07 15:56:15 +08:00
processEventFnWithOffset := pb . AddOffsetFunc ( func ( resp * filer_pb . SubscribeMetadataResponse ) error {
processor . AddSyncJob ( resp )
return nil
} , 3 * time . Second , func ( counter int64 , lastTsNs int64 ) error {
2024-01-13 02:57:18 +08:00
offsetTsNs := processor . processedTsWatermark . Load ( )
if offsetTsNs == 0 {
2022-08-16 14:43:52 +08:00
return nil
}
2022-08-07 15:56:15 +08:00
// use processor.processedTsWatermark instead of the lastTsNs from the most recent job
2022-06-28 03:40:47 +08:00
now := time . Now ( ) . UnixNano ( )
2024-01-13 02:57:18 +08:00
glog . V ( 0 ) . Infof ( "sync %s to %s progressed to %v %0.2f/sec" , sourceFiler , targetFiler , time . Unix ( 0 , offsetTsNs ) , float64 ( counter ) / ( float64 ( now - lastLogTsNs ) / 1e9 ) )
2022-01-10 17:00:11 +08:00
lastLogTsNs = now
2022-06-15 11:33:18 +08:00
// collect synchronous offset
2024-01-13 02:57:18 +08:00
statsCollect . FilerSyncOffsetGauge . WithLabelValues ( sourceFiler . String ( ) , targetFiler . String ( ) , clientName , sourcePath ) . Set ( float64 ( offsetTsNs ) )
return setOffset ( grpcDialOption , targetFiler , getSignaturePrefixByPath ( sourcePath ) , sourceFilerSignature , offsetTsNs )
2020-09-10 02:21:23 +08:00
} )
2023-03-22 14:01:49 +08:00
metadataFollowOption := & pb . MetadataFollowOption {
ClientName : clientName ,
ClientId : clientId ,
ClientEpoch : clientEpoch ,
SelfSignature : targetFilerSignature ,
PathPrefix : sourcePath ,
AdditionalPathPrefixes : nil ,
DirectoriesToWatch : nil ,
StartTsNs : sourceFilerOffsetTsNs ,
StopTsNs : 0 ,
EventErrorType : pb . RetryForeverOnError ,
}
return pb . FollowMetadata ( sourceFiler , grpcDialOption , metadataFollowOption , processEventFnWithOffset )
2021-08-05 07:25:46 +08:00
2020-09-10 02:21:23 +08:00
}
2022-06-14 19:46:02 +08:00
// getSignaturePrefixByPath returns the offset key prefix for the given synced
// path, so that syncs of different paths keep their offsets separately.
func getSignaturePrefixByPath(path string) string {
	prefix := SyncKeyPrefix
	if path != "/" {
		// the root path keeps the bare prefix to stay compatible with historical versions
		prefix += path
	}
	return prefix
}
2021-09-13 13:47:52 +08:00
func getOffset ( grpcDialOption grpc . DialOption , filer pb . ServerAddress , signaturePrefix string , signature int32 ) ( lastOffsetTsNs int64 , readErr error ) {
2020-09-10 02:21:23 +08:00
2023-01-20 17:48:12 +08:00
readErr = pb . WithFilerClient ( false , signature , filer , grpcDialOption , func ( client filer_pb . SeaweedFilerClient ) error {
2021-03-01 08:22:27 +08:00
syncKey := [ ] byte ( signaturePrefix + "____" )
util . Uint32toBytes ( syncKey [ len ( signaturePrefix ) : len ( signaturePrefix ) + 4 ] , uint32 ( signature ) )
2020-09-10 02:21:23 +08:00
resp , err := client . KvGet ( context . Background ( ) , & filer_pb . KvGetRequest { Key : syncKey } )
if err != nil {
return err
}
if len ( resp . Error ) != 0 {
return errors . New ( resp . Error )
}
if len ( resp . Value ) < 8 {
return nil
}
lastOffsetTsNs = int64 ( util . BytesToUint64 ( resp . Value ) )
return nil
} )
return
}
2021-09-13 13:47:52 +08:00
func setOffset ( grpcDialOption grpc . DialOption , filer pb . ServerAddress , signaturePrefix string , signature int32 , offsetTsNs int64 ) error {
2023-01-20 17:48:12 +08:00
return pb . WithFilerClient ( false , signature , filer , grpcDialOption , func ( client filer_pb . SeaweedFilerClient ) error {
2020-09-10 02:21:23 +08:00
2021-03-01 08:22:27 +08:00
syncKey := [ ] byte ( signaturePrefix + "____" )
util . Uint32toBytes ( syncKey [ len ( signaturePrefix ) : len ( signaturePrefix ) + 4 ] , uint32 ( signature ) )
2020-09-10 02:21:23 +08:00
valueBuf := make ( [ ] byte , 8 )
util . Uint64toBytes ( valueBuf , uint64 ( offsetTsNs ) )
resp , err := client . KvPut ( context . Background ( ) , & filer_pb . KvPutRequest {
Key : syncKey ,
Value : valueBuf ,
} )
if err != nil {
return err
}
if len ( resp . Error ) != 0 {
return errors . New ( resp . Error )
}
return nil
} )
}
2021-03-01 08:22:27 +08:00
2023-11-17 21:25:09 +08:00
func genProcessFunction ( sourcePath string , targetPath string , excludePaths [ ] string , reExcludeFileName * regexp . Regexp , dataSink sink . ReplicationSink , doDeleteFiles bool , debug bool ) func ( resp * filer_pb . SubscribeMetadataResponse ) error {
2021-03-01 08:22:27 +08:00
// process function
processEventFn := func ( resp * filer_pb . SubscribeMetadataResponse ) error {
message := resp . EventNotification
var sourceOldKey , sourceNewKey util . FullPath
if message . OldEntry != nil {
sourceOldKey = util . FullPath ( resp . Directory ) . Child ( message . OldEntry . Name )
}
if message . NewEntry != nil {
sourceNewKey = util . FullPath ( message . NewParentPath ) . Child ( message . NewEntry . Name )
}
if debug {
glog . V ( 0 ) . Infof ( "received %v" , resp )
}
2024-01-19 01:13:14 +08:00
2024-03-21 03:54:29 +08:00
if isMultipartUploadDir ( resp . Directory + "/" ) {
2024-01-18 22:13:46 +08:00
return nil
}
2024-01-19 01:13:14 +08:00
2021-03-01 08:22:27 +08:00
if ! strings . HasPrefix ( resp . Directory , sourcePath ) {
return nil
}
2022-07-27 22:22:57 +08:00
for _ , excludePath := range excludePaths {
if strings . HasPrefix ( resp . Directory , excludePath ) {
return nil
}
}
2023-11-13 20:32:37 +08:00
if reExcludeFileName != nil && reExcludeFileName . MatchString ( message . NewEntry . Name ) {
return nil
}
2023-11-17 21:25:09 +08:00
if dataSink . IsIncremental ( ) {
doDeleteFiles = false
}
2021-03-01 08:22:27 +08:00
// handle deletions
2022-02-25 17:17:26 +08:00
if filer_pb . IsDelete ( resp ) {
2024-01-13 03:04:29 +08:00
if ! doDeleteFiles {
2023-11-17 21:25:09 +08:00
return nil
}
2021-03-01 08:22:27 +08:00
if ! strings . HasPrefix ( string ( sourceOldKey ) , sourcePath ) {
return nil
}
key := buildKey ( dataSink , message , targetPath , sourceOldKey , sourcePath )
2023-11-17 21:25:09 +08:00
return dataSink . DeleteEntry ( key , message . OldEntry . IsDirectory , message . DeleteChunks , message . Signatures )
2021-03-01 08:22:27 +08:00
}
// handle new entries
2022-02-25 17:17:26 +08:00
if filer_pb . IsCreate ( resp ) {
2021-03-01 08:22:27 +08:00
if ! strings . HasPrefix ( string ( sourceNewKey ) , sourcePath ) {
return nil
}
key := buildKey ( dataSink , message , targetPath , sourceNewKey , sourcePath )
2022-12-18 05:18:35 +08:00
if err := dataSink . CreateEntry ( key , message . NewEntry , message . Signatures ) ; err != nil {
return fmt . Errorf ( "create entry1 : %v" , err )
} else {
return nil
}
2021-03-01 08:22:27 +08:00
}
// this is something special?
2022-02-25 17:17:26 +08:00
if filer_pb . IsEmpty ( resp ) {
2021-03-01 08:22:27 +08:00
return nil
}
// handle updates
if strings . HasPrefix ( string ( sourceOldKey ) , sourcePath ) {
// old key is in the watched directory
if strings . HasPrefix ( string ( sourceNewKey ) , sourcePath ) {
// new key is also in the watched directory
2023-11-17 21:25:09 +08:00
if doDeleteFiles {
2021-03-01 08:22:27 +08:00
oldKey := util . Join ( targetPath , string ( sourceOldKey ) [ len ( sourcePath ) : ] )
message . NewParentPath = util . Join ( targetPath , message . NewParentPath [ len ( sourcePath ) : ] )
foundExisting , err := dataSink . UpdateEntry ( string ( oldKey ) , message . OldEntry , message . NewParentPath , message . NewEntry , message . DeleteChunks , message . Signatures )
if foundExisting {
return err
}
// not able to find old entry
if err = dataSink . DeleteEntry ( string ( oldKey ) , message . OldEntry . IsDirectory , false , message . Signatures ) ; err != nil {
return fmt . Errorf ( "delete old entry %v: %v" , oldKey , err )
}
}
// create the new entry
newKey := buildKey ( dataSink , message , targetPath , sourceNewKey , sourcePath )
2022-12-18 05:18:35 +08:00
if err := dataSink . CreateEntry ( newKey , message . NewEntry , message . Signatures ) ; err != nil {
return fmt . Errorf ( "create entry2 : %v" , err )
} else {
return nil
}
2021-03-01 08:22:27 +08:00
} else {
// new key is outside of the watched directory
2023-11-17 21:25:09 +08:00
if doDeleteFiles {
2021-03-01 08:22:27 +08:00
key := buildKey ( dataSink , message , targetPath , sourceOldKey , sourcePath )
return dataSink . DeleteEntry ( key , message . OldEntry . IsDirectory , message . DeleteChunks , message . Signatures )
}
}
} else {
// old key is outside of the watched directory
if strings . HasPrefix ( string ( sourceNewKey ) , sourcePath ) {
// new key is in the watched directory
key := buildKey ( dataSink , message , targetPath , sourceNewKey , sourcePath )
2022-12-18 05:18:35 +08:00
if err := dataSink . CreateEntry ( key , message . NewEntry , message . Signatures ) ; err != nil {
return fmt . Errorf ( "create entry3 : %v" , err )
} else {
return nil
}
2021-03-01 08:22:27 +08:00
} else {
// new key is also outside of the watched directory
// skip
}
}
return nil
}
return processEventFn
}
2021-05-29 21:45:23 +08:00
func buildKey ( dataSink sink . ReplicationSink , message * filer_pb . EventNotification , targetPath string , sourceKey util . FullPath , sourcePath string ) ( key string ) {
2021-03-01 08:22:27 +08:00
if ! dataSink . IsIncremental ( ) {
2021-05-29 21:45:23 +08:00
key = util . Join ( targetPath , string ( sourceKey ) [ len ( sourcePath ) : ] )
} else {
var mTime int64
if message . NewEntry != nil {
mTime = message . NewEntry . Attributes . Mtime
} else if message . OldEntry != nil {
mTime = message . OldEntry . Attributes . Mtime
}
dateKey := time . Unix ( mTime , 0 ) . Format ( "2006-01-02" )
key = util . Join ( targetPath , dateKey , string ( sourceKey ) [ len ( sourcePath ) : ] )
2021-03-01 08:22:27 +08:00
}
2021-05-29 21:45:23 +08:00
return escapeKey ( key )
2021-03-01 08:22:27 +08:00
}