package command

import (
	"fmt"
	"regexp"
	"time"

	"github.com/seaweedfs/seaweedfs/weed/glog"
	"github.com/seaweedfs/seaweedfs/weed/pb"
	"github.com/seaweedfs/seaweedfs/weed/replication/source"
	"github.com/seaweedfs/seaweedfs/weed/security"
	"github.com/seaweedfs/seaweedfs/weed/util"

	"google.golang.org/grpc"
)
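
// FilerBackupOptions holds the command line flags for the filer.backup command.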
type FilerBackupOptions struct {
	isActivePassive *bool
	filer           *string
	path            *string
	excludePaths    *string
	excludeFileName *string
	debug           *bool
	proxyByFiler    *bool
	doDeleteFiles   *bool
	timeAgo         *time.Duration
	retentionDays   *int
}

var (
	filerBackupOptions FilerBackupOptions
)
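
// init wires the command's Run function and registers the filer.backup flags.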
func init() {
	cmdFilerBackup.Run = runFilerBackup // break init cycle
	filerBackupOptions.filer = cmdFilerBackup.Flag.String("filer", "localhost:8888", "filer of one SeaweedFS cluster")
	filerBackupOptions.path = cmdFilerBackup.Flag.String("filerPath", "/", "directory to sync on filer")
	filerBackupOptions.excludePaths = cmdFilerBackup.Flag.String("filerExcludePaths", "", "exclude directories to sync on filer")
	filerBackupOptions.excludeFileName = cmdFilerBackup.Flag.String("filerExcludeFileName", "", "exclude file names that match the regexp to sync on filer")
	filerBackupOptions.proxyByFiler = cmdFilerBackup.Flag.Bool("filerProxy", false, "read and write file chunks by filer instead of volume servers")
	filerBackupOptions.doDeleteFiles = cmdFilerBackup.Flag.Bool("doDeleteFiles", false, "delete files on the destination")
	filerBackupOptions.debug = cmdFilerBackup.Flag.Bool("debug", false, "debug mode to print out received files")
	filerBackupOptions.timeAgo = cmdFilerBackup.Flag.Duration("timeAgo", 0, "start time before now. \"300ms\", \"1.5h\" or \"2h45m\". Valid time units are \"ns\", \"us\" (or \"µs\"), \"ms\", \"s\", \"m\", \"h\"")
	filerBackupOptions.retentionDays = cmdFilerBackup.Flag.Int("retentionDays", 0, "incremental backup retention days")
}
var cmdFilerBackup = &Command{
	UsageLine: "filer.backup -filer=<filerHost>:<filerPort> ",
	Short:     "resume-able continuously replicate files from a SeaweedFS cluster to another location defined in replication.toml",
	Long: `resume-able continuously replicate files from a SeaweedFS cluster to another location defined in replication.toml

	filer.backup listens on filer notifications. If any file is updated, it will fetch the updated content,
	and write it to the destination. This replaces the filer.replicate command since no additional message queue is needed.

	If restarted and "-timeAgo" is not set, the synchronization will resume from the previous checkpoints, persisted every minute.
	A fresh sync will start from the earliest metadata logs. To reset the checkpoints, just set "-timeAgo" to a high value.

	`,
}
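
// runFilerBackup loads the security and replication configuration, then keeps
// retrying doFilerBackup so the backup resumes after transient failures.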
func runFilerBackup(cmd *Command, args []string) bool {

	util.LoadConfiguration("security", false)
	util.LoadConfiguration("replication", true)

	grpcDialOption := security.LoadClientTLS(util.GetViper(), "grpc.client")

	clientId := util.RandomInt32()
	var clientEpoch int32

	for {
		clientEpoch++
		err := doFilerBackup(grpcDialOption, &filerBackupOptions, clientId, clientEpoch)
		if err != nil {
			glog.Errorf("backup from %s: %v", *filerBackupOptions.filer, err)
			time.Sleep(1747 * time.Millisecond)
		}
	}

	return true
}
const (
	BackupKeyPrefix = "backup."
)
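
// doFilerBackup subscribes to metadata changes on the source filer and applies
// each change to the data sink configured in replication.toml, checkpointing
// progress so a restart can resume where it left off.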
func doFilerBackup(grpcDialOption grpc.DialOption, backupOption *FilerBackupOptions, clientId int32, clientEpoch int32) error {

	// find data sink
	dataSink := findSink(util.GetViper())
	if dataSink == nil {
		return fmt.Errorf("no data sink configured in replication.toml")
	}
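
	// source filer location, sync path, and exclusion rules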
	sourceFiler := pb.ServerAddress(*backupOption.filer)
	sourcePath := *backupOption.path
	excludePaths := util.StringSplit(*backupOption.excludePaths, ",")
	var reExcludeFileName *regexp.Regexp
	if *backupOption.excludeFileName != "" {
		var err error
		if reExcludeFileName, err = regexp.Compile(*backupOption.excludeFileName); err != nil {
			return fmt.Errorf("failed to compile exclude file name regexp %v: %+v", *backupOption.excludeFileName, err)
		}
	}
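
	// resolve where to start: resume from the per-sink checkpoint (keyed by a
	// hash of the sink name and target directory) unless -timeAgo overrides it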
	timeAgo := *backupOption.timeAgo
	targetPath := dataSink.GetSinkToDirectory()
	debug := *backupOption.debug

	// get start time for the data sink
	startFrom := time.Unix(0, 0)
	sinkId := util.HashStringToLong(dataSink.GetName() + dataSink.GetSinkToDirectory())
	if timeAgo.Milliseconds() == 0 {
		lastOffsetTsNs, err := getOffset(grpcDialOption, sourceFiler, BackupKeyPrefix, int32(sinkId))
		if err != nil {
			glog.V(0).Infof("starting from %v", startFrom)
		} else {
			startFrom = time.Unix(0, lastOffsetTsNs)
			glog.V(0).Infof("resuming from %v", startFrom)
		}
	} else {
		startFrom = time.Now().Add(-timeAgo)
		glog.V(0).Infof("start time is set to %v", startFrom)
	}
	// create filer source
	filerSource := &source.FilerSource{}
	filerSource.DoInitialize(
		sourceFiler.ToHttpAddress(),
		sourceFiler.ToGrpcAddress(),
		sourcePath,
		*backupOption.proxyByFiler)

	dataSink.SetSourceFiler(filerSource)

	processEventFn := genProcessFunction(sourcePath, targetPath, excludePaths, reExcludeFileName, dataSink, *backupOption.doDeleteFiles, debug)
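
	// wrap the event processor so progress is logged and the checkpoint is
	// persisted on the source filer every 3 seconds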
	processEventFnWithOffset := pb.AddOffsetFunc(processEventFn, 3*time.Second, func(counter int64, lastTsNs int64) error {
		glog.V(0).Infof("backup %s progressed to %v %0.2f/sec", sourceFiler, time.Unix(0, lastTsNs), float64(counter)/float64(3))
		return setOffset(grpcDialOption, sourceFiler, BackupKeyPrefix, int32(sinkId), lastTsNs)
	})
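
	// for incremental sinks, prune dated backup directories older than -retentionDays once a day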
	if dataSink.IsIncremental() && *filerBackupOptions.retentionDays > 0 {
		go func() {
			for {
				now := time.Now()
				time.Sleep(time.Hour * 24)
				key := util.Join(targetPath, now.Add(-1*time.Hour*24*time.Duration(*filerBackupOptions.retentionDays)).Format("2006-01-02"))
				_ = dataSink.DeleteEntry(util.Join(targetPath, key), true, true, nil)
				glog.V(0).Infof("incremental backup delete directory:%s", key)
			}
		}()
	}
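
	// follow the filer metadata stream from startFrom, filtered to sourcePath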
	metadataFollowOption := &pb.MetadataFollowOption{
		ClientName:             "backup_" + dataSink.GetName(),
		ClientId:               clientId,
		ClientEpoch:            clientEpoch,
		SelfSignature:          0,
		PathPrefix:             sourcePath,
		AdditionalPathPrefixes: nil,
		DirectoriesToWatch:     nil,
		StartTsNs:              startFrom.UnixNano(),
		StopTsNs:               0,
		EventErrorType:         pb.TrivialOnError,
	}

	return pb.FollowMetadata(sourceFiler, grpcDialOption, metadataFollowOption, processEventFnWithOffset)

}