2016-06-03 09:09:14 +08:00
package command
2013-01-21 11:44:23 +08:00
import (
"archive/tar"
2013-01-23 07:07:19 +08:00
"bytes"
2014-10-27 02:25:02 +08:00
"fmt"
2019-12-24 04:48:20 +08:00
"io"
2013-01-21 11:44:23 +08:00
"os"
"path"
2015-07-09 14:19:54 +08:00
"path/filepath"
2013-01-21 11:44:23 +08:00
"strconv"
"strings"
2013-01-23 07:07:19 +08:00
"text/template"
2013-01-21 11:44:23 +08:00
"time"
2014-10-27 02:34:55 +08:00
2016-06-03 09:09:14 +08:00
"github.com/chrislusf/seaweedfs/weed/glog"
"github.com/chrislusf/seaweedfs/weed/storage"
2019-04-19 12:43:36 +08:00
"github.com/chrislusf/seaweedfs/weed/storage/needle"
2019-12-25 02:18:56 +08:00
"github.com/chrislusf/seaweedfs/weed/storage/needle_map"
2019-12-24 04:48:20 +08:00
"github.com/chrislusf/seaweedfs/weed/storage/super_block"
2018-07-08 17:28:04 +08:00
"github.com/chrislusf/seaweedfs/weed/storage/types"
2020-06-20 13:45:27 +08:00
"github.com/chrislusf/seaweedfs/weed/util"
2013-01-21 11:44:23 +08:00
)
2013-01-23 07:07:19 +08:00
const (
	// defaultFnFormat is the default text/template used to name each entry
	// written to the tar archive. It must not contain stray whitespace:
	// anything outside the {{...}} actions is copied verbatim into the
	// entry name.
	defaultFnFormat = `{{.Id}}_{{.Name}}{{.Ext}}`
	// timeFormat is the layout used to parse the -newer flag
	// (RFC3339 without a timezone).
	timeFormat = "2006-01-02T15:04:05"
)
2015-06-02 10:25:01 +08:00
// export holds the flag values that select which volume is exported.
// Its fields are filled in by init.
var export ExportOptions
// ExportOptions groups the flag-backed settings that identify the volume
// to export.
type ExportOptions struct {
	// dir is the directory holding the volume files; collection is the
	// optional collection name used as a file-name prefix.
	dir, collection *string
	// volumeId is the numeric id of the volume; -1 means "not specified".
	volumeId *int
}
2013-01-21 11:44:23 +08:00
var cmdExport = & Command {
2015-05-26 14:53:45 +08:00
UsageLine : "export -dir=/tmp -volumeId=234 -o=/dir/name.tar -fileNameFormat={{.Name}} -newer='" + timeFormat + "'" ,
2013-01-21 11:51:27 +08:00
Short : "list or export files from one volume data file" ,
Long : ` List all files in a volume , or Export all files in a volume to a tar file if the output is specified .
2014-08-26 02:37:00 +08:00
2013-03-19 12:29:25 +08:00
The format of file name in the tar file can be customized . Default is { { . Mime } } / { { . Id } } : { { . Name } } . Also available is { { . Key } } .
2013-01-21 11:44:23 +08:00
` ,
}
2015-06-02 10:25:01 +08:00
// init wires runExport into cmdExport and registers the flags that select
// the volume to export.
func init() {
	cmdExport.Run = runExport // break init cycle

	// Fields are evaluated in source order, so the flags are registered
	// in the order dir, collection, volumeId.
	export = ExportOptions{
		dir:        cmdExport.Flag.String("dir", ".", "input data directory to store volume data files"),
		collection: cmdExport.Flag.String("collection", "", "the volume collection name"),
		volumeId:   cmdExport.Flag.Int("volumeId", -1, "a volume id. The volume .dat and .idx files should already exist in the dir."),
	}
}
2013-01-21 11:44:23 +08:00
var (
2018-07-15 11:51:17 +08:00
output = cmdExport . Flag . String ( "o" , "" , "output tar file name, must ends with .tar, or just a \"-\" for stdout" )
2020-09-11 17:05:14 +08:00
format = cmdExport . Flag . String ( "fileNameFormat" , defaultFnFormat , "filename formatted with {{.Id}} {{.Name}} {{.Ext}}" )
2018-07-15 11:51:17 +08:00
newer = cmdExport . Flag . String ( "newer" , "" , "export only files newer than this time, default is all files. Must be specified in RFC3339 without timezone, e.g. 2006-01-02T15:04:05" )
showDeleted = cmdExport . Flag . Bool ( "deleted" , false , "export deleted files. only applies if -o is not specified" )
limit = cmdExport . Flag . Int ( "limit" , 0 , "only show first n entries if specified" )
2015-06-02 15:23:41 +08:00
2015-06-02 15:33:13 +08:00
tarOutputFile * tar . Writer
tarHeader tar . Header
fileNameTemplate * template . Template
fileNameTemplateBuffer = bytes . NewBuffer ( nil )
newerThan time . Time
newerThanUnix int64 = - 1
localLocation , _ = time . LoadLocation ( "Local" )
2013-01-21 11:44:23 +08:00
)
2020-09-15 13:57:23 +08:00
func printNeedle ( vid needle . VolumeId , n * needle . Needle , version needle . Version , deleted bool , offset int64 , onDiskSize int64 ) {
2019-04-19 12:43:36 +08:00
key := needle . NewFileIdFromNeedle ( vid , n ) . String ( )
2020-08-19 08:04:28 +08:00
size := int32 ( n . DataSize )
2019-04-19 12:43:36 +08:00
if version == needle . Version1 {
2020-08-19 08:04:28 +08:00
size = int32 ( n . Size )
2018-07-15 11:51:17 +08:00
}
2020-09-15 13:57:23 +08:00
fmt . Printf ( "%s\t%s\t%d\t%t\t%s\t%s\t%s\t%t\t%d\t%d\n" ,
2018-07-15 11:51:17 +08:00
key ,
n . Name ,
size ,
2020-06-20 13:45:27 +08:00
n . IsCompressed ( ) ,
2018-07-15 11:51:17 +08:00
n . Mime ,
n . LastModifiedString ( ) ,
n . Ttl . String ( ) ,
deleted ,
2020-09-15 13:57:23 +08:00
offset ,
offset + onDiskSize ,
2018-07-15 11:51:17 +08:00
)
}
2019-01-16 17:48:59 +08:00
type VolumeFileScanner4Export struct {
2019-04-19 12:43:36 +08:00
version needle . Version
2019-01-16 17:48:59 +08:00
counter int
2019-12-25 02:18:56 +08:00
needleMap * needle_map . MemDb
2019-04-19 12:43:36 +08:00
vid needle . VolumeId
2019-01-16 17:48:59 +08:00
}
2019-12-24 04:48:20 +08:00
func ( scanner * VolumeFileScanner4Export ) VisitSuperBlock ( superBlock super_block . SuperBlock ) error {
scanner . version = superBlock . Version
2019-01-16 17:48:59 +08:00
return nil
}
// ReadNeedleBody always reports true.
// NOTE(review): presumably this tells the volume scanner to load each
// needle's body before calling VisitNeedle — confirm against
// storage.ScanVolumeFile.
func ( scanner * VolumeFileScanner4Export ) ReadNeedleBody ( ) bool {
return true
}
2019-10-22 15:50:30 +08:00
func ( scanner * VolumeFileScanner4Export ) VisitNeedle ( n * needle . Needle , offset int64 , needleHeader , needleBody [ ] byte ) error {
2019-01-16 17:48:59 +08:00
needleMap := scanner . needleMap
vid := scanner . vid
nv , ok := needleMap . Get ( n . Id )
2020-06-20 13:45:27 +08:00
glog . V ( 3 ) . Infof ( "key %d offset %d size %d disk_size %d compressed %v ok %v nv %+v" ,
n . Id , offset , n . Size , n . DiskSize ( scanner . version ) , n . IsCompressed ( ) , ok , nv )
2020-09-11 17:05:14 +08:00
if * showDeleted && n . Size > 0 || ok && nv . Size . IsValid ( ) && nv . Offset . ToAcutalOffset ( ) == offset {
2019-01-16 17:48:59 +08:00
if newerThanUnix >= 0 && n . HasLastModifiedDate ( ) && n . LastModified < uint64 ( newerThanUnix ) {
glog . V ( 3 ) . Infof ( "Skipping this file, as it's old enough: LastModified %d vs %d" ,
n . LastModified , newerThanUnix )
return nil
}
scanner . counter ++
if * limit > 0 && scanner . counter > * limit {
return io . EOF
}
if tarOutputFile != nil {
return writeFile ( vid , n )
} else {
2020-09-15 13:57:23 +08:00
printNeedle ( vid , n , scanner . version , false , offset , n . DiskSize ( scanner . version ) )
2019-01-16 17:48:59 +08:00
return nil
}
}
if ! ok {
if * showDeleted && tarOutputFile == nil {
if n . DataSize > 0 {
2020-09-15 13:57:23 +08:00
printNeedle ( vid , n , scanner . version , true , offset , n . DiskSize ( scanner . version ) )
2019-01-16 17:48:59 +08:00
} else {
n . Name = [ ] byte ( "*tombstone" )
2020-09-15 13:57:23 +08:00
printNeedle ( vid , n , scanner . version , true , offset , n . DiskSize ( scanner . version ) )
2019-01-16 17:48:59 +08:00
}
}
glog . V ( 2 ) . Infof ( "This seems deleted %d size %d" , n . Id , n . Size )
} else {
glog . V ( 2 ) . Infof ( "Skipping later-updated Id %d size %d" , n . Id , n . Size )
}
return nil
}
2013-01-21 11:44:23 +08:00
func runExport ( cmd * Command , args [ ] string ) bool {
2015-05-20 20:11:12 +08:00
var err error
if * newer != "" {
if newerThan , err = time . ParseInLocation ( timeFormat , * newer , localLocation ) ; err != nil {
fmt . Println ( "cannot parse 'newer' argument: " + err . Error ( ) )
return false
}
newerThanUnix = newerThan . Unix ( )
}
2015-06-02 15:23:41 +08:00
if * export . volumeId == - 1 {
2013-01-21 11:44:23 +08:00
return false
}
2015-06-02 15:33:13 +08:00
if * output != "" {
if * output != "-" && ! strings . HasSuffix ( * output , ".tar" ) {
fmt . Println ( "the output file" , * output , "should be '-' or end with .tar" )
2013-01-22 09:50:10 +08:00
return false
}
2013-01-23 07:07:19 +08:00
2015-06-02 15:33:13 +08:00
if fileNameTemplate , err = template . New ( "name" ) . Parse ( * format ) ; err != nil {
2013-01-23 07:07:19 +08:00
fmt . Println ( "cannot parse format " + * format + ": " + err . Error ( ) )
return false
}
2015-06-02 15:33:13 +08:00
var outputFile * os . File
if * output == "-" {
outputFile = os . Stdout
2013-01-21 11:44:23 +08:00
} else {
2015-06-02 15:33:13 +08:00
if outputFile , err = os . Create ( * output ) ; err != nil {
glog . Fatalf ( "cannot open output tar %s: %s" , * output , err )
2013-01-21 11:44:23 +08:00
}
}
2015-06-02 15:33:13 +08:00
defer outputFile . Close ( )
tarOutputFile = tar . NewWriter ( outputFile )
defer tarOutputFile . Close ( )
2013-01-21 11:44:23 +08:00
t := time . Now ( )
tarHeader = tar . Header { Mode : 0644 ,
ModTime : t , Uid : os . Getuid ( ) , Gid : os . Getgid ( ) ,
2018-07-22 08:39:10 +08:00
Typeflag : tar . TypeReg ,
2013-01-21 11:44:23 +08:00
AccessTime : t , ChangeTime : t }
}
2015-06-02 15:23:41 +08:00
fileName := strconv . Itoa ( * export . volumeId )
if * export . collection != "" {
fileName = * export . collection + "_" + fileName
2014-01-22 12:51:07 +08:00
}
2019-04-19 12:43:36 +08:00
vid := needle . VolumeId ( * export . volumeId )
2013-01-21 11:44:23 +08:00
2019-12-25 02:18:56 +08:00
needleMap := needle_map . NewMemDb ( )
2020-03-10 13:29:02 +08:00
defer needleMap . Close ( )
2020-07-17 13:50:14 +08:00
if err := needleMap . LoadFromIdx ( path . Join ( util . ResolvePath ( * export . dir ) , fileName + ".idx" ) ) ; err != nil {
2019-12-25 02:18:56 +08:00
glog . Fatalf ( "cannot load needle map from %s.idx: %s" , fileName , err )
2013-02-11 05:41:25 +08:00
}
2013-01-21 11:44:23 +08:00
2019-01-16 17:48:59 +08:00
volumeFileScanner := & VolumeFileScanner4Export {
needleMap : needleMap ,
vid : vid ,
}
2013-01-22 09:50:10 +08:00
2018-07-15 11:51:17 +08:00
if tarOutputFile == nil {
2020-09-15 13:57:23 +08:00
fmt . Printf ( "key\tname\tsize\tgzip\tmime\tmodified\tttl\tdeleted\tstart\tstop\n" )
2018-07-15 11:51:17 +08:00
}
2020-07-17 13:50:14 +08:00
err = storage . ScanVolumeFile ( util . ResolvePath ( * export . dir ) , * export . collection , vid , storage . NeedleMapInMemory , volumeFileScanner )
2018-07-15 11:51:17 +08:00
if err != nil && err != io . EOF {
2013-08-09 14:57:22 +08:00
glog . Fatalf ( "Export Volume File [ERROR] %s\n" , err )
2013-01-21 11:44:23 +08:00
}
return true
}
2013-01-23 07:07:19 +08:00
type nameParams struct {
Name string
2018-07-08 17:28:04 +08:00
Id types . NeedleId
2013-01-23 07:07:19 +08:00
Mime string
Key string
2015-07-09 14:19:54 +08:00
Ext string
2013-01-23 07:07:19 +08:00
}
2019-04-19 12:43:36 +08:00
func writeFile ( vid needle . VolumeId , n * needle . Needle ) ( err error ) {
key := needle . NewFileIdFromNeedle ( vid , n ) . String ( )
2018-07-15 11:51:17 +08:00
fileNameTemplateBuffer . Reset ( )
if err = fileNameTemplate . Execute ( fileNameTemplateBuffer ,
nameParams {
Name : string ( n . Name ) ,
Id : n . Id ,
Mime : string ( n . Mime ) ,
Key : key ,
Ext : filepath . Ext ( string ( n . Name ) ) ,
} ,
) ; err != nil {
return err
}
2013-01-23 07:07:19 +08:00
2018-07-15 11:51:17 +08:00
fileName := fileNameTemplateBuffer . String ( )
2015-06-02 15:33:13 +08:00
2020-06-20 13:45:27 +08:00
if n . IsCompressed ( ) {
if util . IsGzippedContent ( n . Data ) && path . Ext ( fileName ) != ".gz" {
fileName = fileName + ".gz"
}
// TODO other compression method
2018-07-15 11:51:17 +08:00
}
2013-01-23 07:07:19 +08:00
2018-07-15 11:51:17 +08:00
tarHeader . Name , tarHeader . Size = fileName , int64 ( len ( n . Data ) )
if n . HasLastModifiedDate ( ) {
tarHeader . ModTime = time . Unix ( int64 ( n . LastModified ) , 0 )
2013-01-21 11:44:23 +08:00
} else {
2018-07-15 11:51:17 +08:00
tarHeader . ModTime = time . Unix ( 0 , 0 )
}
tarHeader . ChangeTime = tarHeader . ModTime
if err = tarOutputFile . WriteHeader ( & tarHeader ) ; err != nil {
return err
2013-01-21 11:44:23 +08:00
}
2018-07-15 11:51:17 +08:00
_ , err = tarOutputFile . Write ( n . Data )
2013-01-21 11:44:23 +08:00
return
}