seaweedfs/weed/command/export.go

268 lines
7.7 KiB
Go
Raw Normal View History

package command
import (
"archive/tar"
"bytes"
2014-10-27 02:25:02 +08:00
"fmt"
2019-12-24 04:48:20 +08:00
"io"
"os"
"path"
2015-07-09 14:19:54 +08:00
"path/filepath"
"strconv"
"strings"
"text/template"
"time"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/storage"
"github.com/seaweedfs/seaweedfs/weed/storage/needle"
"github.com/seaweedfs/seaweedfs/weed/storage/needle_map"
"github.com/seaweedfs/seaweedfs/weed/storage/super_block"
"github.com/seaweedfs/seaweedfs/weed/storage/types"
"github.com/seaweedfs/seaweedfs/weed/util"
)
const (
2020-09-11 17:05:14 +08:00
defaultFnFormat = `{{.Id}}_{{.Name}}{{.Ext}}`
2015-05-26 14:53:45 +08:00
timeFormat = "2006-01-02T15:04:05"
)
var (
export ExportOptions
)
type ExportOptions struct {
dir *string
collection *string
volumeId *int
}
var cmdExport = &Command{
2015-05-26 14:53:45 +08:00
UsageLine: "export -dir=/tmp -volumeId=234 -o=/dir/name.tar -fileNameFormat={{.Name}} -newer='" + timeFormat + "'",
2013-01-21 11:51:27 +08:00
Short: "list or export files from one volume data file",
Long: `List all files in a volume, or Export all files in a volume to a tar file if the output is specified.
2013-03-19 12:29:25 +08:00
The format of file name in the tar file can be customized. Default is {{.Mime}}/{{.Id}}:{{.Name}}. Also available is {{.Key}}.
`,
}
func init() {
cmdExport.Run = runExport // break init cycle
export.dir = cmdExport.Flag.String("dir", ".", "input data directory to store volume data files")
export.collection = cmdExport.Flag.String("collection", "", "the volume collection name")
export.volumeId = cmdExport.Flag.Int("volumeId", -1, "a volume id. The volume .dat and .idx files should already exist in the dir.")
}
var (
output = cmdExport.Flag.String("o", "", "output tar file name, must ends with .tar, or just a \"-\" for stdout")
2020-09-11 17:05:14 +08:00
format = cmdExport.Flag.String("fileNameFormat", defaultFnFormat, "filename formatted with {{.Id}} {{.Name}} {{.Ext}}")
newer = cmdExport.Flag.String("newer", "", "export only files newer than this time, default is all files. Must be specified in RFC3339 without timezone, e.g. 2006-01-02T15:04:05")
showDeleted = cmdExport.Flag.Bool("deleted", false, "export deleted files. only applies if -o is not specified")
limit = cmdExport.Flag.Int("limit", 0, "only show first n entries if specified")
2015-06-02 15:23:41 +08:00
2015-06-02 15:33:13 +08:00
tarOutputFile *tar.Writer
tarHeader tar.Header
fileNameTemplate *template.Template
fileNameTemplateBuffer = bytes.NewBuffer(nil)
newerThan time.Time
newerThanUnix int64 = -1
localLocation, _ = time.LoadLocation("Local")
)
func printNeedle(vid needle.VolumeId, n *needle.Needle, version needle.Version, deleted bool, offset int64, onDiskSize int64) {
2019-04-19 12:43:36 +08:00
key := needle.NewFileIdFromNeedle(vid, n).String()
size := int32(n.DataSize)
2019-04-19 12:43:36 +08:00
if version == needle.Version1 {
size = int32(n.Size)
}
fmt.Printf("%s\t%s\t%d\t%t\t%s\t%s\t%s\t%t\t%d\t%d\n",
key,
n.Name,
size,
2020-06-20 13:45:27 +08:00
n.IsCompressed(),
n.Mime,
n.LastModifiedString(),
n.Ttl.String(),
deleted,
offset,
offset+onDiskSize,
)
}
type VolumeFileScanner4Export struct {
2019-04-19 12:43:36 +08:00
version needle.Version
counter int
2019-12-25 02:18:56 +08:00
needleMap *needle_map.MemDb
2019-04-19 12:43:36 +08:00
vid needle.VolumeId
}
2019-12-24 04:48:20 +08:00
func (scanner *VolumeFileScanner4Export) VisitSuperBlock(superBlock super_block.SuperBlock) error {
scanner.version = superBlock.Version
return nil
}
func (scanner *VolumeFileScanner4Export) ReadNeedleBody() bool {
return true
}
2019-10-22 15:50:30 +08:00
func (scanner *VolumeFileScanner4Export) VisitNeedle(n *needle.Needle, offset int64, needleHeader, needleBody []byte) error {
needleMap := scanner.needleMap
vid := scanner.vid
nv, ok := needleMap.Get(n.Id)
2020-06-20 13:45:27 +08:00
glog.V(3).Infof("key %d offset %d size %d disk_size %d compressed %v ok %v nv %+v",
n.Id, offset, n.Size, n.DiskSize(scanner.version), n.IsCompressed(), ok, nv)
if *showDeleted && n.Size > 0 || ok && nv.Size.IsValid() && nv.Offset.ToActualOffset() == offset {
if newerThanUnix >= 0 && n.HasLastModifiedDate() && n.LastModified < uint64(newerThanUnix) {
glog.V(3).Infof("Skipping this file, as it's old enough: LastModified %d vs %d",
n.LastModified, newerThanUnix)
return nil
}
scanner.counter++
if *limit > 0 && scanner.counter > *limit {
return io.EOF
}
if tarOutputFile != nil {
return writeFile(vid, n)
} else {
printNeedle(vid, n, scanner.version, false, offset, n.DiskSize(scanner.version))
return nil
}
}
if !ok {
if *showDeleted && tarOutputFile == nil {
if n.DataSize > 0 {
printNeedle(vid, n, scanner.version, true, offset, n.DiskSize(scanner.version))
} else {
n.Name = []byte("*tombstone")
printNeedle(vid, n, scanner.version, true, offset, n.DiskSize(scanner.version))
}
}
glog.V(2).Infof("This seems deleted %d size %d", n.Id, n.Size)
} else {
glog.V(2).Infof("Skipping later-updated Id %d size %d", n.Id, n.Size)
}
return nil
}
func runExport(cmd *Command, args []string) bool {
var err error
if *newer != "" {
if newerThan, err = time.ParseInLocation(timeFormat, *newer, localLocation); err != nil {
fmt.Println("cannot parse 'newer' argument: " + err.Error())
return false
}
newerThanUnix = newerThan.Unix()
}
2015-06-02 15:23:41 +08:00
if *export.volumeId == -1 {
return false
}
2015-06-02 15:33:13 +08:00
if *output != "" {
if *output != "-" && !strings.HasSuffix(*output, ".tar") {
fmt.Println("the output file", *output, "should be '-' or end with .tar")
return false
}
2015-06-02 15:33:13 +08:00
if fileNameTemplate, err = template.New("name").Parse(*format); err != nil {
fmt.Println("cannot parse format " + *format + ": " + err.Error())
return false
}
2015-06-02 15:33:13 +08:00
var outputFile *os.File
if *output == "-" {
outputFile = os.Stdout
} else {
2015-06-02 15:33:13 +08:00
if outputFile, err = os.Create(*output); err != nil {
glog.Fatalf("cannot open output tar %s: %s", *output, err)
}
}
2015-06-02 15:33:13 +08:00
defer outputFile.Close()
tarOutputFile = tar.NewWriter(outputFile)
defer tarOutputFile.Close()
t := time.Now()
tarHeader = tar.Header{Mode: 0644,
ModTime: t, Uid: os.Getuid(), Gid: os.Getgid(),
2018-07-22 08:39:10 +08:00
Typeflag: tar.TypeReg,
AccessTime: t, ChangeTime: t}
}
2015-06-02 15:23:41 +08:00
fileName := strconv.Itoa(*export.volumeId)
if *export.collection != "" {
fileName = *export.collection + "_" + fileName
}
2019-04-19 12:43:36 +08:00
vid := needle.VolumeId(*export.volumeId)
2019-12-25 02:18:56 +08:00
needleMap := needle_map.NewMemDb()
defer needleMap.Close()
if err := needleMap.LoadFromIdx(path.Join(util.ResolvePath(*export.dir), fileName+".idx")); err != nil {
2019-12-25 02:18:56 +08:00
glog.Fatalf("cannot load needle map from %s.idx: %s", fileName, err)
2013-02-11 05:41:25 +08:00
}
volumeFileScanner := &VolumeFileScanner4Export{
needleMap: needleMap,
vid: vid,
}
if tarOutputFile == nil {
fmt.Printf("key\tname\tsize\tgzip\tmime\tmodified\tttl\tdeleted\tstart\tstop\n")
}
err = storage.ScanVolumeFile(util.ResolvePath(*export.dir), *export.collection, vid, storage.NeedleMapInMemory, volumeFileScanner)
if err != nil && err != io.EOF {
glog.Errorf("Export Volume File [ERROR] %s\n", err)
}
return true
}
type nameParams struct {
Name string
Id types.NeedleId
Mime string
Key string
2015-07-09 14:19:54 +08:00
Ext string
}
2019-04-19 12:43:36 +08:00
func writeFile(vid needle.VolumeId, n *needle.Needle) (err error) {
key := needle.NewFileIdFromNeedle(vid, n).String()
fileNameTemplateBuffer.Reset()
if err = fileNameTemplate.Execute(fileNameTemplateBuffer,
nameParams{
Name: string(n.Name),
Id: n.Id,
Mime: string(n.Mime),
Key: key,
Ext: filepath.Ext(string(n.Name)),
},
); err != nil {
return err
}
fileName := fileNameTemplateBuffer.String()
2015-06-02 15:33:13 +08:00
2020-06-20 13:45:27 +08:00
if n.IsCompressed() {
if util.IsGzippedContent(n.Data) && path.Ext(fileName) != ".gz" {
fileName = fileName + ".gz"
}
// TODO other compression method
}
tarHeader.Name, tarHeader.Size = fileName, int64(len(n.Data))
if n.HasLastModifiedDate() {
tarHeader.ModTime = time.Unix(int64(n.LastModified), 0)
} else {
tarHeader.ModTime = time.Unix(0, 0)
}
tarHeader.ChangeTime = tarHeader.ModTime
if err = tarOutputFile.WriteHeader(&tarHeader); err != nil {
return err
}
_, err = tarOutputFile.Write(n.Data)
return
}