seaweedfs/weed/shell/command_remote_cache.go

144 lines
4.2 KiB
Go
Raw Normal View History

2021-08-10 05:35:18 +08:00
package shell
import (
"flag"
"fmt"
"github.com/chrislusf/seaweedfs/weed/filer"
"github.com/chrislusf/seaweedfs/weed/pb/filer_pb"
2021-08-27 06:18:34 +08:00
"github.com/chrislusf/seaweedfs/weed/pb/remote_pb"
2021-08-10 05:35:18 +08:00
"github.com/chrislusf/seaweedfs/weed/util"
"io"
)
// init adds the remote.cache command to the package-level Commands list so
// that it is available alongside the other registered commands.
func init() {
	Commands = append(Commands, new(commandRemoteCache))
}
// commandRemoteCache implements the "remote.cache" command. It has no fields;
// everything it needs is passed to its methods.
type commandRemoteCache struct{}
// Name returns the name of this command, "remote.cache". Do also uses it as
// the name of the flag set that parses the command's arguments.
func (c *commandRemoteCache) Name() string {
	const commandName = "remote.cache"
	return commandName
}
// Help returns the usage text for the remote.cache command: a short summary
// followed by example invocations and notes on how the command is meant to be
// run.
func (c *commandRemoteCache) Help() string {
	// NOTE(review): the -maxSize example assumes the flag is expressed in
	// bytes (1024000 bytes = 1000K); the flag itself is declared outside this
	// view — confirm the unit.
	return `cache the file content for mounted directories or files
# assume a remote storage is configured to name "cloud1"
remote.configure -name=cloud1 -type=s3 -access_key=xxx -secret_key=yyy
# mount and pull one bucket
remote.mount -dir=/xxx -remote=cloud1/bucket
# after mount, run one of these commands to cache the content of the files
remote.cache -dir=/xxx
remote.cache -dir=/xxx/some/sub/dir
remote.cache -dir=/xxx/some/sub/dir -include=*.pdf
remote.cache -dir=/xxx/some/sub/dir -exclude=*.txt
remote.cache -maxSize=1024000 # cache files smaller than 1000K
remote.cache -maxAge=3600 # cache files less than 1 hour old
This is designed to run regularly. So you can add it to some cronjob.
If a file is already synchronized with the remote copy, the file will be skipped to avoid unnecessary copy.
The actual data copying goes through volume servers in parallel.
`
}
func (c *commandRemoteCache) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) {
remoteMountCommand := flag.NewFlagSet(c.Name(), flag.ContinueOnError)
dir := remoteMountCommand.String("dir", "", "a directory in filer")
fileFiler := newFileFilter(remoteMountCommand)
2021-08-10 05:35:18 +08:00
if err = remoteMountCommand.Parse(args); err != nil {
return nil
}
2021-08-16 03:09:54 +08:00
mappings, localMountedDir, remoteStorageMountedLocation, remoteStorageConf, detectErr := detectMountInfo(commandEnv, writer, *dir)
2021-08-15 16:53:46 +08:00
if detectErr != nil{
2021-08-16 03:09:54 +08:00
jsonPrintln(writer, mappings)
2021-08-15 16:53:46 +08:00
return detectErr
2021-08-10 05:35:18 +08:00
}
// pull content from remote
if err = c.cacheContentData(commandEnv, writer, util.FullPath(localMountedDir), remoteStorageMountedLocation, util.FullPath(*dir), fileFiler, remoteStorageConf); err != nil {
2021-08-10 05:35:18 +08:00
return fmt.Errorf("cache content data: %v", err)
}
return nil
}
func recursivelyTraverseDirectory(filerClient filer_pb.FilerClient, dirPath util.FullPath, visitEntry func(dir util.FullPath, entry *filer_pb.Entry) bool) (err error) {
err = filer_pb.ReadDirAllEntries(filerClient, dirPath, "", func(entry *filer_pb.Entry, isLast bool) error {
if entry.IsDirectory {
if !visitEntry(dirPath, entry) {
return nil
}
subDir := dirPath.Child(entry.Name)
if err := recursivelyTraverseDirectory(filerClient, subDir, visitEntry); err != nil {
return err
}
2021-08-10 05:37:34 +08:00
} else {
2021-08-10 05:35:18 +08:00
if !visitEntry(dirPath, entry) {
return nil
}
}
return nil
})
return
}
func shouldCacheToLocal(entry *filer_pb.Entry) bool {
if entry.IsDirectory {
return false
}
if entry.RemoteEntry == nil {
return false
}
2021-08-15 12:46:34 +08:00
if entry.RemoteEntry.LastLocalSyncTsNs == 0 && entry.RemoteEntry.RemoteSize > 0 {
2021-08-10 05:35:18 +08:00
return true
}
return false
}
func mayHaveCachedToLocal(entry *filer_pb.Entry) bool {
if entry.IsDirectory {
return false
}
if entry.RemoteEntry == nil {
return false // should not uncache an entry that is not in remote
2021-08-10 05:35:18 +08:00
}
2021-08-27 06:18:34 +08:00
if entry.RemoteEntry.LastLocalSyncTsNs > 0 {
2021-08-10 05:35:18 +08:00
return true
}
return false
}
2021-08-27 06:18:34 +08:00
func (c *commandRemoteCache) cacheContentData(commandEnv *CommandEnv, writer io.Writer, localMountedDir util.FullPath, remoteMountedLocation *remote_pb.RemoteStorageLocation, dirToCache util.FullPath, fileFilter *FileFilter, remoteConf *remote_pb.RemoteConf) error {
2021-08-10 05:35:18 +08:00
return recursivelyTraverseDirectory(commandEnv, dirToCache, func(dir util.FullPath, entry *filer_pb.Entry) bool {
if !shouldCacheToLocal(entry) {
return true // true means recursive traversal should continue
}
if fileFilter.matches(entry) {
return true
}
2021-08-27 06:18:34 +08:00
fmt.Fprintf(writer, "Cache %+v ... ", dir.Child(entry.Name))
2021-08-10 05:35:18 +08:00
remoteLocation := filer.MapFullPathToRemoteStorageLocation(localMountedDir, remoteMountedLocation, dir.Child(entry.Name))
if err := filer.DownloadToLocal(commandEnv, remoteConf, remoteLocation, dir, entry); err != nil {
fmt.Fprintf(writer, "DownloadToLocal %+v: %v\n", remoteLocation, err)
return false
}
2021-08-27 06:18:34 +08:00
fmt.Fprintf(writer, "Done\n")
2021-08-10 05:35:18 +08:00
return true
})
}