seaweedfs/weed/replication/sink/s3sink/s3_sink.go

151 lines
3.8 KiB
Go
Raw Normal View History

2018-10-04 14:36:52 +08:00
package S3Sink
import (
2019-03-16 08:20:24 +08:00
"context"
2018-10-04 14:36:52 +08:00
"fmt"
2018-11-05 03:58:59 +08:00
"strings"
2018-10-07 04:04:33 +08:00
"sync"
2018-10-04 14:36:52 +08:00
2018-10-11 15:08:13 +08:00
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/credentials"
2018-10-04 14:36:52 +08:00
"github.com/aws/aws-sdk-go/aws/session"
2018-10-11 15:08:13 +08:00
"github.com/aws/aws-sdk-go/service/s3"
"github.com/aws/aws-sdk-go/service/s3/s3iface"
2020-09-01 15:21:19 +08:00
"github.com/chrislusf/seaweedfs/weed/filer"
2018-11-01 16:12:21 +08:00
"github.com/chrislusf/seaweedfs/weed/glog"
2018-10-11 15:08:13 +08:00
"github.com/chrislusf/seaweedfs/weed/pb/filer_pb"
2018-10-04 14:36:52 +08:00
"github.com/chrislusf/seaweedfs/weed/replication/sink"
2018-10-11 15:08:13 +08:00
"github.com/chrislusf/seaweedfs/weed/replication/source"
"github.com/chrislusf/seaweedfs/weed/util"
2018-10-04 14:36:52 +08:00
)
type S3Sink struct {
conn s3iface.S3API
region string
bucket string
dir string
2020-04-08 08:49:00 +08:00
endpoint string
2018-10-04 14:36:52 +08:00
filerSource *source.FilerSource
}
// init registers an uninitialized S3Sink in the package-level list of
// available replication sinks.
func init() {
	sink.Sinks = append(sink.Sinks, new(S3Sink))
}
func (s3sink *S3Sink) GetName() string {
2018-10-07 08:10:15 +08:00
return "s3"
2018-10-04 14:36:52 +08:00
}
// GetSinkToDirectory returns the directory this sink was configured with.
func (s3sink *S3Sink) GetSinkToDirectory() string {
	return s3sink.dir
}
func (s3sink *S3Sink) Initialize(configuration util.Configuration, prefix string) error {
glog.V(0).Infof("sink.s3.region: %v", configuration.GetString(prefix+"region"))
glog.V(0).Infof("sink.s3.bucket: %v", configuration.GetString(prefix+"bucket"))
glog.V(0).Infof("sink.s3.directory: %v", configuration.GetString(prefix+"directory"))
2020-04-08 08:49:00 +08:00
glog.V(0).Infof("sink.s3.endpoint: %v", configuration.GetString(prefix+"endpoint"))
2018-10-04 14:36:52 +08:00
return s3sink.initialize(
configuration.GetString(prefix+"aws_access_key_id"),
configuration.GetString(prefix+"aws_secret_access_key"),
configuration.GetString(prefix+"region"),
configuration.GetString(prefix+"bucket"),
configuration.GetString(prefix+"directory"),
2020-04-08 08:49:00 +08:00
configuration.GetString(prefix+"endpoint"),
2018-10-04 14:36:52 +08:00
)
}
// SetSourceFiler stores the filer source that CreateEntry later uses to look
// up the file ids of an entry's chunks.
func (s3sink *S3Sink) SetSourceFiler(s *source.FilerSource) {
	s3sink.filerSource = s
}
2020-04-08 08:49:00 +08:00
func (s3sink *S3Sink) initialize(awsAccessKeyId, awsSecretAccessKey, region, bucket, dir, endpoint string) error {
2018-10-04 14:36:52 +08:00
s3sink.region = region
s3sink.bucket = bucket
s3sink.dir = dir
2020-04-08 08:49:00 +08:00
s3sink.endpoint = endpoint
2018-10-04 14:36:52 +08:00
config := &aws.Config{
2020-04-08 08:49:00 +08:00
Region: aws.String(s3sink.region),
Endpoint: aws.String(s3sink.endpoint),
2018-10-04 14:36:52 +08:00
}
2019-11-19 11:24:34 +08:00
if awsAccessKeyId != "" && awsSecretAccessKey != "" {
config.Credentials = credentials.NewStaticCredentials(awsAccessKeyId, awsSecretAccessKey, "")
2018-10-04 14:36:52 +08:00
}
sess, err := session.NewSession(config)
if err != nil {
return fmt.Errorf("create aws session: %v", err)
}
s3sink.conn = s3.New(sess)
return nil
}
func (s3sink *S3Sink) DeleteEntry(key string, isDirectory, deleteIncludeChunks bool) error {
2018-10-04 14:36:52 +08:00
2018-11-05 03:58:59 +08:00
key = cleanKey(key)
2018-10-04 14:36:52 +08:00
if isDirectory {
key = key + "/"
}
return s3sink.deleteObject(key)
}
func (s3sink *S3Sink) CreateEntry(key string, entry *filer_pb.Entry) error {
2018-11-05 03:58:59 +08:00
key = cleanKey(key)
2018-10-04 16:14:44 +08:00
if entry.IsDirectory {
return nil
}
2018-10-04 14:36:52 +08:00
uploadId, err := s3sink.createMultipartUpload(key, entry)
if err != nil {
return err
}
2020-09-01 15:21:19 +08:00
totalSize := filer.FileSize(entry)
chunkViews := filer.ViewFromChunks(s3sink.filerSource.LookupFileId, entry.Chunks, 0, int64(totalSize))
2018-10-04 14:36:52 +08:00
2020-03-11 14:37:14 +08:00
parts := make([]*s3.CompletedPart, len(chunkViews))
2018-10-04 14:36:52 +08:00
var wg sync.WaitGroup
for chunkIndex, chunk := range chunkViews {
partId := chunkIndex + 1
wg.Add(1)
2020-09-01 15:21:19 +08:00
go func(chunk *filer.ChunkView, index int) {
2018-10-04 14:36:52 +08:00
defer wg.Done()
2020-02-26 14:23:59 +08:00
if part, uploadErr := s3sink.uploadPart(key, uploadId, partId, chunk); uploadErr != nil {
2018-10-04 14:36:52 +08:00
err = uploadErr
} else {
2020-03-11 14:37:14 +08:00
parts[index] = part
2018-10-04 14:36:52 +08:00
}
2020-03-11 14:37:14 +08:00
}(chunk, chunkIndex)
2018-10-04 14:36:52 +08:00
}
wg.Wait()
if err != nil {
s3sink.abortMultipartUpload(key, uploadId)
return err
}
return s3sink.completeMultipartUpload(context.Background(), key, uploadId, parts)
2018-10-04 14:36:52 +08:00
}
func (s3sink *S3Sink) UpdateEntry(key string, oldEntry *filer_pb.Entry, newParentPath string, newEntry *filer_pb.Entry, deleteIncludeChunks bool) (foundExistingEntry bool, err error) {
2018-11-05 03:58:59 +08:00
key = cleanKey(key)
2018-10-04 14:36:52 +08:00
// TODO improve efficiency
return false, nil
}
2018-11-05 03:58:59 +08:00
// cleanKey returns key with a single leading "/" removed, if present.
// Only one slash is stripped, so "//a" becomes "/a"; keys without a leading
// slash (including the empty string) are returned unchanged.
func cleanKey(key string) string {
	return strings.TrimPrefix(key, "/")
}