Merge pull request #39 from chrislusf/master

sync
This commit is contained in:
hilimd 2020-11-22 20:29:17 +08:00 committed by GitHub
commit 9aa990f80f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
23 changed files with 121 additions and 47 deletions

View File

@ -121,15 +121,16 @@ On top of the object store, optional [Filer] can support directories and POSIX a
## Filer Features ## ## Filer Features ##
* [Filer server][Filer] provides "normal" directories and files via http. * [Filer server][Filer] provides "normal" directories and files via http.
* [Super Large Files][SuperLargeFiles] stores large or super large files in tens of TB. * [File TTL][FilerTTL] automatically expires file metadata and actual file data.
* [Mount filer][Mount] reads and writes files directly as a local directory via FUSE. * [Mount filer][Mount] reads and writes files directly as a local directory via FUSE.
* [Filer Store Replication][FilerStoreReplication] enables HA for filer meta data stores.
* [Active-Active Replication][ActiveActiveAsyncReplication] enables asynchronous one-way or two-way cross cluster continuous replication. * [Active-Active Replication][ActiveActiveAsyncReplication] enables asynchronous one-way or two-way cross cluster continuous replication.
* [Amazon S3 compatible API][AmazonS3API] accesses files with S3 tooling. * [Amazon S3 compatible API][AmazonS3API] accesses files with S3 tooling.
* [Hadoop Compatible File System][Hadoop] accesses files from Hadoop/Spark/Flink/etc or even runs HBase. * [Hadoop Compatible File System][Hadoop] accesses files from Hadoop/Spark/Flink/etc or even runs HBase.
* [Async Replication To Cloud][BackupToCloud] has extremely fast local access and backups to Amazon S3, Google Cloud Storage, Azure, BackBlaze. * [Async Replication To Cloud][BackupToCloud] has extremely fast local access and backups to Amazon S3, Google Cloud Storage, Azure, BackBlaze.
* [WebDAV] accesses as a mapped drive on Mac and Windows, or from mobile devices. * [WebDAV] accesses as a mapped drive on Mac and Windows, or from mobile devices.
* [AES256-GCM Encrypted Storage][FilerDataEncryption] safely stores the encrypted data. * [AES256-GCM Encrypted Storage][FilerDataEncryption] safely stores the encrypted data.
* [File TTL][FilerTTL] automatically purges file metadata and actual file data. * [Super Large Files][SuperLargeFiles] stores large or super large files in tens of TB.
* [Kubernetes CSI Driver][SeaweedFsCsiDriver] A Container Storage Interface (CSI) Driver. [![Docker Pulls](https://img.shields.io/docker/pulls/chrislusf/seaweedfs-csi-driver.svg?maxAge=4800)](https://hub.docker.com/r/chrislusf/seaweedfs-csi-driver/) * [Kubernetes CSI Driver][SeaweedFsCsiDriver] A Container Storage Interface (CSI) Driver. [![Docker Pulls](https://img.shields.io/docker/pulls/chrislusf/seaweedfs-csi-driver.svg?maxAge=4800)](https://hub.docker.com/r/chrislusf/seaweedfs-csi-driver/)
[Filer]: https://github.com/chrislusf/seaweedfs/wiki/Directories-and-Files [Filer]: https://github.com/chrislusf/seaweedfs/wiki/Directories-and-Files
@ -146,6 +147,7 @@ On top of the object store, optional [Filer] can support directories and POSIX a
[VolumeServerTTL]: https://github.com/chrislusf/seaweedfs/wiki/Store-file-with-a-Time-To-Live [VolumeServerTTL]: https://github.com/chrislusf/seaweedfs/wiki/Store-file-with-a-Time-To-Live
[SeaweedFsCsiDriver]: https://github.com/seaweedfs/seaweedfs-csi-driver [SeaweedFsCsiDriver]: https://github.com/seaweedfs/seaweedfs-csi-driver
[ActiveActiveAsyncReplication]: https://github.com/chrislusf/seaweedfs/wiki/Filer-Active-Active-cross-cluster-continuous-synchronization [ActiveActiveAsyncReplication]: https://github.com/chrislusf/seaweedfs/wiki/Filer-Active-Active-cross-cluster-continuous-synchronization
[FilerStoreReplication]: https://github.com/chrislusf/seaweedfs/wiki/Filer-Store-Replication
[Back to TOC](#table-of-contents) [Back to TOC](#table-of-contents)

2
go.mod
View File

@ -40,7 +40,7 @@ require (
github.com/json-iterator/go v1.1.10 github.com/json-iterator/go v1.1.10
github.com/karlseguin/ccache v2.0.3+incompatible github.com/karlseguin/ccache v2.0.3+incompatible
github.com/karlseguin/expect v1.0.1 // indirect github.com/karlseguin/expect v1.0.1 // indirect
github.com/klauspost/compress v1.10.9 github.com/klauspost/compress v1.10.9 // indirect
github.com/klauspost/cpuid v1.2.1 // indirect github.com/klauspost/cpuid v1.2.1 // indirect
github.com/klauspost/crc32 v1.2.0 github.com/klauspost/crc32 v1.2.0
github.com/klauspost/reedsolomon v1.9.2 github.com/klauspost/reedsolomon v1.9.2

View File

@ -5,7 +5,7 @@
<groupId>com.github.chrislusf</groupId> <groupId>com.github.chrislusf</groupId>
<artifactId>seaweedfs-client</artifactId> <artifactId>seaweedfs-client</artifactId>
<version>1.5.4</version> <version>1.5.6</version>
<parent> <parent>
<groupId>org.sonatype.oss</groupId> <groupId>org.sonatype.oss</groupId>
@ -68,6 +68,11 @@
<version>4.13.1</version> <version>4.13.1</version>
<scope>test</scope> <scope>test</scope>
</dependency> </dependency>
<dependency>
<groupId>javax.annotation</groupId>
<artifactId>javax.annotation-api</artifactId>
<version>1.3.2</version>
</dependency>
</dependencies> </dependencies>
<distributionManagement> <distributionManagement>

View File

@ -5,7 +5,7 @@
<groupId>com.github.chrislusf</groupId> <groupId>com.github.chrislusf</groupId>
<artifactId>seaweedfs-client</artifactId> <artifactId>seaweedfs-client</artifactId>
<version>1.5.4</version> <version>1.5.6</version>
<parent> <parent>
<groupId>org.sonatype.oss</groupId> <groupId>org.sonatype.oss</groupId>

View File

@ -5,7 +5,7 @@
<groupId>com.github.chrislusf</groupId> <groupId>com.github.chrislusf</groupId>
<artifactId>seaweedfs-client</artifactId> <artifactId>seaweedfs-client</artifactId>
<version>1.5.4</version> <version>1.5.6</version>
<parent> <parent>
<groupId>org.sonatype.oss</groupId> <groupId>org.sonatype.oss</groupId>

View File

@ -11,13 +11,13 @@
<dependency> <dependency>
<groupId>com.github.chrislusf</groupId> <groupId>com.github.chrislusf</groupId>
<artifactId>seaweedfs-client</artifactId> <artifactId>seaweedfs-client</artifactId>
<version>1.5.4</version> <version>1.5.6</version>
<scope>compile</scope> <scope>compile</scope>
</dependency> </dependency>
<dependency> <dependency>
<groupId>com.github.chrislusf</groupId> <groupId>com.github.chrislusf</groupId>
<artifactId>seaweedfs-hadoop2-client</artifactId> <artifactId>seaweedfs-hadoop2-client</artifactId>
<version>1.5.4</version> <version>1.5.6</version>
<scope>compile</scope> <scope>compile</scope>
</dependency> </dependency>
<dependency> <dependency>

View File

@ -301,7 +301,7 @@
</snapshotRepository> </snapshotRepository>
</distributionManagement> </distributionManagement>
<properties> <properties>
<seaweedfs.client.version>1.5.4</seaweedfs.client.version> <seaweedfs.client.version>1.5.6</seaweedfs.client.version>
<hadoop.version>2.9.2</hadoop.version> <hadoop.version>2.9.2</hadoop.version>
</properties> </properties>
</project> </project>

View File

@ -5,7 +5,7 @@
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<properties> <properties>
<seaweedfs.client.version>1.5.4</seaweedfs.client.version> <seaweedfs.client.version>1.5.6</seaweedfs.client.version>
<hadoop.version>2.9.2</hadoop.version> <hadoop.version>2.9.2</hadoop.version>
</properties> </properties>

View File

@ -309,7 +309,7 @@
</snapshotRepository> </snapshotRepository>
</distributionManagement> </distributionManagement>
<properties> <properties>
<seaweedfs.client.version>1.5.4</seaweedfs.client.version> <seaweedfs.client.version>1.5.6</seaweedfs.client.version>
<hadoop.version>3.1.1</hadoop.version> <hadoop.version>3.1.1</hadoop.version>
</properties> </properties>
</project> </project>

View File

@ -5,7 +5,7 @@
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<properties> <properties>
<seaweedfs.client.version>1.5.4</seaweedfs.client.version> <seaweedfs.client.version>1.5.6</seaweedfs.client.version>
<hadoop.version>3.1.1</hadoop.version> <hadoop.version>3.1.1</hadoop.version>
</properties> </properties>

View File

@ -0,0 +1,3 @@
#!/bin/bash
# Runs the hsbench S3 benchmark against the S3 endpoint at http://localhost:8333
# using the placeholder credentials "accesstoken" / "secret".
# NOTE(review): flag meanings below are taken from hsbench's usage text as
# remembered, not from this repository — confirm with `hsbench -h`:
#   -z 4K       presumably the object size
#   -d 10       presumably the maximum test duration
#   -t 10       presumably the number of threads
#   -b 10       presumably the number of buckets
#   -m "..."    presumably the sequence of benchmark phases to run
#   -bp         presumably the bucket name prefix
hsbench -a accesstoken -s secret -z 4K -d 10 -t 10 -b 10 -u http://localhost:8333 -m "cxipgdx" -bp "hsbench-"

View File

@ -47,7 +47,7 @@ func (f *Filer) DeleteEntryMetaAndData(ctx context.Context, p util.FullPath, isR
} }
if shouldDeleteChunks && !isCollection { if shouldDeleteChunks && !isCollection {
go f.DeleteChunks(chunks) f.DirectDeleteChunks(chunks)
} }
// A case not handled: // A case not handled:
// what if the chunk is in a different collection? // what if the chunk is in a different collection?

View File

@ -68,6 +68,50 @@ func (f *Filer) loopProcessingDeletion() {
} }
} }
// doDeleteFileIds deletes the given file ids by handing them to
// operation.DeleteFilesWithLookupVolumeId in consecutive batches.
//
// Volume locations are looked up through the filer's master client. A batch
// failure is logged and the remaining batches are still attempted; errors
// whose text contains "already deleted" are not logged. Nothing is returned
// to the caller.
func (f *Filer) doDeleteFileIds(fileIds []string) {
	// Upper bound on ids sent per call; roughly 20 bytes cost per file id.
	const batchSize = 100000

	lookupFunc := LookupByMasterClientFn(f.MasterClient)

	for len(fileIds) > 0 {
		// Take at most batchSize ids off the front of the pending list.
		end := len(fileIds)
		if end > batchSize {
			end = batchSize
		}
		batch := fileIds[:end]
		fileIds = fileIds[end:]

		_, err := operation.DeleteFilesWithLookupVolumeId(f.GrpcDialOption, batch, lookupFunc)
		if err != nil && !strings.Contains(err.Error(), "already deleted") {
			glog.V(0).Infof("deleting fileIds len=%d error: %v", len(batch), err)
		}
	}
}
// DirectDeleteChunks gathers the file ids behind chunks and deletes them by
// calling doDeleteFileIds directly in the calling goroutine.
//
// A regular chunk contributes only its own file id. A manifest chunk is first
// resolved with ResolveOneChunkManifest; the ids of the resolved data chunks
// are added, followed by the manifest chunk's own id. A resolve failure is
// logged, and whatever data chunks were returned, plus the manifest chunk
// itself, are still scheduled for deletion.
func (f *Filer) DirectDeleteChunks(chunks []*filer_pb.FileChunk) {
	var fileIds []string

	for _, c := range chunks {
		if c.IsChunkManifest {
			resolved, resolveErr := ResolveOneChunkManifest(f.MasterClient.LookupFileId, c)
			if resolveErr != nil {
				glog.V(0).Infof("failed to resolve manifest %s: %v", c.FileId, resolveErr)
			}
			for _, dataChunk := range resolved {
				fileIds = append(fileIds, dataChunk.GetFileIdString())
			}
		}
		// The chunk's own id always goes last, after any data chunks it references.
		fileIds = append(fileIds, c.GetFileIdString())
	}

	f.doDeleteFileIds(fileIds)
}
func (f *Filer) DeleteChunks(chunks []*filer_pb.FileChunk) { func (f *Filer) DeleteChunks(chunks []*filer_pb.FileChunk) {
for _, chunk := range chunks { for _, chunk := range chunks {
if !chunk.IsChunkManifest { if !chunk.IsChunkManifest {

View File

@ -75,6 +75,7 @@ func TestCreateNeedleFromRequest(t *testing.T) {
Upload("http://localhost:8080/389,0f084d17353afda0", "t.txt", false, bytes.NewReader(gzippedData), true, "text/plain", nil, "") Upload("http://localhost:8080/389,0f084d17353afda0", "t.txt", false, bytes.NewReader(gzippedData), true, "text/plain", nil, "")
} }
/*
{ {
mc.needleHandling = func(n *needle.Needle, originalSize int, err error) { mc.needleHandling = func(n *needle.Needle, originalSize int, err error) {
assert.Equal(t, nil, err, "upload: %v", err) assert.Equal(t, nil, err, "upload: %v", err)
@ -98,6 +99,7 @@ func TestCreateNeedleFromRequest(t *testing.T) {
zstdData, _ := util.ZstdData([]byte(textContent)) zstdData, _ := util.ZstdData([]byte(textContent))
Upload("http://localhost:8080/389,0f084d17353afda0", "t.txt", false, bytes.NewReader(zstdData), false, "application/zstd", nil, "") Upload("http://localhost:8080/389,0f084d17353afda0", "t.txt", false, bytes.NewReader(zstdData), false, "application/zstd", nil, "")
} }
*/
} }

View File

@ -113,12 +113,6 @@ func (s3a *S3ApiServer) DeleteObjectHandler(w http.ResponseWriter, r *http.Reque
bucket, object := getBucketAndObject(r) bucket, object := getBucketAndObject(r)
response, _ := s3a.listFilerEntries(bucket, object, 1, "", "/")
if len(response.Contents) != 0 && strings.HasSuffix(object, "/") {
w.WriteHeader(http.StatusNoContent)
return
}
destUrl := fmt.Sprintf("http://%s%s/%s%s?recursive=true", destUrl := fmt.Sprintf("http://%s%s/%s%s?recursive=true",
s3a.option.Filer, s3a.option.BucketsPath, bucket, object) s3a.option.Filer, s3a.option.BucketsPath, bucket, object)
@ -266,11 +260,6 @@ func (s3a *S3ApiServer) proxyToFiler(w http.ResponseWriter, r *http.Request, des
resp, postErr := client.Do(proxyReq) resp, postErr := client.Do(proxyReq)
if (resp.ContentLength == -1 || resp.StatusCode == 404) && !strings.HasSuffix(destUrl, "/") {
writeErrorResponse(w, s3err.ErrNoSuchKey, r.URL)
return
}
if postErr != nil { if postErr != nil {
glog.Errorf("post to filer: %v", postErr) glog.Errorf("post to filer: %v", postErr)
writeErrorResponse(w, s3err.ErrInternalError, r.URL) writeErrorResponse(w, s3err.ErrInternalError, r.URL)
@ -278,6 +267,11 @@ func (s3a *S3ApiServer) proxyToFiler(w http.ResponseWriter, r *http.Request, des
} }
defer util.CloseResponse(resp) defer util.CloseResponse(resp)
if (resp.ContentLength == -1 || resp.StatusCode == 404) && !strings.HasSuffix(destUrl, "/") {
writeErrorResponse(w, s3err.ErrNoSuchKey, r.URL)
return
}
responseFn(resp, w) responseFn(resp, w)
} }

View File

@ -159,8 +159,8 @@ func (vs *VolumeServer) GetOrHeadHandler(w http.ResponseWriter, r *http.Request)
if n.Data, err = util.DecompressData(n.Data); err != nil { if n.Data, err = util.DecompressData(n.Data); err != nil {
glog.V(0).Infoln("ungzip error:", err, r.URL.Path) glog.V(0).Infoln("ungzip error:", err, r.URL.Path)
} }
} else if strings.Contains(r.Header.Get("Accept-Encoding"), "zstd") && util.IsZstdContent(n.Data) { // } else if strings.Contains(r.Header.Get("Accept-Encoding"), "zstd") && util.IsZstdContent(n.Data) {
w.Header().Set("Content-Encoding", "zstd") // w.Header().Set("Content-Encoding", "zstd")
} else if strings.Contains(r.Header.Get("Accept-Encoding"), "gzip") && util.IsGzippedContent(n.Data) { } else if strings.Contains(r.Header.Get("Accept-Encoding"), "gzip") && util.IsGzippedContent(n.Data) {
w.Header().Set("Content-Encoding", "gzip") w.Header().Set("Content-Encoding", "gzip")
} else { } else {

View File

@ -22,7 +22,7 @@ func (c *commandCollectionDelete) Name() string {
func (c *commandCollectionDelete) Help() string { func (c *commandCollectionDelete) Help() string {
return `delete specified collection return `delete specified collection
collection.delete -collectin <collection_name> -force collection.delete -collection <collection_name> -force
` `
} }

View File

@ -72,8 +72,9 @@ func (c *commandFsMetaCat) Do(args []string, commandEnv *CommandEnv, writer io.W
bytes, _ := proto.Marshal(respLookupEntry.Entry) bytes, _ := proto.Marshal(respLookupEntry.Entry)
gzippedBytes, _ := util.GzipData(bytes) gzippedBytes, _ := util.GzipData(bytes)
zstdBytes, _ := util.ZstdData(bytes) // zstdBytes, _ := util.ZstdData(bytes)
fmt.Fprintf(writer, "chunks %d meta size: %d gzip:%d zstd:%d\n", len(respLookupEntry.Entry.Chunks), len(bytes), len(gzippedBytes), len(zstdBytes)) // fmt.Fprintf(writer, "chunks %d meta size: %d gzip:%d zstd:%d\n", len(respLookupEntry.Entry.Chunks), len(bytes), len(gzippedBytes), len(zstdBytes))
fmt.Fprintf(writer, "chunks %d meta size: %d gzip:%d\n", len(respLookupEntry.Entry.Chunks), len(bytes), len(gzippedBytes))
return nil return nil

View File

@ -306,16 +306,16 @@ func isGoodMove(placement *super_block.ReplicaPlacement, existingReplicas []*Vol
dcs[targetNode.dc] = true dcs[targetNode.dc] = true
racks[fmt.Sprintf("%s %s", targetNode.dc, targetNode.rack)]++ racks[fmt.Sprintf("%s %s", targetNode.dc, targetNode.rack)]++
if len(dcs) > placement.DiffDataCenterCount+1 { if len(dcs) != placement.DiffDataCenterCount+1 {
return false return false
} }
if len(racks) > placement.DiffRackCount+placement.DiffDataCenterCount+1 { if len(racks) != placement.DiffRackCount+placement.DiffDataCenterCount+1 {
return false return false
} }
for _, sameRackCount := range racks { for _, sameRackCount := range racks {
if sameRackCount > placement.SameRackCount+1 { if sameRackCount != placement.SameRackCount+1 {
return false return false
} }
} }

View File

@ -20,6 +20,22 @@ func TestIsGoodMove(t *testing.T) {
var tests = []testMoveCase{ var tests = []testMoveCase{
{
name: "test 100 move to wrong data centers",
replication: "100",
replicas: []*VolumeReplica{
{
location: &location{"dc1", "r1", &master_pb.DataNodeInfo{Id: "dn1"}},
},
{
location: &location{"dc2", "r2", &master_pb.DataNodeInfo{Id: "dn2"}},
},
},
sourceLocation: location{"dc1", "r1", &master_pb.DataNodeInfo{Id: "dn1"}},
targetLocation: location{"dc2", "r3", &master_pb.DataNodeInfo{Id: "dn3"}},
expected: false,
},
{ {
name: "test 100 move to spread into proper data centers", name: "test 100 move to spread into proper data centers",
replication: "100", replication: "100",

View File

@ -23,7 +23,7 @@ type ParsedUpload struct {
MimeType string MimeType string
PairMap map[string]string PairMap map[string]string
IsGzipped bool IsGzipped bool
IsZstd bool // IsZstd bool
OriginalDataSize int OriginalDataSize int
ModifiedTime uint64 ModifiedTime uint64
Ttl *TTL Ttl *TTL
@ -100,7 +100,7 @@ func ParseUpload(r *http.Request, sizeLimit int64) (pu *ParsedUpload, e error) {
func parsePut(r *http.Request, sizeLimit int64, pu *ParsedUpload) (e error) { func parsePut(r *http.Request, sizeLimit int64, pu *ParsedUpload) (e error) {
pu.IsGzipped = r.Header.Get("Content-Encoding") == "gzip" pu.IsGzipped = r.Header.Get("Content-Encoding") == "gzip"
pu.IsZstd = r.Header.Get("Content-Encoding") == "zstd" // pu.IsZstd = r.Header.Get("Content-Encoding") == "zstd"
pu.MimeType = r.Header.Get("Content-Type") pu.MimeType = r.Header.Get("Content-Type")
pu.FileName = "" pu.FileName = ""
pu.Data, e = ioutil.ReadAll(io.LimitReader(r.Body, sizeLimit+1)) pu.Data, e = ioutil.ReadAll(io.LimitReader(r.Body, sizeLimit+1))
@ -194,7 +194,7 @@ func parseMultipart(r *http.Request, sizeLimit int64, pu *ParsedUpload) (e error
} }
pu.IsGzipped = part.Header.Get("Content-Encoding") == "gzip" pu.IsGzipped = part.Header.Get("Content-Encoding") == "gzip"
pu.IsZstd = part.Header.Get("Content-Encoding") == "zstd" // pu.IsZstd = part.Header.Get("Content-Encoding") == "zstd"
} }
return return

View File

@ -132,6 +132,8 @@ func NewVolumeLayout(rp *super_block.ReplicaPlacement, ttl *needle.TTL, volumeSi
} }
func (vl *VolumeLayout) String() string { func (vl *VolumeLayout) String() string {
vl.accessLock.RLock()
defer vl.accessLock.RUnlock()
return fmt.Sprintf("rp:%v, ttl:%v, vid2location:%v, writables:%v, volumeSizeLimit:%v", vl.rp, vl.ttl, vl.vid2location, vl.writables, vl.volumeSizeLimit) return fmt.Sprintf("rp:%v, ttl:%v, vid2location:%v, writables:%v, volumeSizeLimit:%v", vl.rp, vl.ttl, vl.vid2location, vl.writables, vl.volumeSizeLimit)
} }

View File

@ -9,7 +9,7 @@ import (
"strings" "strings"
"github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/glog"
"github.com/klauspost/compress/zstd" // "github.com/klauspost/compress/zstd"
) )
var ( var (
@ -55,19 +55,16 @@ func GzipData(input []byte) ([]byte, error) {
return buf.Bytes(), nil return buf.Bytes(), nil
} }
var zstdEncoder, _ = zstd.NewWriter(nil)
func ZstdData(input []byte) ([]byte, error) {
return zstdEncoder.EncodeAll(input, nil), nil
}
func DecompressData(input []byte) ([]byte, error) { func DecompressData(input []byte) ([]byte, error) {
if IsGzippedContent(input) { if IsGzippedContent(input) {
return ungzipData(input) return ungzipData(input)
} }
/*
if IsZstdContent(input) { if IsZstdContent(input) {
return unzstdData(input) return unzstdData(input)
} }
*/
return input, UnsupportedCompression return input, UnsupportedCompression
} }
@ -82,12 +79,6 @@ func ungzipData(input []byte) ([]byte, error) {
return output, err return output, err
} }
var decoder, _ = zstd.NewReader(nil)
func unzstdData(input []byte) ([]byte, error) {
return decoder.DecodeAll(input, nil)
}
func IsGzippedContent(data []byte) bool { func IsGzippedContent(data []byte) bool {
if len(data) < 2 { if len(data) < 2 {
return false return false
@ -95,12 +86,26 @@ func IsGzippedContent(data []byte) bool {
return data[0] == 31 && data[1] == 139 return data[0] == 31 && data[1] == 139
} }
/*
var zstdEncoder, _ = zstd.NewWriter(nil)
func ZstdData(input []byte) ([]byte, error) {
return zstdEncoder.EncodeAll(input, nil), nil
}
var decoder, _ = zstd.NewReader(nil)
func unzstdData(input []byte) ([]byte, error) {
return decoder.DecodeAll(input, nil)
}
func IsZstdContent(data []byte) bool { func IsZstdContent(data []byte) bool {
if len(data) < 4 { if len(data) < 4 {
return false return false
} }
return data[3] == 0xFD && data[2] == 0x2F && data[1] == 0xB5 && data[0] == 0x28 return data[3] == 0xFD && data[2] == 0x2F && data[1] == 0xB5 && data[0] == 0x28
} }
*/
/* /*
* By default, data is not compressed, since compression can be done on the client side.