mirror of
https://github.com/seaweedfs/seaweedfs.git
synced 2025-01-19 06:53:32 +08:00
Merge pull request #1299 from song-zhang/master
use async write to persistent file to disk - part1
This commit is contained in:
commit
9016fa19ba
55
weed/storage/needle/async_request.go
Normal file
55
weed/storage/needle/async_request.go
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
package needle
|
||||||
|
|
||||||
|
type AsyncRequest struct {
|
||||||
|
N *Needle
|
||||||
|
IsWriteRequest bool
|
||||||
|
ActualSize int64
|
||||||
|
Fsync bool
|
||||||
|
offset uint64
|
||||||
|
size uint64
|
||||||
|
doneChan chan interface{}
|
||||||
|
isUnchanged bool
|
||||||
|
err error
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewAsyncRequest(n *Needle, isWriteRequest bool, fsync bool) *AsyncRequest {
|
||||||
|
return &AsyncRequest{
|
||||||
|
offset: 0,
|
||||||
|
size: 0,
|
||||||
|
ActualSize: 0,
|
||||||
|
doneChan: make(chan interface{}),
|
||||||
|
N: n,
|
||||||
|
isUnchanged: false,
|
||||||
|
IsWriteRequest: isWriteRequest,
|
||||||
|
Fsync: fsync,
|
||||||
|
err: nil,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *AsyncRequest) WaitComplete() (uint64, uint64, bool, error) {
|
||||||
|
<-r.doneChan
|
||||||
|
return r.offset, r.size, r.isUnchanged, r.err
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *AsyncRequest) Complete(offset uint64, size uint64, isUnchanged bool, err error) {
|
||||||
|
r.offset = offset
|
||||||
|
r.size = size
|
||||||
|
r.isUnchanged = isUnchanged
|
||||||
|
r.err = err
|
||||||
|
close(r.doneChan)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *AsyncRequest) UpdateResult(offset uint64, size uint64, isUnchanged bool, err error) {
|
||||||
|
r.offset = offset
|
||||||
|
r.size = size
|
||||||
|
r.isUnchanged = isUnchanged
|
||||||
|
r.err = err
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *AsyncRequest) Submit() {
|
||||||
|
close(r.doneChan)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *AsyncRequest) IsSucceed() bool {
|
||||||
|
return r.err == nil
|
||||||
|
}
|
@ -264,12 +264,7 @@ func (s *Store) WriteVolumeNeedle(i needle.VolumeId, n *needle.Needle, fsync boo
|
|||||||
err = fmt.Errorf("volume %d is read only", i)
|
err = fmt.Errorf("volume %d is read only", i)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
// using len(n.Data) here instead of n.Size before n.Size is populated in v.writeNeedle(n)
|
_, _, isUnchanged, err = v.writeNeedle2(n, fsync)
|
||||||
if MaxPossibleVolumeSize >= v.ContentSize()+uint64(needle.GetActualSize(uint32(len(n.Data)), v.Version())) {
|
|
||||||
_, _, isUnchanged, err = v.writeNeedle(n, fsync)
|
|
||||||
} else {
|
|
||||||
err = fmt.Errorf("volume size limit %d exceeded! current size is %d", s.GetVolumeSizeLimit(), v.ContentSize())
|
|
||||||
}
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
glog.V(0).Infoln("volume", i, "not found!")
|
glog.V(0).Infoln("volume", i, "not found!")
|
||||||
@ -282,11 +277,7 @@ func (s *Store) DeleteVolumeNeedle(i needle.VolumeId, n *needle.Needle) (uint32,
|
|||||||
if v.noWriteOrDelete {
|
if v.noWriteOrDelete {
|
||||||
return 0, fmt.Errorf("volume %d is read only", i)
|
return 0, fmt.Errorf("volume %d is read only", i)
|
||||||
}
|
}
|
||||||
if MaxPossibleVolumeSize >= v.ContentSize()+uint64(needle.GetActualSize(0, v.Version())) {
|
return v.deleteNeedle2(n)
|
||||||
return v.deleteNeedle(n)
|
|
||||||
} else {
|
|
||||||
return 0, fmt.Errorf("volume size limit %d exceeded! current size is %d", s.GetVolumeSizeLimit(), v.ContentSize())
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return 0, fmt.Errorf("volume %d not found on %s:%d", i, s.Ip, s.Port)
|
return 0, fmt.Errorf("volume %d not found on %s:%d", i, s.Ip, s.Port)
|
||||||
}
|
}
|
||||||
|
@ -33,6 +33,7 @@ type Volume struct {
|
|||||||
super_block.SuperBlock
|
super_block.SuperBlock
|
||||||
|
|
||||||
dataFileAccessLock sync.RWMutex
|
dataFileAccessLock sync.RWMutex
|
||||||
|
asyncRequestsChan chan *needle.AsyncRequest
|
||||||
lastModifiedTsSeconds uint64 //unix time in seconds
|
lastModifiedTsSeconds uint64 //unix time in seconds
|
||||||
lastAppendAtNs uint64 //unix time in nanoseconds
|
lastAppendAtNs uint64 //unix time in nanoseconds
|
||||||
|
|
||||||
@ -46,12 +47,15 @@ type Volume struct {
|
|||||||
|
|
||||||
func NewVolume(dirname string, collection string, id needle.VolumeId, needleMapKind NeedleMapType, replicaPlacement *super_block.ReplicaPlacement, ttl *needle.TTL, preallocate int64, memoryMapMaxSizeMb uint32) (v *Volume, e error) {
|
func NewVolume(dirname string, collection string, id needle.VolumeId, needleMapKind NeedleMapType, replicaPlacement *super_block.ReplicaPlacement, ttl *needle.TTL, preallocate int64, memoryMapMaxSizeMb uint32) (v *Volume, e error) {
|
||||||
// if replicaPlacement is nil, the superblock will be loaded from disk
|
// if replicaPlacement is nil, the superblock will be loaded from disk
|
||||||
v = &Volume{dir: dirname, Collection: collection, Id: id, MemoryMapMaxSizeMb: memoryMapMaxSizeMb}
|
v = &Volume{dir: dirname, Collection: collection, Id: id, MemoryMapMaxSizeMb: memoryMapMaxSizeMb,
|
||||||
|
asyncRequestsChan: make(chan *needle.AsyncRequest, 128)}
|
||||||
v.SuperBlock = super_block.SuperBlock{ReplicaPlacement: replicaPlacement, Ttl: ttl}
|
v.SuperBlock = super_block.SuperBlock{ReplicaPlacement: replicaPlacement, Ttl: ttl}
|
||||||
v.needleMapKind = needleMapKind
|
v.needleMapKind = needleMapKind
|
||||||
e = v.load(true, true, needleMapKind, preallocate)
|
e = v.load(true, true, needleMapKind, preallocate)
|
||||||
|
v.startWorker()
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
func (v *Volume) String() string {
|
func (v *Volume) String() string {
|
||||||
return fmt.Sprintf("Id:%v, dir:%s, Collection:%s, dataFile:%v, nm:%v, noWrite:%v canDelete:%v", v.Id, v.dir, v.Collection, v.DataBackend, v.nm, v.noWriteOrDelete || v.noWriteCanDelete, v.noWriteCanDelete)
|
return fmt.Sprintf("Id:%v, dir:%s, Collection:%s, dataFile:%v, nm:%v, noWrite:%v canDelete:%v", v.Id, v.dir, v.Collection, v.DataBackend, v.nm, v.noWriteOrDelete || v.noWriteCanDelete, v.noWriteCanDelete)
|
||||||
}
|
}
|
||||||
@ -65,6 +69,7 @@ func VolumeFileName(dir string, collection string, id int) (fileName string) {
|
|||||||
}
|
}
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
func (v *Volume) FileName() (fileName string) {
|
func (v *Volume) FileName() (fileName string) {
|
||||||
return VolumeFileName(v.dir, v.Collection, int(v.Id))
|
return VolumeFileName(v.dir, v.Collection, int(v.Id))
|
||||||
}
|
}
|
||||||
|
@ -46,6 +46,7 @@ func (v *Volume) Destroy() (err error) {
|
|||||||
err = fmt.Errorf("volume %d is compacting", v.Id)
|
err = fmt.Errorf("volume %d is compacting", v.Id)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
close(v.asyncRequestsChan)
|
||||||
storageName, storageKey := v.RemoteStorageNameKey()
|
storageName, storageKey := v.RemoteStorageNameKey()
|
||||||
if v.HasRemoteFile() && storageName != "" && storageKey != "" {
|
if v.HasRemoteFile() && storageName != "" && storageKey != "" {
|
||||||
if backendStorage, found := backend.BackendStorages[storageName]; found {
|
if backendStorage, found := backend.BackendStorages[storageName]; found {
|
||||||
@ -63,7 +64,11 @@ func (v *Volume) Destroy() (err error) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
func (v *Volume) writeNeedle(n *needle.Needle, fsync bool) (offset uint64, size uint32, isUnchanged bool, err error) {
|
func (v *Volume) asyncRequestAppend(request *needle.AsyncRequest) {
|
||||||
|
v.asyncRequestsChan <- request
|
||||||
|
}
|
||||||
|
|
||||||
|
func (v *Volume) writeNeedleDeprecated(n *needle.Needle, fsync bool) (offset uint64, size uint32, isUnchanged bool, err error) {
|
||||||
// glog.V(4).Infof("writing needle %s", needle.NewFileIdFromNeedle(v.Id, n).String())
|
// glog.V(4).Infof("writing needle %s", needle.NewFileIdFromNeedle(v.Id, n).String())
|
||||||
v.dataFileAccessLock.Lock()
|
v.dataFileAccessLock.Lock()
|
||||||
defer v.dataFileAccessLock.Unlock()
|
defer v.dataFileAccessLock.Unlock()
|
||||||
@ -117,7 +122,66 @@ func (v *Volume) writeNeedle(n *needle.Needle, fsync bool) (offset uint64, size
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
func (v *Volume) deleteNeedle(n *needle.Needle) (uint32, error) {
|
func (v *Volume) writeNeedle2(n *needle.Needle, fsync bool) (offset uint64, size uint32, isUnchanged bool, err error) {
|
||||||
|
// glog.V(4).Infof("writing needle %s", needle.NewFileIdFromNeedle(v.Id, n).String())
|
||||||
|
if n.Ttl == needle.EMPTY_TTL && v.Ttl != needle.EMPTY_TTL {
|
||||||
|
n.SetHasTtl()
|
||||||
|
n.Ttl = v.Ttl
|
||||||
|
}
|
||||||
|
|
||||||
|
asyncRequest := needle.NewAsyncRequest(n, true, fsync)
|
||||||
|
// using len(n.Data) here instead of n.Size before n.Size is populated in n.Append()
|
||||||
|
asyncRequest.ActualSize = needle.GetActualSize(uint32(len(n.Data)), v.Version())
|
||||||
|
|
||||||
|
v.asyncRequestAppend(asyncRequest)
|
||||||
|
offset, _, isUnchanged, err = asyncRequest.WaitComplete()
|
||||||
|
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
func (v *Volume) doWriteRequest(n *needle.Needle) (offset uint64, size uint32, isUnchanged bool, err error) {
|
||||||
|
// glog.V(4).Infof("writing needle %s", needle.NewFileIdFromNeedle(v.Id, n).String())
|
||||||
|
if v.isFileUnchanged(n) {
|
||||||
|
size = n.DataSize
|
||||||
|
isUnchanged = true
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// check whether existing needle cookie matches
|
||||||
|
nv, ok := v.nm.Get(n.Id)
|
||||||
|
if ok {
|
||||||
|
existingNeedle, _, _, existingNeedleReadErr := needle.ReadNeedleHeader(v.DataBackend, v.Version(), nv.Offset.ToAcutalOffset())
|
||||||
|
if existingNeedleReadErr != nil {
|
||||||
|
err = fmt.Errorf("reading existing needle: %v", existingNeedleReadErr)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if existingNeedle.Cookie != n.Cookie {
|
||||||
|
glog.V(0).Infof("write cookie mismatch: existing %x, new %x", existingNeedle.Cookie, n.Cookie)
|
||||||
|
err = fmt.Errorf("mismatching cookie %x", n.Cookie)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// append to dat file
|
||||||
|
n.AppendAtNs = uint64(time.Now().UnixNano())
|
||||||
|
if offset, size, _, err = n.Append(v.DataBackend, v.Version()); err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
v.lastAppendAtNs = n.AppendAtNs
|
||||||
|
|
||||||
|
// add to needle map
|
||||||
|
if !ok || uint64(nv.Offset.ToAcutalOffset()) < offset {
|
||||||
|
if err = v.nm.Put(n.Id, ToOffset(int64(offset)), n.Size); err != nil {
|
||||||
|
glog.V(4).Infof("failed to save in needle map %d: %v", n.Id, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if v.lastModifiedTsSeconds < n.LastModified {
|
||||||
|
v.lastModifiedTsSeconds = n.LastModified
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
func (v *Volume) deleteNeedleDeprecated(n *needle.Needle) (uint32, error) {
|
||||||
glog.V(4).Infof("delete needle %s", needle.NewFileIdFromNeedle(v.Id, n).String())
|
glog.V(4).Infof("delete needle %s", needle.NewFileIdFromNeedle(v.Id, n).String())
|
||||||
v.dataFileAccessLock.Lock()
|
v.dataFileAccessLock.Lock()
|
||||||
defer v.dataFileAccessLock.Unlock()
|
defer v.dataFileAccessLock.Unlock()
|
||||||
@ -140,6 +204,37 @@ func (v *Volume) deleteNeedle(n *needle.Needle) (uint32, error) {
|
|||||||
return 0, nil
|
return 0, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (v *Volume) deleteNeedle2(n *needle.Needle) (uint32, error) {
|
||||||
|
asyncRequest := needle.NewAsyncRequest(n, false, false)
|
||||||
|
asyncRequest.ActualSize = needle.GetActualSize(0, v.Version())
|
||||||
|
|
||||||
|
v.asyncRequestAppend(asyncRequest)
|
||||||
|
_, size, _, err := asyncRequest.WaitComplete()
|
||||||
|
|
||||||
|
return uint32(size), err
|
||||||
|
}
|
||||||
|
|
||||||
|
func (v *Volume) doDeleteRequest(n *needle.Needle) (uint32, error) {
|
||||||
|
glog.V(4).Infof("delete needle %s", needle.NewFileIdFromNeedle(v.Id, n).String())
|
||||||
|
nv, ok := v.nm.Get(n.Id)
|
||||||
|
//fmt.Println("key", n.Id, "volume offset", nv.Offset, "data_size", n.Size, "cached size", nv.Size)
|
||||||
|
if ok && nv.Size != TombstoneFileSize {
|
||||||
|
size := nv.Size
|
||||||
|
n.Data = nil
|
||||||
|
n.AppendAtNs = uint64(time.Now().UnixNano())
|
||||||
|
offset, _, _, err := n.Append(v.DataBackend, v.Version())
|
||||||
|
if err != nil {
|
||||||
|
return size, err
|
||||||
|
}
|
||||||
|
v.lastAppendAtNs = n.AppendAtNs
|
||||||
|
if err = v.nm.Delete(n.Id, ToOffset(int64(offset))); err != nil {
|
||||||
|
return size, err
|
||||||
|
}
|
||||||
|
return size, err
|
||||||
|
}
|
||||||
|
return 0, nil
|
||||||
|
}
|
||||||
|
|
||||||
// read fills in Needle content by looking up n.Id from NeedleMapper
|
// read fills in Needle content by looking up n.Id from NeedleMapper
|
||||||
func (v *Volume) readNeedle(n *needle.Needle) (int, error) {
|
func (v *Volume) readNeedle(n *needle.Needle) (int, error) {
|
||||||
v.dataFileAccessLock.RLock()
|
v.dataFileAccessLock.RLock()
|
||||||
@ -176,6 +271,87 @@ func (v *Volume) readNeedle(n *needle.Needle) (int, error) {
|
|||||||
return -1, ErrorNotFound
|
return -1, ErrorNotFound
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (v *Volume) startWorker() {
|
||||||
|
go func() {
|
||||||
|
chanClosed := false
|
||||||
|
for {
|
||||||
|
// chan closed. go thread will exit
|
||||||
|
if chanClosed {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
fsync := false
|
||||||
|
currentRequests := make([]*needle.AsyncRequest, 0, 128)
|
||||||
|
currentBytesToWrite := int64(0)
|
||||||
|
for {
|
||||||
|
request, ok := <-v.asyncRequestsChan
|
||||||
|
//volume may be closed
|
||||||
|
if !ok {
|
||||||
|
chanClosed = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if MaxPossibleVolumeSize < v.ContentSize()+uint64(currentBytesToWrite+request.ActualSize) {
|
||||||
|
request.Complete(0, 0, false,
|
||||||
|
fmt.Errorf("volume size limit %d exceeded! current size is %d", MaxPossibleVolumeSize, v.ContentSize()))
|
||||||
|
break
|
||||||
|
}
|
||||||
|
currentRequests = append(currentRequests, request)
|
||||||
|
currentBytesToWrite += request.ActualSize
|
||||||
|
if request.Fsync {
|
||||||
|
fsync = true
|
||||||
|
}
|
||||||
|
// submit at most 4M bytes or 128 requests at one time to decrease request delay.
|
||||||
|
// it also need to break if there is no data in channel to avoid io hang.
|
||||||
|
if currentBytesToWrite >= 4*1024*1024 || len(currentRequests) >= 128 || len(v.asyncRequestsChan) == 0 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if len(currentRequests) == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
v.dataFileAccessLock.Lock()
|
||||||
|
end, _, e := v.DataBackend.GetStat()
|
||||||
|
if e != nil {
|
||||||
|
for i := 0; i < len(currentRequests); i++ {
|
||||||
|
currentRequests[i].Complete(0, 0, false,
|
||||||
|
fmt.Errorf("cannot read current volume position: %v", e))
|
||||||
|
}
|
||||||
|
v.dataFileAccessLock.Unlock()
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := 0; i < len(currentRequests); i++ {
|
||||||
|
if currentRequests[i].IsWriteRequest {
|
||||||
|
offset, size, isUnchanged, err := v.doWriteRequest(currentRequests[i].N)
|
||||||
|
currentRequests[i].UpdateResult(offset, uint64(size), isUnchanged, err)
|
||||||
|
} else {
|
||||||
|
size, err := v.doDeleteRequest(currentRequests[i].N)
|
||||||
|
currentRequests[i].UpdateResult(0, uint64(size), false, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if fsync {
|
||||||
|
// if sync error, data is not reliable, we should mark the completed request as fail and rollback
|
||||||
|
if err := v.DataBackend.Sync(); err != nil {
|
||||||
|
// todo: this may generate dirty data or cause data inconsistent, may be weed need to panic?
|
||||||
|
if te := v.DataBackend.Truncate(end); te != nil {
|
||||||
|
glog.V(0).Infof("Failed to truncate %s back to %d with error: %v", v.DataBackend.Name(), end, te)
|
||||||
|
}
|
||||||
|
for i := 0; i < len(currentRequests); i++ {
|
||||||
|
if currentRequests[i].IsSucceed() {
|
||||||
|
currentRequests[i].UpdateResult(0, 0, false, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := 0; i < len(currentRequests); i++ {
|
||||||
|
currentRequests[i].Submit()
|
||||||
|
}
|
||||||
|
v.dataFileAccessLock.Unlock()
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
|
||||||
type VolumeFileScanner interface {
|
type VolumeFileScanner interface {
|
||||||
VisitSuperBlock(super_block.SuperBlock) error
|
VisitSuperBlock(super_block.SuperBlock) error
|
||||||
ReadNeedleBody() bool
|
ReadNeedleBody() bool
|
||||||
|
Loading…
Reference in New Issue
Block a user