Merge pull request #12 from chrislusf/master

sync
This commit is contained in:
hilimd 2020-08-20 19:18:23 +08:00 committed by GitHub
commit b0d6330cf4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
152 changed files with 7589 additions and 1592 deletions

View File

@ -1,4 +1,4 @@
apiVersion: v1
description: SeaweedFS
name: seaweedfs
version: 1.88
version: 1.90

View File

@ -4,7 +4,7 @@ global:
registry: ""
repository: ""
imageName: chrislusf/seaweedfs
imageTag: "1.88"
imageTag: "1.90"
imagePullPolicy: IfNotPresent
imagePullSecrets: imagepullsecret
restartPolicy: Always

View File

@ -5,7 +5,7 @@
<groupId>com.github.chrislusf</groupId>
<artifactId>seaweedfs-client</artifactId>
<version>1.4.5</version>
<version>1.4.6</version>
<parent>
<groupId>org.sonatype.oss</groupId>

View File

@ -5,7 +5,7 @@
<groupId>com.github.chrislusf</groupId>
<artifactId>seaweedfs-client</artifactId>
<version>1.4.5</version>
<version>1.4.6</version>
<parent>
<groupId>org.sonatype.oss</groupId>

View File

@ -5,7 +5,7 @@
<groupId>com.github.chrislusf</groupId>
<artifactId>seaweedfs-client</artifactId>
<version>1.4.5</version>
<version>1.4.6</version>
<parent>
<groupId>org.sonatype.oss</groupId>

View File

@ -23,7 +23,7 @@ public class SeaweedRead {
// returns bytesRead
public static long read(FilerGrpcClient filerGrpcClient, List<VisibleInterval> visibleIntervals,
final long position, final byte[] buffer, final int bufferOffset,
final int bufferLength) throws IOException {
final int bufferLength, final long fileSize) throws IOException {
List<ChunkView> chunkViews = viewFromVisibles(visibleIntervals, position, bufferLength);
@ -42,6 +42,14 @@ public class SeaweedRead {
long readCount = 0;
int startOffset = bufferOffset;
for (ChunkView chunkView : chunkViews) {
if (startOffset < chunkView.logicOffset) {
long gap = chunkView.logicOffset - startOffset;
LOG.debug("zero [{},{})", startOffset, startOffset + gap);
readCount += gap;
startOffset += gap;
}
FilerProto.Locations locations = vid2Locations.get(parseVolumeId(chunkView.fileId));
if (locations == null || locations.getLocationsCount() == 0) {
LOG.error("failed to locate {}", chunkView.fileId);
@ -51,11 +59,22 @@ public class SeaweedRead {
int len = readChunkView(position, buffer, startOffset, chunkView, locations);
LOG.debug("read [{},{}) {} size {}", startOffset, startOffset + len, chunkView.fileId, chunkView.size);
readCount += len;
startOffset += len;
}
long limit = Math.min(bufferLength, fileSize);
if (startOffset < limit) {
long gap = limit - startOffset;
LOG.debug("zero2 [{},{})", startOffset, startOffset + gap);
readCount += gap;
startOffset += gap;
}
return readCount;
}
@ -71,7 +90,7 @@ public class SeaweedRead {
int len = (int) chunkView.size;
LOG.debug("readChunkView fid:{} chunkData.length:{} chunkView.offset:{} buffer.length:{} startOffset:{} len:{}",
chunkView.fileId, chunkData.length, chunkView.offset, buffer.length, startOffset, len);
System.arraycopy(chunkData, (int) chunkView.offset, buffer, startOffset, len);
System.arraycopy(chunkData, startOffset - (int) (chunkView.logicOffset - chunkView.offset), buffer, startOffset, len);
return len;
}
@ -93,7 +112,7 @@ public class SeaweedRead {
Header contentEncodingHeader = entity.getContentEncoding();
if (contentEncodingHeader != null) {
HeaderElement[] encodings =contentEncodingHeader.getElements();
HeaderElement[] encodings = contentEncodingHeader.getElements();
for (int i = 0; i < encodings.length; i++) {
if (encodings[i].getName().equalsIgnoreCase("gzip")) {
entity = new GzipDecompressingEntity(entity);
@ -134,18 +153,19 @@ public class SeaweedRead {
long stop = offset + size;
for (VisibleInterval chunk : visibleIntervals) {
if (chunk.start <= offset && offset < chunk.stop && offset < stop) {
long chunkStart = Math.max(offset, chunk.start);
long chunkStop = Math.min(stop, chunk.stop);
if (chunkStart < chunkStop) {
boolean isFullChunk = chunk.isFullChunk && chunk.start == offset && chunk.stop <= stop;
views.add(new ChunkView(
chunk.fileId,
offset - chunk.start,
Math.min(chunk.stop, stop) - offset,
offset,
chunkStart - chunk.start + chunk.chunkOffset,
chunkStop - chunkStart,
chunkStart,
isFullChunk,
chunk.cipherKey,
chunk.isCompressed
));
offset = Math.min(chunk.stop, stop);
}
}
return views;
@ -160,7 +180,13 @@ public class SeaweedRead {
Arrays.sort(chunks, new Comparator<FilerProto.FileChunk>() {
@Override
public int compare(FilerProto.FileChunk a, FilerProto.FileChunk b) {
return (int) (a.getMtime() - b.getMtime());
// if just a.getMtime() - b.getMtime(), it will overflow!
if (a.getMtime() < b.getMtime()) {
return -1;
} else if (a.getMtime() > b.getMtime()) {
return 1;
}
return 0;
}
});
@ -181,6 +207,7 @@ public class SeaweedRead {
chunk.getOffset() + chunk.getSize(),
chunk.getFileId(),
chunk.getMtime(),
0,
true,
chunk.getCipherKey().toByteArray(),
chunk.getIsCompressed()
@ -203,6 +230,7 @@ public class SeaweedRead {
chunk.getOffset(),
v.fileId,
v.modifiedTime,
v.chunkOffset,
false,
v.cipherKey,
v.isCompressed
@ -215,6 +243,7 @@ public class SeaweedRead {
v.stop,
v.fileId,
v.modifiedTime,
v.chunkOffset + (chunkStop - v.start),
false,
v.cipherKey,
v.isCompressed
@ -247,6 +276,10 @@ public class SeaweedRead {
return fileId;
}
/**
 * Returns the effective size of an entry: whichever is larger of the size
 * derived from its chunk list and the file size stored in its attributes.
 *
 * @param entry the filer entry to measure
 * @return the larger of the two sizes
 */
public static long fileSize(FilerProto.Entry entry) {
    long sizeFromChunks = totalSize(entry.getChunksList());
    long sizeFromAttributes = entry.getAttributes().getFileSize();
    return Math.max(sizeFromChunks, sizeFromAttributes);
}
public static long totalSize(List<FilerProto.FileChunk> chunksList) {
long size = 0;
for (FilerProto.FileChunk chunk : chunksList) {
@ -263,15 +296,17 @@ public class SeaweedRead {
public final long stop;
public final long modifiedTime;
public final String fileId;
public final long chunkOffset;
public final boolean isFullChunk;
public final byte[] cipherKey;
public final boolean isCompressed;
public VisibleInterval(long start, long stop, String fileId, long modifiedTime, boolean isFullChunk, byte[] cipherKey, boolean isCompressed) {
public VisibleInterval(long start, long stop, String fileId, long modifiedTime, long chunkOffset, boolean isFullChunk, byte[] cipherKey, boolean isCompressed) {
this.start = start;
this.stop = stop;
this.modifiedTime = modifiedTime;
this.fileId = fileId;
this.chunkOffset = chunkOffset;
this.isFullChunk = isFullChunk;
this.cipherKey = cipherKey;
this.isCompressed = isCompressed;

View File

@ -301,7 +301,7 @@
</snapshotRepository>
</distributionManagement>
<properties>
<seaweedfs.client.version>1.4.5</seaweedfs.client.version>
<seaweedfs.client.version>1.4.6</seaweedfs.client.version>
<hadoop.version>2.9.2</hadoop.version>
</properties>
</project>

View File

@ -5,7 +5,7 @@
<modelVersion>4.0.0</modelVersion>
<properties>
<seaweedfs.client.version>1.4.5</seaweedfs.client.version>
<seaweedfs.client.version>1.4.6</seaweedfs.client.version>
<hadoop.version>2.9.2</hadoop.version>
</properties>

View File

@ -124,7 +124,7 @@ public class SeaweedFileSystemStore {
private FileStatus doGetFileStatus(Path path, FilerProto.Entry entry) {
FilerProto.FuseAttributes attributes = entry.getAttributes();
long length = SeaweedRead.totalSize(entry.getChunksList());
long length = SeaweedRead.fileSize(entry);
boolean isDir = entry.getIsDirectory();
int block_replication = 1;
int blocksize = 512;
@ -185,7 +185,7 @@ public class SeaweedFileSystemStore {
entry.mergeFrom(existingEntry);
entry.getAttributesBuilder().setMtime(now);
LOG.debug("createFile merged entry path:{} entry:{} from:{}", path, entry, existingEntry);
writePosition = SeaweedRead.totalSize(existingEntry.getChunksList());
writePosition = SeaweedRead.fileSize(existingEntry);
replication = existingEntry.getAttributes().getReplication();
}
}

View File

@ -41,7 +41,7 @@ public class SeaweedInputStream extends FSInputStream {
this.statistics = statistics;
this.path = path;
this.entry = entry;
this.contentLength = SeaweedRead.totalSize(entry.getChunksList());
this.contentLength = SeaweedRead.fileSize(entry);
this.bufferSize = bufferSize;
this.visibleIntervalList = SeaweedRead.nonOverlappingVisibleIntervals(filerGrpcClient, entry.getChunksList());
@ -87,7 +87,7 @@ public class SeaweedInputStream extends FSInputStream {
throw new IllegalArgumentException("requested read length is more than will fit after requested offset in buffer");
}
long bytesRead = SeaweedRead.read(this.filerGrpcClient, this.visibleIntervalList, this.position, b, off, len);
long bytesRead = SeaweedRead.read(this.filerGrpcClient, this.visibleIntervalList, this.position, b, off, len, SeaweedRead.fileSize(entry));
if (bytesRead > Integer.MAX_VALUE) {
throw new IOException("Unexpected Content-Length");
}

View File

@ -120,6 +120,188 @@
</plugin>
</plugins>
</build>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>3.1.1</version>
<scope>provided</scope>
<exclusions>
<exclusion>
<artifactId>hadoop-hdfs-client</artifactId>
<groupId>org.apache.hadoop</groupId>
</exclusion>
<exclusion>
<artifactId>hadoop-yarn-api</artifactId>
<groupId>org.apache.hadoop</groupId>
</exclusion>
<exclusion>
<artifactId>hadoop-yarn-client</artifactId>
<groupId>org.apache.hadoop</groupId>
</exclusion>
<exclusion>
<artifactId>hadoop-mapreduce-client-core</artifactId>
<groupId>org.apache.hadoop</groupId>
</exclusion>
<exclusion>
<artifactId>hadoop-mapreduce-client-jobclient</artifactId>
<groupId>org.apache.hadoop</groupId>
</exclusion>
<exclusion>
<artifactId>hadoop-annotations</artifactId>
<groupId>org.apache.hadoop</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>3.1.1</version>
<scope>provided</scope>
<exclusions>
<exclusion>
<artifactId>commons-cli</artifactId>
<groupId>commons-cli</groupId>
</exclusion>
<exclusion>
<artifactId>commons-math3</artifactId>
<groupId>org.apache.commons</groupId>
</exclusion>
<exclusion>
<artifactId>commons-io</artifactId>
<groupId>commons-io</groupId>
</exclusion>
<exclusion>
<artifactId>commons-net</artifactId>
<groupId>commons-net</groupId>
</exclusion>
<exclusion>
<artifactId>commons-collections</artifactId>
<groupId>commons-collections</groupId>
</exclusion>
<exclusion>
<artifactId>javax.servlet-api</artifactId>
<groupId>javax.servlet</groupId>
</exclusion>
<exclusion>
<artifactId>jetty-server</artifactId>
<groupId>org.eclipse.jetty</groupId>
</exclusion>
<exclusion>
<artifactId>jetty-util</artifactId>
<groupId>org.eclipse.jetty</groupId>
</exclusion>
<exclusion>
<artifactId>jetty-servlet</artifactId>
<groupId>org.eclipse.jetty</groupId>
</exclusion>
<exclusion>
<artifactId>jetty-webapp</artifactId>
<groupId>org.eclipse.jetty</groupId>
</exclusion>
<exclusion>
<artifactId>jsp-api</artifactId>
<groupId>javax.servlet.jsp</groupId>
</exclusion>
<exclusion>
<artifactId>jersey-core</artifactId>
<groupId>com.sun.jersey</groupId>
</exclusion>
<exclusion>
<artifactId>jersey-servlet</artifactId>
<groupId>com.sun.jersey</groupId>
</exclusion>
<exclusion>
<artifactId>jersey-json</artifactId>
<groupId>com.sun.jersey</groupId>
</exclusion>
<exclusion>
<artifactId>jersey-server</artifactId>
<groupId>com.sun.jersey</groupId>
</exclusion>
<exclusion>
<artifactId>log4j</artifactId>
<groupId>log4j</groupId>
</exclusion>
<exclusion>
<artifactId>commons-lang</artifactId>
<groupId>commons-lang</groupId>
</exclusion>
<exclusion>
<artifactId>commons-beanutils</artifactId>
<groupId>commons-beanutils</groupId>
</exclusion>
<exclusion>
<artifactId>commons-configuration2</artifactId>
<groupId>org.apache.commons</groupId>
</exclusion>
<exclusion>
<artifactId>commons-lang3</artifactId>
<groupId>org.apache.commons</groupId>
</exclusion>
<exclusion>
<artifactId>slf4j-log4j12</artifactId>
<groupId>org.slf4j</groupId>
</exclusion>
<exclusion>
<artifactId>avro</artifactId>
<groupId>org.apache.avro</groupId>
</exclusion>
<exclusion>
<artifactId>re2j</artifactId>
<groupId>com.google.re2j</groupId>
</exclusion>
<exclusion>
<artifactId>hadoop-auth</artifactId>
<groupId>org.apache.hadoop</groupId>
</exclusion>
<exclusion>
<artifactId>jsch</artifactId>
<groupId>com.jcraft</groupId>
</exclusion>
<exclusion>
<artifactId>curator-client</artifactId>
<groupId>org.apache.curator</groupId>
</exclusion>
<exclusion>
<artifactId>curator-recipes</artifactId>
<groupId>org.apache.curator</groupId>
</exclusion>
<exclusion>
<artifactId>htrace-core4</artifactId>
<groupId>org.apache.htrace</groupId>
</exclusion>
<exclusion>
<artifactId>zookeeper</artifactId>
<groupId>org.apache.zookeeper</groupId>
</exclusion>
<exclusion>
<artifactId>commons-compress</artifactId>
<groupId>org.apache.commons</groupId>
</exclusion>
<exclusion>
<artifactId>kerb-simplekdc</artifactId>
<groupId>org.apache.kerby</groupId>
</exclusion>
<exclusion>
<artifactId>jackson-databind</artifactId>
<groupId>com.fasterxml.jackson.core</groupId>
</exclusion>
<exclusion>
<artifactId>stax2-api</artifactId>
<groupId>org.codehaus.woodstox</groupId>
</exclusion>
<exclusion>
<artifactId>woodstox-core</artifactId>
<groupId>com.fasterxml.woodstox</groupId>
</exclusion>
<exclusion>
<artifactId>hadoop-annotations</artifactId>
<groupId>org.apache.hadoop</groupId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
<distributionManagement>
<snapshotRepository>
<id>ossrh</id>
@ -127,7 +309,7 @@
</snapshotRepository>
</distributionManagement>
<properties>
<seaweedfs.client.version>1.4.5</seaweedfs.client.version>
<seaweedfs.client.version>1.4.6</seaweedfs.client.version>
<hadoop.version>3.1.1</hadoop.version>
</properties>
</project>

View File

@ -5,7 +5,7 @@
<modelVersion>4.0.0</modelVersion>
<properties>
<seaweedfs.client.version>1.4.5</seaweedfs.client.version>
<seaweedfs.client.version>1.4.6</seaweedfs.client.version>
<hadoop.version>3.1.1</hadoop.version>
</properties>

View File

@ -124,7 +124,7 @@ public class SeaweedFileSystemStore {
private FileStatus doGetFileStatus(Path path, FilerProto.Entry entry) {
FilerProto.FuseAttributes attributes = entry.getAttributes();
long length = SeaweedRead.totalSize(entry.getChunksList());
long length = SeaweedRead.fileSize(entry);
boolean isDir = entry.getIsDirectory();
int block_replication = 1;
int blocksize = 512;
@ -185,7 +185,7 @@ public class SeaweedFileSystemStore {
entry.mergeFrom(existingEntry);
entry.getAttributesBuilder().setMtime(now);
LOG.debug("createFile merged entry path:{} entry:{} from:{}", path, entry, existingEntry);
writePosition = SeaweedRead.totalSize(existingEntry.getChunksList());
writePosition = SeaweedRead.fileSize(existingEntry);
replication = existingEntry.getAttributes().getReplication();
}
}

View File

@ -41,7 +41,7 @@ public class SeaweedInputStream extends FSInputStream {
this.statistics = statistics;
this.path = path;
this.entry = entry;
this.contentLength = SeaweedRead.totalSize(entry.getChunksList());
this.contentLength = SeaweedRead.fileSize(entry);
this.bufferSize = bufferSize;
this.visibleIntervalList = SeaweedRead.nonOverlappingVisibleIntervals(filerGrpcClient, entry.getChunksList());
@ -87,7 +87,7 @@ public class SeaweedInputStream extends FSInputStream {
throw new IllegalArgumentException("requested read length is more than will fit after requested offset in buffer");
}
long bytesRead = SeaweedRead.read(this.filerGrpcClient, this.visibleIntervalList, this.position, b, off, len);
long bytesRead = SeaweedRead.read(this.filerGrpcClient, this.visibleIntervalList, this.position, b, off, len, SeaweedRead.fileSize(entry));
if (bytesRead > Integer.MAX_VALUE) {
throw new IOException("Unexpected Content-Length");
}

View File

@ -0,0 +1,58 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor for the com.seaweedfs.test:random_access module, packaged as a jar. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.seaweedfs.test</groupId>
<artifactId>random_access</artifactId>
<packaging>jar</packaging>
<version>1.0-SNAPSHOT</version>
<properties>
<!-- Referenced by the guava dependency below. -->
<guava.version>28.0-jre</guava.version>
</properties>
<dependencies>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>${guava.version}</version>
</dependency>
<!-- Logging API only; no SLF4J binding is declared in this POM. -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>1.7.25</version>
</dependency>
<!-- Test-scoped: not part of the packaged jar's runtime dependencies. -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.12</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.esotericsoftware.kryo</groupId>
<artifactId>kryo</artifactId>
<version>2.24.0</version>
</dependency>
</dependencies>
<build>
<extensions>
<!-- NOTE(review): nothing in this POM references an os.detected.* property; confirm this extension is still needed. -->
<extension>
<groupId>kr.motd.maven</groupId>
<artifactId>os-maven-plugin</artifactId>
<version>1.6.2</version>
</extension>
</extensions>
<plugins>
<!-- Compile for Java 8 source and target levels. -->
<!-- NOTE(review): no <version> is given for this plugin; consider pinning one for reproducible builds. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>8</source>
<target>8</target>
</configuration>
</plugin>
</plugins>
</build>
</project>

View File

@ -0,0 +1,753 @@
/*
* Copyright 2010 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
import com.google.common.collect.ImmutableSet;
import seaweedfs.client.btree.serialize.Serializer;
import seaweedfs.client.btree.serialize.kryo.KryoBackedDecoder;
import seaweedfs.client.btree.serialize.kryo.KryoBackedEncoder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
// todo - stream serialised value to file
// todo - handle hash collisions (properly, this time)
// todo - don't store null links to child blocks in leaf index blocks
// todo - align block boundaries
// todo - thread safety control
// todo - merge small values into a single data block
// todo - discard when file corrupt
// todo - include data directly in index entry when serializer can guarantee small fixed sized data
// todo - free list leaks disk space
// todo - merge adjacent free blocks
// todo - use more efficient lookup for free block with nearest size
@SuppressWarnings("unchecked")
public class BTreePersistentIndexedCache<K, V> {
// Class-wide SLF4J logger.
private static final Logger LOGGER = LoggerFactory.getLogger(BTreePersistentIndexedCache.class);
// File backing this cache; handed to the FileBackedBlockStore in the constructor.
private final File cacheFile;
// Produces the long hash codes under which keys are indexed; built from the key serializer.
private final KeyHasher<K> keyHasher;
// Serializer for values, as supplied to the constructor.
private final Serializer<V> serializer;
// Upper bound on the number of entries in one index block; also persisted in the header block.
private final short maxChildIndexEntries;
// Lower bound on the number of entries in a non-root index block (maxChildIndexEntries / 2).
private final int minIndexChildNodes;
// Block store stack through which every block is read, written and removed.
private final StateCheckBlockStore store;
// Header block currently in use; assigned in doOpen().
private HeaderBlock header;
/**
 * Creates a cache with the default limits: at most 512 entries per index
 * block and 512 as the free-list entry limit.
 *
 * @param cacheFile file backing the cache
 * @param keySerializer serializer used to hash keys
 * @param valueSerializer serializer used for values
 */
public BTreePersistentIndexedCache(File cacheFile, Serializer<K> keySerializer, Serializer<V> valueSerializer) {
this(cacheFile, keySerializer, valueSerializer, (short) 512, 512);
}
/**
 * Creates a cache over the given file and opens it immediately.
 *
 * @param cacheFile file backing the cache
 * @param keySerializer serializer used to hash keys
 * @param valueSerializer serializer used for values
 * @param maxChildIndexEntries maximum number of entries held by one index block
 * @param maxFreeListEntries limit passed to the free-list block store
 * @throws UncheckedIOException if the cache cannot be opened
 */
public BTreePersistentIndexedCache(File cacheFile, Serializer<K> keySerializer, Serializer<V> valueSerializer,
                                   short maxChildIndexEntries, int maxFreeListEntries) {
    this.cacheFile = cacheFile;
    this.keyHasher = new KeyHasher<K>(keySerializer);
    this.serializer = valueSerializer;
    this.maxChildIndexEntries = maxChildIndexEntries;
    this.minIndexChildNodes = maxChildIndexEntries / 2;

    // Assemble the store stack from the file outwards:
    // file -> caching -> free list -> state check.
    FileBackedBlockStore fileStore = new FileBackedBlockStore(cacheFile);
    BlockStore cachingStore = new CachingBlockStore(
            fileStore,
            ImmutableSet.of(IndexBlock.class, FreeListBlockStore.FreeListBlock.class));
    FreeListBlockStore freeListStore = new FreeListBlockStore(cachingStore, maxFreeListEntries);
    this.store = new StateCheckBlockStore(freeListStore);

    try {
        open();
    } catch (Exception failure) {
        String message = String.format("Could not open %s.", this);
        throw new UncheckedIOException(message, failure);
    }
}
/**
 * Describes this cache by the backing file's name followed by the file itself in parentheses.
 */
@Override
public String toString() {
    StringBuilder description = new StringBuilder();
    description.append("cache ");
    description.append(cacheFile.getName());
    description.append(" (");
    description.append(cacheFile);
    description.append(")");
    return description.toString();
}
/**
 * Opens the cache; if opening reports corruption, the cache is rebuilt instead.
 *
 * @throws Exception any non-corruption failure raised while opening
 */
private void open() throws Exception {
    LOGGER.debug("Opening {}", this);
    try {
        doOpen();
    } catch (CorruptedCacheException corrupted) {
        // Corrupt content is not fatal: fall back to rebuilding the cache.
        rebuild();
    }
}
/**
 * Opens the block store and loads the header block.
 *
 * @throws Exception if the store cannot be opened or the header cannot be read
 */
private void doOpen() throws Exception {
    // Writes a fresh header and an empty root index block.
    // NOTE(review): presumably the store runs this only when it has no existing content - confirm in the store.
    Runnable initialiser = new Runnable() {
        @Override
        public void run() {
            header = new HeaderBlock();
            store.write(header);
            header.index.newRoot();
            store.flush();
        }
    };

    // Creates empty payload instances for the block types this cache defines.
    BlockStore.Factory payloadFactory = new BlockStore.Factory() {
        @Override
        public Object create(Class<? extends BlockPayload> type) {
            if (type == HeaderBlock.class) {
                return new HeaderBlock();
            } else if (type == IndexBlock.class) {
                return new IndexBlock();
            } else if (type == DataBlock.class) {
                return new DataBlock();
            }
            throw new UnsupportedOperationException();
        }
    };

    store.open(initialiser, payloadFactory);
    header = store.readFirst(HeaderBlock.class);
}
/**
 * Looks up the value stored for a key.
 *
 * @param key the key to look up
 * @return the stored value, or {@code null} when there is no entry or the cache
 *         was found to be corrupt (in which case it is rebuilt)
 * @throws UncheckedIOException if the lookup, or the rebuild after corruption, fails
 */
public V get(K key) {
    try {
        // The inner try stays nested so that a failure inside rebuild() is still wrapped below.
        try {
            DataBlock found = header.getRoot().get(key);
            if (found == null) {
                return null;
            }
            return found.getValue();
        } catch (CorruptedCacheException corrupted) {
            rebuild();
            return null;
        }
    } catch (Exception failure) {
        throw new UncheckedIOException(String.format("Could not read entry '%s' from %s.", key, this), failure);
    }
}
/**
 * Stores a value under a key, replacing any value already indexed under the key's hash code.
 *
 * @param key the key to store under
 * @param value the value to store
 * @throws UncheckedIOException if the entry cannot be written
 */
public void put(K key, V value) {
    try {
        long hashCode = keyHasher.getHashCode(key);
        Lookup lookup = header.getRoot().find(hashCode);

        DataBlock replacement;
        if (lookup.entry == null) {
            // No entry yet: a new data block is always needed.
            replacement = new DataBlock(value);
        } else {
            DataBlock existing = store.read(lookup.entry.dataBlock, DataBlock.class);
            DataBlockUpdateResult outcome = existing.useNewValue(value);
            if (outcome.isFailed()) {
                // The existing block could not take the new value: drop it and allocate another.
                store.remove(existing);
                replacement = new DataBlock(value, outcome.getSerializedValue());
            } else {
                replacement = null;
            }
        }

        if (replacement != null) {
            store.write(replacement);
            lookup.indexBlock.put(hashCode, replacement.getPos());
        }
        store.flush();
    } catch (Exception failure) {
        throw new UncheckedIOException(String.format("Could not add entry '%s' to %s.", key, this), failure);
    }
}
/**
 * Removes the entry for a key, if there is one, together with its data block.
 *
 * @param key the key whose entry is removed
 * @throws UncheckedIOException if the entry cannot be removed
 */
public void remove(K key) {
    try {
        Lookup lookup = header.getRoot().find(key);
        if (lookup.entry != null) {
            // Detach the index entry first, then release the data block it pointed at.
            lookup.indexBlock.remove(lookup.entry);
            DataBlock orphaned = store.read(lookup.entry.dataBlock, DataBlock.class);
            store.remove(orphaned);
            store.flush();
        }
    } catch (Exception failure) {
        throw new UncheckedIOException(String.format("Could not remove entry '%s' from %s.", key, this), failure);
    }
}
/**
 * Reads an index block from the store and fills in its transient navigation fields.
 *
 * @param pos position of the block to read
 * @param root index root the block belongs to
 * @param parent parent block, or {@code null} for the root block
 * @param index position of the link to this block within the parent
 * @return the loaded block
 */
private IndexBlock load(BlockPointer pos, IndexRoot root, IndexBlock parent, int index) {
    IndexBlock loaded = store.read(pos, IndexBlock.class);
    loaded.parentEntryIndex = index;
    loaded.parent = parent;
    loaded.root = root;
    return loaded;
}
/**
 * Closes the cache and opens it again.
 *
 * @throws UncheckedIOException if closing or reopening fails
 */
public void reset() {
    close();
    try {
        open();
    } catch (Exception failure) {
        throw new UncheckedIOException(failure);
    }
}
/**
 * Closes the underlying block store.
 *
 * @throws UncheckedIOException if the store fails to close
 */
public void close() {
    LOGGER.debug("Closing {}", this);
    try {
        store.close();
    } catch (Exception failure) {
        throw new UncheckedIOException(failure);
    }
}
/**
 * Reports whether the underlying block store is open.
 *
 * @return the value answered by {@code store.isOpen()}
 */
public boolean isOpen() {
return store.isOpen();
}
/**
 * Discards the contents of a corrupt cache by clearing it. If clearing
 * fails, the failure is logged with its cause and the cache is closed.
 */
private void rebuild() {
    LOGGER.warn("{} is corrupt. Discarding.", this);
    try {
        clear();
    } catch (Exception e) {
        // Pass the exception as the trailing argument so SLF4J logs its stack trace;
        // without it the reason the rebuild failed is lost.
        LOGGER.warn("{} couldn't be rebuilt. Closing.", this, e);
        close();
    }
}
/**
 * Checks the integrity of the cache.
 *
 * @throws UncheckedIOException if any problem is found
 */
public void verify() {
    try {
        doVerify();
    } catch (Exception failure) {
        String message = String.format("Some problems were found when checking the integrity of %s.", this);
        throw new UncheckedIOException(message, failure);
    }
}
/**
 * Walks the whole tree from the stored header, collecting every block, then
 * checks that no two collected blocks overlap in the file.
 *
 * @throws Exception if the tree is malformed or two blocks overlap
 */
private void doVerify() throws Exception {
    List<BlockPayload> collected = new ArrayList<BlockPayload>();

    HeaderBlock storedHeader = store.readFirst(HeaderBlock.class);
    collected.add(storedHeader);
    verifyTree(storedHeader.getRoot(), "", collected, Long.MAX_VALUE, true);

    // Order by position so that only neighbouring blocks need comparing.
    Comparator<BlockPayload> byPosition = new Comparator<BlockPayload>() {
        @Override
        public int compare(BlockPayload first, BlockPayload second) {
            return first.getPos().compareTo(second.getPos());
        }
    };
    Collections.sort(collected, byPosition);

    for (int i = 1; i < collected.size(); i++) {
        Block previous = collected.get(i - 1).getBlock();
        Block following = collected.get(i).getBlock();
        if (previous.getPos().getPos() + previous.getSize() > following.getPos().getPos()) {
            throw new IOException(String.format("%s overlaps with %s", previous, following));
        }
    }
}
/**
 * Recursively checks the structure of an index block and its descendants.
 *
 * @param current block to check
 * @param prefix empty for the root block; each recursion level prepends a space,
 *               and only its emptiness is inspected here
 * @param blocks receives every block visited
 * @param maxValue exclusive upper bound for hash codes in this subtree
 * @param loadData when true, each entry's data block is read and collected too
 * @throws Exception if an invariant is violated or a block cannot be read
 */
private void verifyTree(IndexBlock current, String prefix, Collection<BlockPayload> blocks, long maxValue,
                        boolean loadData) throws Exception {
    blocks.add(current);

    int entryCount = current.entries.size();
    boolean isRootBlock = prefix.equals("");
    if (!isRootBlock && entryCount < minIndexChildNodes) {
        throw new IOException(String.format("Too few entries found in %s", current));
    }
    if (entryCount > maxChildIndexEntries) {
        throw new IOException(String.format("Too many entries found in %s", current));
    }

    // A leaf has no child links and therefore no tail link either.
    boolean isLeaf = entryCount == 0 || current.entries.get(0).childIndexBlock.isNull();
    if (isLeaf != current.tailPos.isNull()) {
        throw new IOException(String.format("Mismatched leaf/tail-node in %s", current));
    }

    String childPrefix = " " + prefix;
    long lowerBound = Long.MIN_VALUE;
    for (IndexEntry entry : current.entries) {
        boolean hasChild = !entry.childIndexBlock.isNull();
        if (isLeaf == hasChild) {
            throw new IOException(String.format("Mismatched leaf/non-leaf entry in %s", current));
        }
        if (entry.hashCode >= maxValue || entry.hashCode <= lowerBound) {
            throw new IOException(String.format("Out-of-order key in %s", current));
        }
        lowerBound = entry.hashCode;

        if (hasChild) {
            IndexBlock child = store.read(entry.childIndexBlock, IndexBlock.class);
            verifyTree(child, childPrefix, blocks, entry.hashCode, loadData);
        }
        if (loadData) {
            DataBlock data = store.read(entry.dataBlock, DataBlock.class);
            blocks.add(data);
        }
    }

    if (!current.tailPos.isNull()) {
        IndexBlock tail = store.read(current.tailPos, IndexBlock.class);
        verifyTree(tail, childPrefix, blocks, maxValue, loadData);
    }
}
/**
 * Clears the store, closes the cache, then opens it again.
 *
 * @throws UncheckedIOException if closing or reopening fails
 */
public void clear() {
    store.clear();
    close();
    try {
        doOpen();
    } catch (Exception failure) {
        throw new UncheckedIOException(failure);
    }
}
/**
 * Tracks the position of the root index block on behalf of a header block.
 */
private class IndexRoot {
    private BlockPointer rootPos = BlockPointer.start();
    private HeaderBlock owner;

    private IndexRoot(HeaderBlock owner) {
        this.owner = owner;
    }

    /**
     * Records a new root position and writes the owning header block to the store.
     */
    public void setRootPos(BlockPointer rootPos) {
        this.rootPos = rootPos;
        store.write(owner);
    }

    /**
     * Loads the root index block; it has no parent and uses parent entry index 0.
     */
    public IndexBlock getRoot() {
        return load(rootPos, this, null, 0);
    }

    /**
     * Writes a new empty index block and makes it the root.
     *
     * @return the new root block
     */
    public IndexBlock newRoot() {
        IndexBlock freshRoot = new IndexBlock();
        store.write(freshRoot);
        setRootPos(freshRoot.getPos());
        return freshRoot;
    }
}
/**
 * Header payload: stores the root index block position and the configured
 * maximum number of entries per index block.
 */
private class HeaderBlock extends BlockPayload {
    private IndexRoot index;

    private HeaderBlock() {
        index = new IndexRoot(this);
    }

    @Override
    protected byte getType() {
        return 0x55;
    }

    @Override
    protected int getSize() {
        // One long for the root position plus one short for the entry limit.
        return Block.LONG_SIZE + Block.SHORT_SIZE;
    }

    /**
     * Reads the root position, then rejects the block as corrupt when the stored
     * entry limit differs from the one this cache was configured with.
     */
    @Override
    protected void read(DataInputStream instr) throws Exception {
        long storedRootPos = instr.readLong();
        index.rootPos = BlockPointer.pos(storedRootPos);

        short storedMaxEntries = instr.readShort();
        if (storedMaxEntries != maxChildIndexEntries) {
            throw blockCorruptedException();
        }
    }

    @Override
    protected void write(DataOutputStream outstr) throws Exception {
        long currentRootPos = index.rootPos.getPos();
        outstr.writeLong(currentRootPos);
        outstr.writeShort(maxChildIndexEntries);
    }

    /**
     * Loads the root index block through the index root.
     */
    public IndexBlock getRoot() throws Exception {
        return index.getRoot();
    }
}
private class IndexBlock extends BlockPayload {
// Entries of this block; put() inserts at the binary-search insertion point, so the list stays ordered.
private final List<IndexEntry> entries = new ArrayList<IndexEntry>();
// Link followed by find() when the hash code sorts after every entry; null pointer when there is no such child.
private BlockPointer tailPos = BlockPointer.start();
// Transient fields
// These are assigned by load() and are not part of read()/write().
private IndexBlock parent;
private int parentEntryIndex;
private IndexRoot root;
/**
 * Returns the type byte identifying index blocks (0x77).
 */
@Override
protected byte getType() {
return 0x77;
}
/**
 * Returns the fixed size of an index block: an int entry count, a long tail
 * pointer, and three longs for each of the maximum number of entries.
 */
@Override
protected int getSize() {
    int bytesPerEntry = 3 * Block.LONG_SIZE;
    return Block.INT_SIZE + Block.LONG_SIZE + bytesPerEntry * maxChildIndexEntries;
}
/**
 * Replaces this block's entries and tail pointer with those read from the stream.
 * Layout: entry count, then (hash code, data block position, child block position)
 * for each entry, then the tail position.
 *
 * @param instr stream to read from
 * @throws IOException if the stream cannot be read
 */
@Override
public void read(DataInputStream instr) throws IOException {
    int remaining = instr.readInt();
    entries.clear();
    while (remaining > 0) {
        IndexEntry loaded = new IndexEntry();
        loaded.hashCode = instr.readLong();
        loaded.dataBlock = BlockPointer.pos(instr.readLong());
        loaded.childIndexBlock = BlockPointer.pos(instr.readLong());
        entries.add(loaded);
        remaining--;
    }
    tailPos = BlockPointer.pos(instr.readLong());
}
/**
 * Writes this block: entry count, then (hash code, data block position,
 * child block position) for each entry, then the tail position.
 *
 * @param outstr stream to write to
 * @throws IOException if the stream cannot be written
 */
@Override
public void write(DataOutputStream outstr) throws IOException {
    int entryCount = entries.size();
    outstr.writeInt(entryCount);
    for (int i = 0; i < entryCount; i++) {
        IndexEntry current = entries.get(i);
        outstr.writeLong(current.hashCode);
        outstr.writeLong(current.dataBlock.getPos());
        outstr.writeLong(current.childIndexBlock.getPos());
    }
    outstr.writeLong(tailPos.getPos());
}
/**
 * Points the entry for a hash code at a data block, creating the entry in
 * sorted position when it does not exist, then splits this block if it has
 * grown past the limit.
 *
 * @param hashCode hash code of the key
 * @param pos position of the data block holding the value
 * @throws Exception if the block cannot be written or split
 */
public void put(long hashCode, BlockPointer pos) throws Exception {
    int found = Collections.binarySearch(entries, new IndexEntry(hashCode));
    IndexEntry target;
    if (found < 0) {
        // New entries are only ever added to blocks without a tail link.
        assert tailPos.isNull();
        int insertAt = -found - 1;
        target = new IndexEntry();
        target.hashCode = hashCode;
        target.childIndexBlock = BlockPointer.start();
        entries.add(insertAt, target);
    } else {
        target = entries.get(found);
    }
    target.dataBlock = pos;
    store.write(this);
    maybeSplit();
}
/**
 * Splits this block in two when it holds more than maxChildIndexEntries entries.
 * The middle entry moves up into the parent, the entries after it move into a
 * new sibling block, and a new root is created first when this block has no parent.
 */
private void maybeSplit() throws Exception {
if (entries.size() > maxChildIndexEntries) {
int splitPos = entries.size() / 2;
// The middle entry is taken out of this block; it becomes the separator in the parent.
IndexEntry splitEntry = entries.remove(splitPos);
if (parent == null) {
// This block was the root: create a new root to receive the separator.
parent = root.newRoot();
}
IndexBlock sibling = new IndexBlock();
// NOTE(review): the sibling is written before it is populated, and neither block is written again
// in this method - presumably the store keeps these instances and persists them later; confirm.
store.write(sibling);
// subList is a view: addAll copies the upper half into the sibling, clear() removes it from this block.
List<IndexEntry> siblingEntries = entries.subList(splitPos, entries.size());
sibling.entries.addAll(siblingEntries);
siblingEntries.clear();
// The sibling inherits this block's tail; the separator's old child becomes this block's tail.
sibling.tailPos = tailPos;
tailPos = splitEntry.childIndexBlock;
splitEntry.childIndexBlock = BlockPointer.start();
parent.add(this, splitEntry, sibling);
}
}
/**
 * Inserts a separator entry into this (parent) block after one of its children was split.
 * The link that pointed at {@code left} is re-pointed at {@code right}, and the new
 * entry, inserted at the same index, links to {@code left}.
 *
 * @param left the child that was split
 * @param entry the separator entry moved up from the child
 * @param right the new sibling holding the upper half of the child's entries
 */
private void add(IndexBlock left, IndexEntry entry, IndexBlock right) throws Exception {
int index = left.parentEntryIndex;
if (index < entries.size()) {
// left was reached through an entry: that entry now leads to right.
IndexEntry parentEntry = entries.get(index);
assert parentEntry.childIndexBlock.equals(left.getPos());
parentEntry.childIndexBlock = right.getPos();
} else {
// left was reached through the tail link (or this is a brand-new root): the tail now leads to right.
assert index == entries.size() && (tailPos.isNull() || tailPos.equals(left.getPos()));
tailPos = right.getPos();
}
entries.add(index, entry);
entry.childIndexBlock = left.getPos();
store.write(this);
// Adding the separator may push this block over the limit as well.
maybeSplit();
}
/**
 * Finds the data block stored for a key, searching from this block downwards.
 *
 * @param key the key to look up
 * @return the data block, or {@code null} when the key has no entry
 * @throws Exception if a block cannot be read
 */
public DataBlock get(K key) throws Exception {
    Lookup result = find(key);
    if (result.entry != null) {
        return store.read(result.entry.dataBlock, DataBlock.class);
    }
    return null;
}
/**
 * Hashes the key and searches for its entry from this block downwards.
 *
 * @param key the key to look up
 * @return the lookup result for the key's hash code
 * @throws Exception if hashing fails or a block cannot be read
 */
public Lookup find(K key) throws Exception {
    return find(keyHasher.getHashCode(key));
}
/**
 * Searches for a hash code in this block and, when it is absent, in the child
 * block covering its position.
 *
 * @param hashCode hash code to search for
 * @return a lookup holding the block where the search ended and the matching
 *         entry, or a {@code null} entry when the hash code is not present
 * @throws Exception if a child block cannot be read
 */
private Lookup find(long hashCode) throws Exception {
    int found = Collections.binarySearch(entries, new IndexEntry(hashCode));
    if (found >= 0) {
        return new Lookup(this, entries.get(found));
    }

    // Not in this block: descend through the link at the insertion point,
    // which is the tail link when the hash code sorts after every entry.
    int insertionPoint = -found - 1;
    BlockPointer childPos;
    if (insertionPoint < entries.size()) {
        childPos = entries.get(insertionPoint).childIndexBlock;
    } else {
        childPos = tailPos;
    }

    if (childPos.isNull()) {
        return new Lookup(this, null);
    }
    return load(childPos, root, this, insertionPoint).find(hashCode);
}
/**
 * Removes an entry from this block. For a leaf entry the block is then merged
 * if it has become too small. For a non-leaf entry the highest entry of the
 * leaf returned by findHighestLeaf() on the removed entry's child takes its place.
 *
 * @param entry the entry to remove; must be present in this block
 */
public void remove(IndexEntry entry) throws Exception {
int index = entries.indexOf(entry);
assert index >= 0;
entries.remove(index);
store.write(this);
if (entry.childIndexBlock.isNull()) {
maybeMerge();
} else {
// Not a leaf node. Move up an entry from a leaf node, then possibly merge the leaf node
IndexBlock leafBlock = load(entry.childIndexBlock, root, this, index);
leafBlock = leafBlock.findHighestLeaf();
IndexEntry highestEntry = leafBlock.entries.remove(leafBlock.entries.size() - 1);
// The promoted entry takes over the removed entry's child link and position.
highestEntry.childIndexBlock = entry.childIndexBlock;
// NOTE(review): this block changes here, after the store.write(this) above, and is not written again
// in this method - presumably the store persists the same instance later; confirm.
entries.add(index, highestEntry);
store.write(leafBlock);
leafBlock.maybeMerge();
}
}
/**
 * Restores the minimum-entries invariant after a removal. The root is only
 * replaced when it is empty and has a tail child; any other block with fewer
 * than minIndexChildNodes entries is merged with a sibling (left first, then
 * right), and the merged block is split again when the sibling had entries to spare.
 *
 * @throws IllegalStateException if a non-root block has no sibling
 */
private void maybeMerge() throws Exception {
if (parent == null) {
// This is the root block. Can have any number of children <= maxChildIndexEntries
if (entries.size() == 0 && !tailPos.isNull()) {
// This is an empty root block, discard it
header.index.setRootPos(tailPos);
store.remove(this);
}
return;
}
// This is not the root block. Must have children >= minIndexChildNodes
if (entries.size() >= minIndexChildNodes) {
return;
}
// Attempt to merge with the left sibling
IndexBlock left = parent.getPrevious(this);
if (left != null) {
assert entries.size() + left.entries.size() <= maxChildIndexEntries * 2;
if (left.entries.size() > minIndexChildNodes) {
// There are enough entries in this block and the left sibling to make up 2 blocks, so redistribute
// the entries evenly between them: merge this block into the left sibling, then let it split again
left.mergeFrom(this);
left.maybeSplit();
return;
} else {
// There are only enough entries to make up 1 block, so move the entries of this block into
// the left sibling and discard this block. Might also need to merge the parent
left.mergeFrom(this);
parent.maybeMerge();
return;
}
}
// Attempt to merge with the right sibling
IndexBlock right = parent.getNext(this);
if (right != null) {
assert entries.size() + right.entries.size() <= maxChildIndexEntries * 2;
if (right.entries.size() > minIndexChildNodes) {
// There are enough entries in this block and the right sibling to make up 2 blocks, so redistribute
// the entries evenly between them: merge the right sibling into this block, then let it split again
mergeFrom(right);
maybeSplit();
return;
} else {
// There are only enough entries to make up 1 block, so move the entries of the right sibling into
// this block and discard the right sibling. Might also need to merge the parent
mergeFrom(right);
parent.maybeMerge();
return;
}
}
// Should not happen
throw new IllegalStateException(String.format("%s does not have any siblings.", getBlock()));
}
/**
 * Absorbs the given right-hand sibling into this block and removes the sibling from the store.
 *
 * The parent entry at parentEntryIndex is pulled down into this block, followed by all entries of the sibling.
 * The parent, this block and the removal of the sibling are all handed to the store.
 *
 * @param right the sibling to absorb; the assertion below expects it to be the block referenced by the parent
 * directly after this one
 * @throws Exception if a block cannot be written or removed
 */
private void mergeFrom(IndexBlock right) throws Exception {
// Take the parent entry that currently points at this block; it moves down into the merged block
IndexEntry newChildEntry = parent.entries.remove(parentEntryIndex);
if (right.getPos().equals(parent.tailPos)) {
// The sibling was the parent's tail child, so the merged block becomes the tail
parent.tailPos = getPos();
} else {
// After the removal above, the entry that referenced the sibling has shifted into parentEntryIndex;
// re-point it at the merged block
IndexEntry newParentEntry = parent.entries.get(parentEntryIndex);
assert newParentEntry.childIndexBlock.equals(right.getPos());
newParentEntry.childIndexBlock = getPos();
}
entries.add(newChildEntry);
entries.addAll(right.entries);
// The pulled-down entry inherits this block's old tail as its child; the sibling's tail becomes the new tail
newChildEntry.childIndexBlock = tailPos;
tailPos = right.tailPos;
store.write(parent);
store.write(this);
store.remove(right);
}
/**
 * Loads the child block that directly follows the given child of this block.
 *
 * @param indexBlock a child of this block
 * @return the following sibling, which is the tail child when the given block is referenced by the last entry,
 * or {@code null} when the given block is itself the tail child
 * @throws Exception if the sibling cannot be loaded
 */
private IndexBlock getNext(IndexBlock indexBlock) throws Exception {
    final int siblingIndex = indexBlock.parentEntryIndex + 1;
    final int entryCount = entries.size();
    if (siblingIndex > entryCount) {
        return null;
    }
    // One position past the last entry denotes the tail child
    final BlockPointer siblingPos = siblingIndex == entryCount
            ? tailPos
            : entries.get(siblingIndex).childIndexBlock;
    return load(siblingPos, root, this, siblingIndex);
}
/**
 * Loads the child block that directly precedes the given child of this block.
 *
 * @param indexBlock a child of this block
 * @return the preceding sibling, or {@code null} when the given block is referenced by the first entry
 * @throws Exception if the sibling cannot be loaded
 */
private IndexBlock getPrevious(IndexBlock indexBlock) throws Exception {
    final int siblingIndex = indexBlock.parentEntryIndex - 1;
    if (siblingIndex >= 0) {
        final IndexEntry siblingEntry = entries.get(siblingIndex);
        return load(siblingEntry.childIndexBlock, root, this, siblingIndex);
    }
    return null;
}
/**
 * Follows the tail pointers downwards, starting at this block, until a block without a tail child is reached.
 *
 * @return the block at the end of the tail chain; this block itself when it has no tail child
 * @throws Exception if a block along the chain cannot be loaded
 */
private IndexBlock findHighestLeaf() throws Exception {
    if (!tailPos.isNull()) {
        final IndexBlock tailChild = load(tailPos, root, this, entries.size());
        return tailChild.findHighestLeaf();
    }
    return this;
}
}
/**
 * A single entry of an index block, ordered by hash code only.
 */
private static class IndexEntry implements Comparable<IndexEntry> {
    /** Hash code this entry is sorted and looked up by. */
    long hashCode;
    /** Pointer to the data block of this entry (presumably the block holding the stored value - confirm). */
    BlockPointer dataBlock;
    /** Pointer to the child index block searched for hash codes sorting before this entry; null pointer if none. */
    BlockPointer childIndexBlock;

    private IndexEntry() {
    }

    private IndexEntry(long hashCode) {
        this.hashCode = hashCode;
    }

    /**
     * Orders entries by ascending hash code.
     *
     * @return -1, 0 or 1 when this hash code is smaller than, equal to or greater than the other one
     */
    @Override
    public int compareTo(IndexEntry indexEntry) {
        final long otherHash = indexEntry.hashCode;
        if (hashCode == otherHash) {
            return 0;
        }
        return hashCode < otherHash ? -1 : 1;
    }
}
/**
 * Result of searching the index: the block where the search ended and the matching entry, if any.
 */
private class Lookup {
    /** Block in which the search terminated. */
    final IndexBlock indexBlock;
    /** Matching entry, or {@code null} when no entry with the requested hash code exists. */
    final IndexEntry entry;

    private Lookup(IndexBlock foundIn, IndexEntry match) {
        indexBlock = foundIn;
        entry = match;
    }
}
/**
 * Block payload holding one serialized value.
 *
 * The payload is laid out as two ints (the reserved size and the number of serialized bytes) followed by the
 * serialized bytes themselves.
 */
private class DataBlock extends BlockPayload {
    /** Number of value bytes reserved for this block. */
    private int size;
    /** Serialized form of the value; dropped once it has been written out or decoded. */
    private StreamByteBuffer buffer;
    /** Decoded value, or {@code null} while only the serialized form is available. */
    private V value;

    private DataBlock() {
    }

    /**
     * Creates a block for the given value, serializing it to determine the reserved size.
     */
    public DataBlock(V value) throws Exception {
        this.value = value;
        setValue(value);
        size = buffer.totalBytesUnread();
    }

    /**
     * Creates a block for a value whose serialized form is already available in the given buffer.
     */
    public DataBlock(V value, StreamByteBuffer buffer) throws Exception {
        this.value = value;
        this.buffer = buffer;
        size = buffer.totalBytesUnread();
    }

    /**
     * Serializes the given value into a fresh buffer. The cached value and reserved size are left untouched.
     */
    public void setValue(V value) throws Exception {
        buffer = StreamByteBuffer.createWithChunkSizeInDefaultRange(size);
        final KryoBackedEncoder kryoEncoder = new KryoBackedEncoder(buffer.getOutputStream());
        serializer.write(kryoEncoder, value);
        kryoEncoder.flush();
    }

    /**
     * Returns the value, decoding it from the buffer on first access and then releasing the buffer.
     */
    public V getValue() throws Exception {
        if (value != null) {
            return value;
        }
        value = serializer.read(new KryoBackedDecoder(buffer.getInputStream()));
        buffer = null;
        return value;
    }

    @Override
    protected byte getType() {
        return 0x33;
    }

    @Override
    protected int getSize() {
        // two int fields precede the value bytes
        return Block.INT_SIZE + Block.INT_SIZE + size;
    }

    @Override
    public void read(DataInputStream instr) throws Exception {
        size = instr.readInt();
        final int serializedLength = instr.readInt();
        buffer = StreamByteBuffer.of(instr, serializedLength);
    }

    @Override
    public void write(DataOutputStream outstr) throws Exception {
        outstr.writeInt(size);
        outstr.writeInt(buffer.totalBytesUnread());
        buffer.writeTo(outstr);
        // the serialized form is no longer needed once it has been handed to the stream
        buffer = null;
    }

    /**
     * Attempts to replace the value in place.
     *
     * @return a success result when the new serialized form fits into the reserved size and the block has been
     * handed to the store; otherwise a failed result carrying the serialized form
     */
    public DataBlockUpdateResult useNewValue(V value) throws Exception {
        setValue(value);
        if (buffer.totalBytesUnread() > size) {
            return DataBlockUpdateResult.failed(buffer);
        }
        this.value = value;
        store.write(this);
        return DataBlockUpdateResult.success();
    }
}
/**
 * Outcome of an in-place data block update: either success, or failure together with the serialized value
 * that did not fit.
 */
private static class DataBlockUpdateResult {
    /** Shared instance for every successful update; it carries no serialized value. */
    private static final DataBlockUpdateResult SUCCESS = new DataBlockUpdateResult(true, null);

    private final boolean success;
    private final StreamByteBuffer serializedValue;

    private DataBlockUpdateResult(boolean succeeded, StreamByteBuffer serialized) {
        success = succeeded;
        serializedValue = serialized;
    }

    /** Returns the shared success result. */
    static DataBlockUpdateResult success() {
        return SUCCESS;
    }

    /** Creates a failed result that carries the given serialized value. */
    static DataBlockUpdateResult failed(StreamByteBuffer serializedValue) {
        return new DataBlockUpdateResult(false, serializedValue);
    }

    /** Returns {@code true} when the update did not succeed. */
    public boolean isFailed() {
        if (success) {
            return false;
        }
        return true;
    }

    /** Returns the serialized value of a failed update, or {@code null} for the success result. */
    public StreamByteBuffer getSerializedValue() {
        return serializedValue;
    }
}
}

View File

@ -0,0 +1,59 @@
/*
* Copyright 2009 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
/**
 * Container that ties a {@link BlockPayload} to a position and size within a block store.
 */
public abstract class Block {
    static final int LONG_SIZE = 8;
    static final int INT_SIZE = 4;
    static final int SHORT_SIZE = 2;

    private BlockPayload payload;

    /**
     * Creates a block for the given payload and registers this block on the payload.
     */
    protected Block(BlockPayload payload) {
        this.payload = payload;
        payload.setBlock(this);
    }

    /**
     * Returns the payload, or {@code null} once {@link #detach()} has been called.
     */
    public BlockPayload getPayload() {
        return payload;
    }

    /**
     * Breaks the link between this block and its payload in both directions.
     */
    protected void detach() {
        payload.setBlock(null);
        payload = null;
    }

    public abstract BlockPointer getPos();

    public abstract int getSize();

    public abstract RuntimeException blockCorruptedException();

    /**
     * Describes this block as the simple class name of its payload followed by its position.
     */
    @Override
    public String toString() {
        final String payloadKind = payload.getClass().getSimpleName();
        return payloadKind + " " + getPos();
    }

    /**
     * Returns the position immediately after this block, i.e. its position plus its size.
     */
    public BlockPointer getNextPos() {
        final long start = getPos().getPos();
        return BlockPointer.pos(start + getSize());
    }

    public abstract boolean hasPos();

    public abstract void setPos(BlockPointer pos);

    public abstract void setSize(int size);
}

View File

@ -0,0 +1,51 @@
/*
* Copyright 2009 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
import java.io.DataInputStream;
import java.io.DataOutputStream;
/**
 * Content of a block. Knows how to read and write itself, and delegates all position questions to the
 * {@link Block} it is attached to.
 */
public abstract class BlockPayload {
    private Block block;

    /**
     * Returns the block this payload is attached to, or {@code null} when it is not attached.
     */
    public Block getBlock() {
        return block;
    }

    public void setBlock(Block block) {
        this.block = block;
    }

    /**
     * Returns the position of the owning block. The payload must be attached.
     */
    public BlockPointer getPos() {
        final Block owner = getBlock();
        return owner.getPos();
    }

    /**
     * Returns the position directly after the owning block. The payload must be attached.
     */
    public BlockPointer getNextPos() {
        final Block owner = getBlock();
        return owner.getNextPos();
    }

    protected abstract int getSize();

    protected abstract byte getType();

    protected abstract void read(DataInputStream inputStream) throws Exception;

    protected abstract void write(DataOutputStream outputStream) throws Exception;

    /**
     * Creates the exception that reports the owning block as corrupted.
     */
    protected RuntimeException blockCorruptedException() {
        final Block owner = getBlock();
        return owner.blockCorruptedException();
    }
}

View File

@ -0,0 +1,75 @@
/*
* Copyright 2009 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
import com.google.common.primitives.Longs;
/**
 * Immutable position of a block within a block store. The position -1 is the shared "null" pointer.
 */
public class BlockPointer implements Comparable<BlockPointer> {
    private static final BlockPointer NULL = new BlockPointer(-1);

    /**
     * Returns the null pointer.
     */
    public static BlockPointer start() {
        return NULL;
    }

    /**
     * Returns a pointer for the given position.
     *
     * @param pos the position; -1 yields the shared null pointer
     * @return the pointer for the position
     * @throws CorruptedCacheException if the position is less than -1
     */
    public static BlockPointer pos(long pos) {
        if (pos < -1) {
            // Fixed: a space was missing before the value, producing messages such as "but was-5"
            throw new CorruptedCacheException("block pointer must be >= -1, but was " + pos);
        }
        if (pos == -1) {
            return NULL;
        }
        return new BlockPointer(pos);
    }

    private final long pos;

    private BlockPointer(long pos) {
        this.pos = pos;
    }

    /**
     * Returns {@code true} when this pointer does not reference a block, i.e. its position is negative.
     */
    public boolean isNull() {
        return pos < 0;
    }

    public long getPos() {
        return pos;
    }

    @Override
    public String toString() {
        return String.valueOf(pos);
    }

    /**
     * Two pointers are equal when they are of the same class and hold the same position.
     */
    @Override
    public boolean equals(Object obj) {
        if (obj == null || obj.getClass() != getClass()) {
            return false;
        }
        BlockPointer other = (BlockPointer) obj;
        return pos == other.pos;
    }

    @Override
    public int hashCode() {
        return Longs.hashCode(pos);
    }

    /**
     * Orders pointers by ascending position.
     */
    @Override
    public int compareTo(BlockPointer o) {
        return Longs.compare(pos, o.pos);
    }
}

View File

@ -0,0 +1,68 @@
/*
* Copyright 2009 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
/**
 * Storage for {@link BlockPayload} instances, addressed by {@link BlockPointer}.
 */
public interface BlockStore {
/**
 * Opens this store, calling the given action if the store is empty.
 *
 * @param initAction the action to run when the store is empty
 * @param factory creates the payload instances that are populated when blocks are read
 */
void open(Runnable initAction, Factory factory);
/**
 * Closes this store.
 */
void close();
/**
 * Discards all blocks from this store.
 */
void clear();
/**
 * Removes the given block from this store.
 *
 * @param block the payload of the block to remove
 */
void remove(BlockPayload block);
/**
 * Reads the first block from this store.
 *
 * @param payloadType the expected payload type of the first block
 * @return the payload that was read
 */
<T extends BlockPayload> T readFirst(Class<T> payloadType);
/**
 * Reads a block from this store.
 *
 * @param pos the position of the block to read
 * @param payloadType the expected payload type of the block
 * @return the payload that was read
 */
<T extends BlockPayload> T read(BlockPointer pos, Class<T> payloadType);
/**
 * Writes a block to this store, adding the block if required.
 *
 * @param block the payload to write
 */
void write(BlockPayload block);
/**
 * Adds a new block to this store. Allocates space for the block, but does not write the contents of the block
 * until {@link #write(BlockPayload)} is called.
 *
 * @param block the payload to attach
 */
void attach(BlockPayload block);
/**
 * Flushes any pending updates for this store.
 */
void flush();
/**
 * Creates payload instances for a store.
 */
interface Factory {
/**
 * Creates a new payload instance of the given type. The result is declared as Object; FileBackedBlockStore
 * casts it to the requested payload type before reading the block contents into it.
 *
 * @param type the payload type to instantiate
 * @return the new instance
 */
Object create(Class<? extends BlockPayload> type);
}
}

View File

@ -0,0 +1,30 @@
/*
* Copyright 2018 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
import java.nio.Buffer;
/**
 * Helper for viewing any NIO buffer through the base {@link Buffer} type.
 */
public class BufferCaster {
    /**
     * Returns the given buffer typed as a plain {@link Buffer}.
     *
     * Calling methods such as {@code flip()} through the returned reference keeps code compiled by Java 9+
     * runnable on Java 8, where it would otherwise fail with
     * java.lang.NoSuchMethodError: Method flip()Ljava/nio/ByteBuffer; does not exist in class java.nio.ByteBuffer
     *
     * @param byteBuffer the buffer to widen, may be {@code null}
     * @return the same instance, typed as {@link Buffer}
     */
    public static <T extends Buffer> Buffer cast(T byteBuffer) {
        // a widening assignment is all that is needed; the static return type does the work
        final Buffer widened = byteBuffer;
        return widened;
    }
}

View File

@ -0,0 +1,74 @@
/*
* Copyright 2014 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
import com.google.common.io.CountingInputStream;
import java.io.BufferedInputStream;
import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.RandomAccessFile;
/**
* Allows a stream of bytes to be read from a particular location of some backing byte stream.
*/
/**
 * Reads a stream of bytes starting at an arbitrary offset of a {@link RandomAccessFile}, counting the bytes
 * consumed since the last call to {@link #start(long)}.
 */
class ByteInput {
    private final RandomAccessFile file;
    private final ResettableBufferedInputStream bufferedInputStream;
    private CountingInputStream countingInputStream;

    public ByteInput(RandomAccessFile file) {
        this.file = file;
        bufferedInputStream = new ResettableBufferedInputStream(new RandomAccessFileInputStream(file));
    }

    /**
     * Positions the file at the given offset, drops any buffered bytes and restarts the byte count.
     *
     * @param offset the file offset to read from
     * @return a stream that reads from the offset onwards
     */
    public DataInputStream start(long offset) throws IOException {
        file.seek(offset);
        bufferedInputStream.clear();
        final CountingInputStream counter = new CountingInputStream(bufferedInputStream);
        countingInputStream = counter;
        return new DataInputStream(counter);
    }

    /**
     * Returns the number of bytes read since {@link #start(long)} was called.
     */
    public long getBytesRead() {
        return countingInputStream.getCount();
    }

    /**
     * Ends the current read by dropping the counting stream.
     */
    public void done() {
        countingInputStream = null;
    }

    /**
     * Buffered stream whose buffer can be emptied, so that stale bytes are not served after a seek.
     */
    private static class ResettableBufferedInputStream extends BufferedInputStream {
        ResettableBufferedInputStream(InputStream input) {
            super(input);
        }

        /** Marks the internal buffer as empty. */
        void clear() {
            pos = 0;
            count = 0;
        }
    }
}

View File

@ -0,0 +1,74 @@
/*
* Copyright 2014 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
import com.google.common.io.CountingOutputStream;
import java.io.BufferedOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.RandomAccessFile;
/**
* Allows a stream of bytes to be written to a particular location of some backing byte stream.
*/
/**
 * Writes a stream of bytes starting at an arbitrary offset of a {@link RandomAccessFile}, counting the bytes
 * produced since the last call to {@link #start(long)}.
 */
class ByteOutput {
    private final RandomAccessFile file;
    private final ResettableBufferedOutputStream bufferedOutputStream;
    private CountingOutputStream countingOutputStream;

    public ByteOutput(RandomAccessFile file) {
        this.file = file;
        bufferedOutputStream = new ResettableBufferedOutputStream(new RandomAccessFileOutputStream(file));
    }

    /**
     * Positions the file at the given offset, drops any buffered bytes and restarts the byte count.
     * The offset can be beyond the current length of the file.
     *
     * @param offset the file offset to write to
     * @return a stream that writes from the offset onwards
     */
    public DataOutputStream start(long offset) throws IOException {
        file.seek(offset);
        bufferedOutputStream.clear();
        final CountingOutputStream counter = new CountingOutputStream(bufferedOutputStream);
        countingOutputStream = counter;
        return new DataOutputStream(counter);
    }

    /**
     * Returns the number of bytes written since {@link #start(long)} was called.
     */
    public long getBytesWritten() {
        return countingOutputStream.getCount();
    }

    /**
     * Ends the current write: flushes buffered bytes, then drops the counting stream.
     */
    public void done() throws IOException {
        countingOutputStream.flush();
        countingOutputStream = null;
    }

    /**
     * Buffered stream whose buffer can be emptied without writing its content.
     */
    private static class ResettableBufferedOutputStream extends BufferedOutputStream {
        ResettableBufferedOutputStream(OutputStream output) {
            super(output);
        }

        /** Marks the internal buffer as empty, discarding any bytes that were not flushed. */
        void clear() {
            count = 0;
        }
    }
}

View File

@ -0,0 +1,129 @@
/*
* Copyright 2009 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import com.google.common.collect.ImmutableSet;
import javax.annotation.Nullable;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
public class CachingBlockStore implements BlockStore {
private final BlockStore store;
private final Map<BlockPointer, BlockPayload> dirty = new LinkedHashMap<BlockPointer, BlockPayload>();
private final Cache<BlockPointer, BlockPayload> indexBlockCache = CacheBuilder.newBuilder().maximumSize(100).concurrencyLevel(1).build();
private final ImmutableSet<Class<? extends BlockPayload>> cacheableBlockTypes;
public CachingBlockStore(BlockStore store, Collection<Class<? extends BlockPayload>> cacheableBlockTypes) {
this.store = store;
this.cacheableBlockTypes = ImmutableSet.copyOf(cacheableBlockTypes);
}
@Override
public void open(Runnable initAction, Factory factory) {
store.open(initAction, factory);
}
@Override
public void close() {
flush();
indexBlockCache.invalidateAll();
store.close();
}
@Override
public void clear() {
dirty.clear();
indexBlockCache.invalidateAll();
store.clear();
}
@Override
public void flush() {
Iterator<BlockPayload> iterator = dirty.values().iterator();
while (iterator.hasNext()) {
BlockPayload block = iterator.next();
iterator.remove();
store.write(block);
}
store.flush();
}
@Override
public void attach(BlockPayload block) {
store.attach(block);
}
@Override
public void remove(BlockPayload block) {
dirty.remove(block.getPos());
if (isCacheable(block)) {
indexBlockCache.invalidate(block.getPos());
}
store.remove(block);
}
@Override
public <T extends BlockPayload> T readFirst(Class<T> payloadType) {
T block = store.readFirst(payloadType);
maybeCache(block);
return block;
}
@Override
public <T extends BlockPayload> T read(BlockPointer pos, Class<T> payloadType) {
T block = payloadType.cast(dirty.get(pos));
if (block != null) {
return block;
}
block = maybeGetFromCache(pos, payloadType);
if (block != null) {
return block;
}
block = store.read(pos, payloadType);
maybeCache(block);
return block;
}
@Nullable
private <T extends BlockPayload> T maybeGetFromCache(BlockPointer pos, Class<T> payloadType) {
if (cacheableBlockTypes.contains(payloadType)) {
return payloadType.cast(indexBlockCache.getIfPresent(pos));
}
return null;
}
@Override
public void write(BlockPayload block) {
store.attach(block);
maybeCache(block);
dirty.put(block.getPos(), block);
}
private <T extends BlockPayload> void maybeCache(T block) {
if (isCacheable(block)) {
indexBlockCache.put(block.getPos(), block);
}
}
private <T extends BlockPayload> boolean isCacheable(T block) {
return cacheableBlockTypes.contains(block.getClass());
}
}

View File

@ -0,0 +1,22 @@
/*
* Copyright 2009 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
/**
 * Unchecked exception signalling that the content of a block store is corrupted.
 */
class CorruptedCacheException extends RuntimeException {
    /**
     * @param message description of the corruption that was detected
     */
    CorruptedCacheException(String message) {
        super(message);
    }
}

View File

@ -0,0 +1,274 @@
/*
* Copyright 2009 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.RandomAccessFile;
/**
 * {@link BlockStore} that keeps its blocks in a single file.
 *
 * Each block is laid out as a one-byte payload type, an int payload size, the payload itself and a trailing
 * int holding the number of bytes written before it, which is verified when the block is read.
 */
public class FileBackedBlockStore implements BlockStore {
    private final File cacheFile;
    private RandomAccessFile file;
    private ByteOutput output;
    private ByteInput input;
    /** File offset at which the next newly allocated block is placed. */
    private long nextBlock;
    private Factory factory;
    private long currentFileSize;

    public FileBackedBlockStore(File cacheFile) {
        this.cacheFile = cacheFile;
    }

    @Override
    public String toString() {
        return "cache '" + cacheFile + "'";
    }

    /**
     * Opens the backing file, creating missing parent directories, and runs the given action when the file is
     * empty.
     *
     * @param runnable the action to run when the file has length 0
     * @param factory creates the payload instances that are populated by {@link #read(BlockPointer, Class)}
     */
    @Override
    public void open(Runnable runnable, Factory factory) {
        this.factory = factory;
        try {
            // getParentFile() returns null for a path without a parent (e.g. a bare file name); there is
            // nothing to create in that case
            File parentDir = cacheFile.getParentFile();
            if (parentDir != null) {
                parentDir.mkdirs();
            }
            file = openRandomAccessFile();
            output = new ByteOutput(file);
            input = new ByteInput(file);
            currentFileSize = file.length();
            nextBlock = currentFileSize;
            if (currentFileSize == 0) {
                runnable.run();
            }
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
    }

    /** Opens the file for reading and writing, falling back to read-only when that fails. */
    private RandomAccessFile openRandomAccessFile() throws FileNotFoundException {
        try {
            return randomAccessFile("rw");
        } catch (FileNotFoundException e) {
            return randomAccessFile("r");
        }
    }

    private RandomAccessFile randomAccessFile(String mode) throws FileNotFoundException {
        return new RandomAccessFile(cacheFile, mode);
    }

    @Override
    public void close() {
        try {
            file.close();
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
    }

    /**
     * Truncates the file to length 0 and restarts allocation at offset 0.
     */
    @Override
    public void clear() {
        try {
            file.setLength(0);
            currentFileSize = 0;
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
        nextBlock = 0;
    }

    /**
     * Gives the payload a block container if it does not have one yet. The position is allocated lazily.
     */
    @Override
    public void attach(BlockPayload block) {
        if (block.getBlock() == null) {
            block.setBlock(new BlockImpl(block));
        }
    }

    /**
     * Detaches the payload from its block container. The file content is not touched.
     */
    @Override
    public void remove(BlockPayload block) {
        BlockImpl blockImpl = (BlockImpl) block.getBlock();
        blockImpl.detach();
    }

    @Override
    public void flush() {
    }

    @Override
    public <T extends BlockPayload> T readFirst(Class<T> payloadType) {
        return read(BlockPointer.pos(0), payloadType);
    }

    /**
     * Creates a payload through the factory and populates it from the block at the given position.
     *
     * @throws CorruptedCacheException if the block fails validation
     */
    @Override
    public <T extends BlockPayload> T read(BlockPointer pos, Class<T> payloadType) {
        assert !pos.isNull();
        try {
            T payload = payloadType.cast(factory.create(payloadType));
            BlockImpl block = new BlockImpl(payload, pos);
            block.read();
            return payload;
        } catch (CorruptedCacheException e) {
            throw e;
        } catch (Exception e) {
            throw new UncheckedIOException(e);
        }
    }

    /**
     * Writes the given payload to the file. The payload must already be attached to this store.
     */
    @Override
    public void write(BlockPayload block) {
        BlockImpl blockImpl = (BlockImpl) block.getBlock();
        try {
            blockImpl.write();
        } catch (CorruptedCacheException e) {
            throw e;
        } catch (Exception e) {
            throw new UncheckedIOException(e);
        }
    }

    /** Reserves the given number of bytes at the end of the allocated area and returns their start offset. */
    private long alloc(long length) {
        long pos = nextBlock;
        nextBlock += length;
        return pos;
    }

    private final class BlockImpl extends Block {
        private static final int HEADER_SIZE = 1 + INT_SIZE; // type, payload size
        private static final int TAIL_SIZE = INT_SIZE;

        /** Position in the file; {@code null} until assigned or lazily allocated. */
        private BlockPointer pos;
        /** Size reserved for the payload; negative until it is known. */
        private int payloadSize;

        private BlockImpl(BlockPayload payload, BlockPointer pos) {
            this(payload);
            setPos(pos);
        }

        public BlockImpl(BlockPayload payload) {
            super(payload);
            pos = null;
            payloadSize = -1;
        }

        @Override
        public boolean hasPos() {
            return pos != null;
        }

        /**
         * Returns the position of this block, allocating space at the end of the store on first use.
         */
        @Override
        public BlockPointer getPos() {
            if (pos == null) {
                pos = BlockPointer.pos(alloc(getSize()));
            }
            return pos;
        }

        @Override
        public void setPos(BlockPointer pos) {
            assert this.pos == null && !pos.isNull();
            this.pos = pos;
        }

        /**
         * Returns the total size of this block including header and tail, asking the payload for its size on
         * first use.
         */
        @Override
        public int getSize() {
            if (payloadSize < 0) {
                payloadSize = getPayload().getSize();
            }
            return payloadSize + HEADER_SIZE + TAIL_SIZE;
        }

        @Override
        public void setSize(int size) {
            int newPayloadSize = size - HEADER_SIZE - TAIL_SIZE;
            assert newPayloadSize >= payloadSize;
            payloadSize = newPayloadSize;
        }

        /**
         * Writes header, payload and byte count, then extends the file so that it covers the whole block.
         */
        public void write() throws Exception {
            long startPos = getPos().getPos();

            DataOutputStream outputStream = output.start(startPos);

            BlockPayload payload = getPayload();

            // Write header
            outputStream.writeByte(payload.getType());
            outputStream.writeInt(payloadSize);
            long finalSize = startPos + HEADER_SIZE + TAIL_SIZE + payloadSize;

            // Write body
            payload.write(outputStream);

            // Write count
            long bytesWritten = output.getBytesWritten();
            if (bytesWritten > Integer.MAX_VALUE) {
                throw new IllegalArgumentException("Block payload exceeds maximum size");
            }
            outputStream.writeInt((int) bytesWritten);
            output.done();

            // Pad: the payload may use fewer bytes than were reserved for it
            if (currentFileSize < finalSize) {
                file.setLength(finalSize);
                currentFileSize = finalSize;
            }
        }

        /**
         * Reads and validates this block, populating its payload.
         *
         * @throws CorruptedCacheException if the block lies outside the file, has an unexpected type, or its
         * stored byte count does not match the number of bytes actually read
         */
        public void read() throws Exception {
            long startPos = getPos().getPos();
            assert startPos >= 0;
            if (startPos + HEADER_SIZE >= currentFileSize) {
                throw blockCorruptedException();
            }

            DataInputStream inputStream = input.start(startPos);

            BlockPayload payload = getPayload();

            // Read header
            byte type = inputStream.readByte();
            if (type != payload.getType()) {
                throw blockCorruptedException();
            }

            // Read body
            payloadSize = inputStream.readInt();
            if (startPos + HEADER_SIZE + TAIL_SIZE + payloadSize > currentFileSize) {
                throw blockCorruptedException();
            }
            payload.read(inputStream);

            // Read and verify count
            long actualCount = input.getBytesRead();
            long count = inputStream.readInt();
            if (actualCount != count) {
                // Report the mismatch through the exception instead of printing it to standard output
                throw new CorruptedCacheException(String.format(
                        "Corrupted %s found in %s: read expected %d actual %d, pos %d payloadSize %d currentFileSize %d.",
                        this, FileBackedBlockStore.this, count, actualCount, startPos, payloadSize, currentFileSize));
            }
            input.done();
        }

        @Override
        public RuntimeException blockCorruptedException() {
            return new CorruptedCacheException(String.format("Corrupted %s found in %s.", this,
                    FileBackedBlockStore.this));
        }
    }
}

View File

@ -0,0 +1,283 @@
/*
* Copyright 2009 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
public class FreeListBlockStore implements BlockStore {
private final BlockStore store;
private final BlockStore freeListStore;
private final int maxBlockEntries;
private FreeListBlock freeListBlock;
public FreeListBlockStore(BlockStore store, int maxBlockEntries) {
this.store = store;
freeListStore = this;
this.maxBlockEntries = maxBlockEntries;
}
@Override
public void open(final Runnable initAction, final Factory factory) {
Runnable freeListInitAction = new Runnable() {
@Override
public void run() {
freeListBlock = new FreeListBlock();
store.write(freeListBlock);
store.flush();
initAction.run();
}
};
Factory freeListFactory = new Factory() {
@Override
public Object create(Class<? extends BlockPayload> type) {
if (type == FreeListBlock.class) {
return new FreeListBlock();
}
return factory.create(type);
}
};
store.open(freeListInitAction, freeListFactory);
freeListBlock = store.readFirst(FreeListBlock.class);
}
@Override
public void close() {
freeListBlock = null;
store.close();
}
@Override
public void clear() {
store.clear();
}
@Override
public void remove(BlockPayload block) {
Block container = block.getBlock();
store.remove(block);
freeListBlock.add(container.getPos(), container.getSize());
}
@Override
public <T extends BlockPayload> T readFirst(Class<T> payloadType) {
return store.read(freeListBlock.getNextPos(), payloadType);
}
@Override
public <T extends BlockPayload> T read(BlockPointer pos, Class<T> payloadType) {
return store.read(pos, payloadType);
}
@Override
public void write(BlockPayload block) {
attach(block);
store.write(block);
}
@Override
public void attach(BlockPayload block) {
store.attach(block);
freeListBlock.alloc(block.getBlock());
}
@Override
public void flush() {
store.flush();
}
private void verify() {
FreeListBlock block = store.readFirst(FreeListBlock.class);
verify(block, Integer.MAX_VALUE);
}
private void verify(FreeListBlock block, int maxValue) {
if (block.largestInNextBlock > maxValue) {
throw new RuntimeException("corrupt free list");
}
int current = 0;
for (FreeListEntry entry : block.entries) {
if (entry.size > maxValue) {
throw new RuntimeException("corrupt free list");
}
if (entry.size < block.largestInNextBlock) {
throw new RuntimeException("corrupt free list");
}
if (entry.size < current) {
throw new RuntimeException("corrupt free list");
}
current = entry.size;
}
if (!block.nextBlock.isNull()) {
verify(store.read(block.nextBlock, FreeListBlock.class), block.largestInNextBlock);
}
}
public class FreeListBlock extends BlockPayload {
private List<FreeListEntry> entries = new ArrayList<FreeListEntry>();
private int largestInNextBlock;
private BlockPointer nextBlock = BlockPointer.start();
// Transient fields
private FreeListBlock prev;
private FreeListBlock next;
@Override
protected int getSize() {
return Block.LONG_SIZE + Block.INT_SIZE + Block.INT_SIZE + maxBlockEntries * (Block.LONG_SIZE
+ Block.INT_SIZE);
}
@Override
protected byte getType() {
return 0x44;
}
@Override
protected void read(DataInputStream inputStream) throws Exception {
nextBlock = BlockPointer.pos(inputStream.readLong());
largestInNextBlock = inputStream.readInt();
int count = inputStream.readInt();
for (int i = 0; i < count; i++) {
BlockPointer pos = BlockPointer.pos(inputStream.readLong());
int size = inputStream.readInt();
entries.add(new FreeListEntry(pos, size));
}
}
@Override
protected void write(DataOutputStream outputStream) throws Exception {
outputStream.writeLong(nextBlock.getPos());
outputStream.writeInt(largestInNextBlock);
outputStream.writeInt(entries.size());
for (FreeListEntry entry : entries) {
outputStream.writeLong(entry.pos.getPos());
outputStream.writeInt(entry.size);
}
}
public void add(BlockPointer pos, int size) {
assert !pos.isNull() && size >= 0;
if (size == 0) {
return;
}
if (size < largestInNextBlock) {
FreeListBlock next = getNextBlock();
next.add(pos, size);
return;
}
FreeListEntry entry = new FreeListEntry(pos, size);
int index = Collections.binarySearch(entries, entry);
if (index < 0) {
index = -index - 1;
}
entries.add(index, entry);
if (entries.size() > maxBlockEntries) {
FreeListBlock newBlock = new FreeListBlock();
newBlock.largestInNextBlock = largestInNextBlock;
newBlock.nextBlock = nextBlock;
newBlock.prev = this;
newBlock.next = next;
next = newBlock;
List<FreeListEntry> newBlockEntries = entries.subList(0, entries.size() / 2);
newBlock.entries.addAll(newBlockEntries);
newBlockEntries.clear();
largestInNextBlock = newBlock.entries.get(newBlock.entries.size() - 1).size;
freeListStore.write(newBlock);
nextBlock = newBlock.getPos();
}
freeListStore.write(this);
}
/**
 * Returns the next block in the chain, loading it from the store at {@code nextBlock} on first
 * use and linking it back to this block through its {@code prev} field.
 */
private FreeListBlock getNextBlock() {
    if (next != null) {
        return next;
    }
    FreeListBlock loaded = freeListStore.read(nextBlock, FreeListBlock.class);
    loaded.prev = this;
    next = loaded;
    return loaded;
}
/**
 * Tries to assign a position to {@code block} by reusing a free region from the free list.
 *
 * <p>Does nothing if the block already has a position. If no suitable free region is found the
 * block is left without a position. On success the block receives both the position and the
 * full size of the chosen free region, and that region is removed from the list.
 *
 * @param block the block needing a position
 */
public void alloc(Block block) {
if (block.hasPos()) {
return;
}
int requiredSize = block.getSize();
// Requests no larger than largestInNextBlock (or any request when this block is empty) are
// passed down the chain; if there is no next block the request is simply not satisfied.
if (entries.isEmpty() || requiredSize <= largestInNextBlock) {
if (nextBlock.isNull()) {
return;
}
getNextBlock().alloc(block);
return;
}
// The probe entry needs only a size because entries compare by size alone. The result is either
// an entry of exactly that size or the insertion point, i.e. the first larger entry.
int index = Collections.binarySearch(entries, new FreeListEntry(null, requiredSize));
if (index < 0) {
index = -index - 1;
}
if (index == entries.size()) {
// Largest free block is too small
return;
}
FreeListEntry entry = entries.remove(index);
block.setPos(entry.pos);
block.setSize(entry.size);
freeListStore.write(this);
// A block that has become empty and is not the head of the chain is unlinked and removed.
if (entries.size() == 0 && prev != null) {
prev.nextBlock = nextBlock;
prev.largestInNextBlock = largestInNextBlock;
prev.next = next;
if (next != null) {
next.prev = prev;
}
freeListStore.write(prev);
freeListStore.remove(this);
}
}
}
/**
 * A free region described by its position and size. Entries are ordered by size only;
 * the position does not take part in the comparison.
 */
private static class FreeListEntry implements Comparable<FreeListEntry> {
    final BlockPointer pos;
    final int size;

    private FreeListEntry(BlockPointer pos, int size) {
        this.pos = pos;
        this.size = size;
    }

    /**
     * Compares by size: 1 if this entry is larger, -1 if it is smaller, 0 if the sizes match.
     */
    @Override
    public int compareTo(FreeListEntry o) {
        return size > o.size ? 1 : (size < o.size ? -1 : 0);
    }
}
}

View File

@ -0,0 +1,75 @@
/*
* Copyright 2014 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
import seaweedfs.client.btree.serialize.Serializer;
import seaweedfs.client.btree.serialize.kryo.KryoBackedEncoder;
import java.io.IOException;
import java.io.OutputStream;
import java.math.BigInteger;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
/**
 * Computes a 64-bit hash for a key by serializing the key into an MD5 digest and keeping
 * the low-order 64 bits of the resulting digest.
 */
class KeyHasher<K> {
    private final Serializer<K> serializer;
    private final MessageDigestStream digestStream = new MessageDigestStream();
    private final KryoBackedEncoder encoder = new KryoBackedEncoder(digestStream);

    public KeyHasher(Serializer<K> serializer) {
        this.serializer = serializer;
    }

    /**
     * Serializes {@code key} through the encoder into the digest stream and returns the checksum.
     */
    long getHashCode(K key) throws Exception {
        serializer.write(encoder, key);
        encoder.flush();
        return digestStream.getChecksum();
    }

    /**
     * An output stream that feeds every written byte into an MD5 digest instead of storing it.
     */
    private static class MessageDigestStream extends OutputStream {
        MessageDigest messageDigest;

        private MessageDigestStream() {
            messageDigest = newMd5();
        }

        private static MessageDigest newMd5() {
            try {
                return MessageDigest.getInstance("MD5");
            } catch (NoSuchAlgorithmException e) {
                throw UncheckedException.throwAsUncheckedException(e);
            }
        }

        @Override
        public void write(int b) throws IOException {
            byte single = (byte) b;
            messageDigest.update(single);
        }

        @Override
        public void write(byte[] b) throws IOException {
            messageDigest.update(b);
        }

        @Override
        public void write(byte[] b, int off, int len) throws IOException {
            messageDigest.update(b, off, len);
        }

        /**
         * Completes the digest (which also resets it for the next key) and returns the
         * low-order 64 bits of the 16-byte result.
         */
        long getChecksum() {
            byte[] md5 = messageDigest.digest();
            assert md5.length == 16;
            BigInteger asInteger = new BigInteger(md5);
            return asInteger.longValue();
        }
    }
}

View File

@ -0,0 +1,54 @@
/*
* Copyright 2013 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
import java.io.IOException;
import java.io.InputStream;
import java.io.RandomAccessFile;
/**
 * Reads from a {@link RandomAccessFile}. Each operation reads from and advances the current position of the file.
 *
 * <p>Closing this stream does not close the underlying file.
 */
public class RandomAccessFileInputStream extends InputStream {
    private final RandomAccessFile file;

    /**
     * @param file the file to read from, starting at its current position
     */
    public RandomAccessFileInputStream(RandomAccessFile file) {
        this.file = file;
    }

    /**
     * Skips forward over at most {@code n} bytes, never moving past the end of the file.
     *
     * @param n the number of bytes to skip; zero or negative values skip nothing
     * @return the number of bytes actually skipped, which is less than {@code n} when the end
     *         of the file is reached first
     * @throws IOException if the file position or length cannot be read, or the seek fails
     */
    @Override
    public long skip(long n) throws IOException {
        if (n <= 0) {
            return 0;
        }
        long position = file.getFilePointer();
        long remaining = file.length() - position;
        if (remaining <= 0) {
            // Already at (or positioned beyond) the end of the file: nothing left to skip.
            return 0;
        }
        // Clamping also avoids overflowing position + n when callers pass very large counts.
        long skipped = Math.min(n, remaining);
        file.seek(position + skipped);
        return skipped;
    }

    /**
     * Reads up to {@code bytes.length} bytes from the file.
     *
     * @return the number of bytes read, or -1 at end of file
     */
    @Override
    public int read(byte[] bytes) throws IOException {
        return file.read(bytes);
    }

    /**
     * Reads a single byte from the file.
     *
     * @return the byte as a value in the range 0-255, or -1 at end of file
     */
    @Override
    public int read() throws IOException {
        return file.read();
    }

    /**
     * Reads up to {@code length} bytes from the file into {@code bytes} starting at {@code offset}.
     *
     * @return the number of bytes read, or -1 at end of file
     */
    @Override
    public int read(byte[] bytes, int offset, int length) throws IOException {
        return file.read(bytes, offset, length);
    }
}

View File

@ -0,0 +1,48 @@
/*
* Copyright 2013 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
import java.io.IOException;
import java.io.OutputStream;
import java.io.RandomAccessFile;
/**
 * An {@link OutputStream} view over a {@link RandomAccessFile}. Every write goes to the file's
 * current position and moves that position forward.
 *
 * <p>Closing this stream leaves the underlying file open, and flushing it has no effect.
 */
public class RandomAccessFileOutputStream extends OutputStream {
    private final RandomAccessFile file;

    /**
     * @param file the file to write to, starting at its current position
     */
    public RandomAccessFileOutputStream(RandomAccessFile file) {
        this.file = file;
    }

    /** Writes the given range of {@code bytes} to the file. */
    @Override
    public void write(byte[] bytes, int offset, int length) throws IOException {
        file.write(bytes, offset, length);
    }

    /** Writes the whole array to the file. */
    @Override
    public void write(byte[] bytes) throws IOException {
        file.write(bytes);
    }

    /** Writes a single byte (the low-order eight bits of {@code i}) to the file. */
    @Override
    public void write(int i) throws IOException {
        file.write(i);
    }
}

View File

@ -0,0 +1,87 @@
/*
* Copyright 2009 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
/**
 * A {@link BlockStore} decorator that tracks whether the store is open and asserts that
 * operations are only invoked while it is. All real work is delegated to the wrapped store.
 */
public class StateCheckBlockStore implements BlockStore {
    private final BlockStore blockStore;
    private boolean open;

    public StateCheckBlockStore(BlockStore blockStore) {
        this.blockStore = blockStore;
    }

    /** Asserts that the store is currently open. */
    private void requireOpen() {
        assert open;
    }

    /** Marks the store as open and opens the delegate; asserts it was not already open. */
    @Override
    public void open(Runnable initAction, Factory factory) {
        assert !open;
        open = true;
        blockStore.open(initAction, factory);
    }

    public boolean isOpen() {
        return open;
    }

    /** Closes the delegate if the store is open; closing an already closed store does nothing. */
    @Override
    public void close() {
        if (open) {
            open = false;
            blockStore.close();
        }
    }

    @Override
    public void clear() {
        requireOpen();
        blockStore.clear();
    }

    @Override
    public void remove(BlockPayload block) {
        requireOpen();
        blockStore.remove(block);
    }

    @Override
    public <T extends BlockPayload> T readFirst(Class<T> payloadType) {
        requireOpen();
        return blockStore.readFirst(payloadType);
    }

    @Override
    public <T extends BlockPayload> T read(BlockPointer pos, Class<T> payloadType) {
        requireOpen();
        return blockStore.read(pos, payloadType);
    }

    @Override
    public void write(BlockPayload block) {
        requireOpen();
        blockStore.write(block);
    }

    @Override
    public void attach(BlockPayload block) {
        requireOpen();
        blockStore.attach(block);
    }

    @Override
    public void flush() {
        requireOpen();
        blockStore.flush();
    }
}

View File

@ -0,0 +1,526 @@
/*
* Copyright 2016 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
/**
 * An in-memory buffer that provides OutputStream and InputStream interfaces.
 *
 * This is more efficient than using ByteArrayOutputStream/ByteArrayInputStream
 *
 * Data is held in a list of chunks. Written bytes go to the current write chunk; when it is
 * full it is appended to the list and a new chunk is allocated. Reads consume the chunks in order.
 *
 * Reading the buffer will clear the buffer.
 * This is not thread-safe, it is intended to be used by a single Thread.
 */
public class StreamByteBuffer {
private static final int DEFAULT_CHUNK_SIZE = 4096;
private static final int MAX_CHUNK_SIZE = 1024 * 1024;
// Full chunks that have been written but not yet picked up for reading.
private LinkedList<StreamByteBufferChunk> chunks = new LinkedList<StreamByteBufferChunk>();
private StreamByteBufferChunk currentWriteChunk;
private StreamByteBufferChunk currentReadChunk;
private int chunkSize;
private int nextChunkSize;
private int maxChunkSize;
private StreamByteBufferOutputStream output;
private StreamByteBufferInputStream input;
// Sum of unread bytes of the chunks currently held in the list (excludes current read/write chunks).
private int totalBytesUnreadInList;
/**
 * Creates a buffer whose first chunk has the default size of 4096 bytes.
 */
public StreamByteBuffer() {
this(DEFAULT_CHUNK_SIZE);
}
/**
 * Creates a buffer whose first chunk has the given size.
 *
 * @param chunkSize size in bytes of the first chunk; the upper bound for later chunks is the
 *                  larger of this value and 1 MiB
 */
public StreamByteBuffer(int chunkSize) {
this.chunkSize = chunkSize;
this.nextChunkSize = chunkSize;
this.maxChunkSize = Math.max(chunkSize, MAX_CHUNK_SIZE);
currentWriteChunk = new StreamByteBufferChunk(nextChunkSize);
output = new StreamByteBufferOutputStream();
input = new StreamByteBufferInputStream();
}
/**
 * Creates a buffer and fills it with everything that can be read from the stream until end of stream.
 * The chunk size is derived from {@code inputStream.available()}, limited to the default range.
 */
public static StreamByteBuffer of(InputStream inputStream) throws IOException {
StreamByteBuffer buffer = new StreamByteBuffer(chunkSizeInDefaultRange(inputStream.available()));
buffer.readFully(inputStream);
return buffer;
}
/**
 * Creates a buffer and fills it with exactly {@code len} bytes read from the stream.
 *
 * @throws EOFException if the stream ends before {@code len} bytes have been read
 */
public static StreamByteBuffer of(InputStream inputStream, int len) throws IOException {
StreamByteBuffer buffer = new StreamByteBuffer(chunkSizeInDefaultRange(len));
buffer.readFrom(inputStream, len);
return buffer;
}
/**
 * Creates an empty buffer whose chunk size is {@code value} limited to the default range.
 */
public static StreamByteBuffer createWithChunkSizeInDefaultRange(int value) {
return new StreamByteBuffer(chunkSizeInDefaultRange(value));
}
// Limits the value to the range [DEFAULT_CHUNK_SIZE, MAX_CHUNK_SIZE].
static int chunkSizeInDefaultRange(int value) {
return valueInRange(value, DEFAULT_CHUNK_SIZE, MAX_CHUNK_SIZE);
}
private static int valueInRange(int value, int min, int max) {
return Math.min(Math.max(value, min), max);
}
/**
 * Returns the stream used to append bytes to this buffer. The same instance is returned on every call.
 */
public OutputStream getOutputStream() {
return output;
}
/**
 * Returns the stream used to consume bytes from this buffer. The same instance is returned on every call.
 */
public InputStream getInputStream() {
return input;
}
/**
 * Writes all unread bytes to the target stream, consuming them from this buffer.
 */
public void writeTo(OutputStream target) throws IOException {
while (prepareRead() != -1) {
currentReadChunk.writeTo(target);
}
}
/**
 * Appends exactly {@code len} bytes read from the stream to this buffer.
 *
 * @throws EOFException if the stream ends before {@code len} bytes have been read
 */
public void readFrom(InputStream inputStream, int len) throws IOException {
int bytesLeft = len;
while (bytesLeft > 0) {
int spaceLeft = allocateSpace();
int limit = Math.min(spaceLeft, bytesLeft);
int readBytes = currentWriteChunk.readFrom(inputStream, limit);
if (readBytes == -1) {
throw new EOFException("Unexpected EOF");
}
bytesLeft -= readBytes;
}
}
/**
 * Appends bytes read from the stream to this buffer until the stream reports end of stream.
 */
public void readFully(InputStream inputStream) throws IOException {
while (true) {
int len = allocateSpace();
int readBytes = currentWriteChunk.readFrom(inputStream, len);
if (readBytes == -1) {
break;
}
}
}
/**
 * Consumes all unread bytes and returns them as a single array.
 */
public byte[] readAsByteArray() {
byte[] buf = new byte[totalBytesUnread()];
input.readImpl(buf, 0, buf.length);
return buf;
}
/**
 * Consumes all unread bytes and returns them chunk by chunk; empty arrays are left out.
 */
public List<byte[]> readAsListOfByteArrays() {
List<byte[]> listOfByteArrays = new ArrayList<byte[]>(chunks.size() + 1);
byte[] buf;
while ((buf = input.readNextBuffer()) != null) {
if (buf.length > 0) {
listOfByteArrays.add(buf);
}
}
return listOfByteArrays;
}
/**
 * Consumes the unread bytes and decodes them using the charset with the given name.
 */
public String readAsString(String encoding) {
Charset charset = Charset.forName(encoding);
return readAsString(charset);
}
/**
 * Consumes the unread bytes and decodes them using the platform default charset.
 */
public String readAsString() {
return readAsString(Charset.defaultCharset());
}
/**
 * Consumes the unread bytes and decodes them using the given charset. Returns an empty string
 * when there is nothing unread. A {@link CharacterCodingException} raised while decoding is
 * rethrown wrapped in an {@code UncheckedIOException}.
 */
public String readAsString(Charset charset) {
try {
return doReadAsString(charset);
} catch (CharacterCodingException e) {
throw new UncheckedIOException(e);
}
}
private String doReadAsString(Charset charset) throws CharacterCodingException {
int unreadSize = totalBytesUnread();
if (unreadSize > 0) {
return readAsCharBuffer(charset).toString();
}
return "";
}
// Decodes the unread bytes chunk by chunk. Malformed and unmappable input is replaced rather than
// reported. Bytes of a character that is split across a chunk boundary are carried over and
// completed one byte at a time from the following chunk. After decoding, the buffer is cleared
// and any bytes that remain undecoded are written back to the buffer.
private CharBuffer readAsCharBuffer(Charset charset) throws CharacterCodingException {
CharsetDecoder decoder = charset.newDecoder().onMalformedInput(
CodingErrorAction.REPLACE).onUnmappableCharacter(
CodingErrorAction.REPLACE);
// Sized by byte count; NOTE(review): this assumes decoding never yields more chars than input bytes - confirm.
CharBuffer charbuffer = CharBuffer.allocate(totalBytesUnread());
ByteBuffer buf = null;
boolean wasUnderflow = false;
ByteBuffer nextBuf = null;
boolean needsFlush = false;
while (hasRemaining(nextBuf) || hasRemaining(buf) || prepareRead() != -1) {
if (hasRemaining(buf)) {
// handle decoding underflow, multi-byte unicode character at buffer chunk boundary
if (!wasUnderflow) {
throw new IllegalStateException("Unexpected state. Buffer has remaining bytes without underflow in decoding.");
}
if (!hasRemaining(nextBuf) && prepareRead() != -1) {
nextBuf = currentReadChunk.readToNioBuffer();
}
// copy one by one until the underflow has been resolved
buf = ByteBuffer.allocate(buf.remaining() + 1).put(buf);
buf.put(nextBuf.get());
BufferCaster.cast(buf).flip();
} else {
if (hasRemaining(nextBuf)) {
buf = nextBuf;
} else if (prepareRead() != -1) {
buf = currentReadChunk.readToNioBuffer();
if (!hasRemaining(buf)) {
throw new IllegalStateException("Unexpected state. Buffer is empty.");
}
}
nextBuf = null;
}
// Evaluated before decoding: true when no further bytes are available after the current buf.
boolean endOfInput = !hasRemaining(nextBuf) && prepareRead() == -1;
int bufRemainingBefore = buf.remaining();
CoderResult result = decoder.decode(buf, charbuffer, false);
// The decoder only needs flushing if it actually consumed some input.
if (bufRemainingBefore > buf.remaining()) {
needsFlush = true;
}
if (endOfInput) {
// Signal end of input to the decoder with an empty buffer.
result = decoder.decode(ByteBuffer.allocate(0), charbuffer, true);
if (!result.isUnderflow()) {
result.throwException();
}
break;
}
wasUnderflow = result.isUnderflow();
}
if (needsFlush) {
CoderResult result = decoder.flush(charbuffer);
if (!result.isUnderflow()) {
result.throwException();
}
}
clear();
// push back remaining bytes of multi-byte unicode character
while (hasRemaining(buf)) {
byte b = buf.get();
try {
getOutputStream().write(b);
} catch (IOException e) {
throw new UncheckedIOException(e);
}
}
BufferCaster.cast(charbuffer).flip();
return charbuffer;
}
// Null-safe check for a buffer that still has bytes to read.
private boolean hasRemaining(ByteBuffer nextBuf) {
return nextBuf != null && nextBuf.hasRemaining();
}
/**
 * Returns the number of bytes that have been written but not yet read: the unread bytes of the
 * listed chunks plus those of the current read chunk and, if it is a different chunk, the
 * current write chunk.
 */
public int totalBytesUnread() {
int total = totalBytesUnreadInList;
if (currentReadChunk != null) {
total += currentReadChunk.bytesUnread();
}
if (currentWriteChunk != currentReadChunk && currentWriteChunk != null) {
total += currentWriteChunk.bytesUnread();
}
return total;
}
// Ensures the current write chunk has free space and returns how much. A full chunk is moved to
// the list and replaced by a new one of nextChunkSize, after which nextChunkSize is doubled up
// to maxChunkSize.
protected int allocateSpace() {
int spaceLeft = currentWriteChunk.spaceLeft();
if (spaceLeft == 0) {
addChunk(currentWriteChunk);
currentWriteChunk = new StreamByteBufferChunk(nextChunkSize);
if (nextChunkSize < maxChunkSize) {
nextChunkSize = Math.min(nextChunkSize * 2, maxChunkSize);
}
spaceLeft = currentWriteChunk.spaceLeft();
}
return spaceLeft;
}
// Makes sure currentReadChunk points at a chunk to read from and returns its unread byte count.
// When the current read chunk is exhausted, moves on to the next listed chunk, or to the current
// write chunk once the list is empty. Returns -1 when the read chunk is already the write chunk
// and has nothing unread.
protected int prepareRead() {
int bytesUnread = (currentReadChunk != null) ? currentReadChunk.bytesUnread() : 0;
if (bytesUnread == 0) {
if (!chunks.isEmpty()) {
currentReadChunk = chunks.removeFirst();
bytesUnread = currentReadChunk.bytesUnread();
totalBytesUnreadInList -= bytesUnread;
} else if (currentReadChunk != currentWriteChunk) {
currentReadChunk = currentWriteChunk;
bytesUnread = currentReadChunk.bytesUnread();
} else {
bytesUnread = -1;
}
}
return bytesUnread;
}
/**
 * Creates a buffer whose content is the given arrays, in order. The arrays are used as chunk
 * storage directly, without copying.
 */
public static StreamByteBuffer of(List<byte[]> listOfByteArrays) {
StreamByteBuffer buffer = new StreamByteBuffer();
buffer.addChunks(listOfByteArrays);
return buffer;
}
private void addChunks(List<byte[]> listOfByteArrays) {
for (byte[] buf : listOfByteArrays) {
addChunk(new StreamByteBufferChunk(buf));
}
}
// Appends the chunk to the list and accounts for its unread bytes.
private void addChunk(StreamByteBufferChunk chunk) {
chunks.add(chunk);
totalBytesUnreadInList += chunk.bytesUnread();
}
/**
 * A fixed-size byte array with a write position ({@code used}) and a read position ({@code pointer}).
 */
static class StreamByteBufferChunk {
// Read position: index of the next byte to read.
private int pointer;
private byte[] buffer;
// Capacity of the chunk.
private int size;
// Write position: number of bytes written so far.
private int used;
// Creates an empty chunk with the given capacity.
public StreamByteBufferChunk(int size) {
this.size = size;
buffer = new byte[size];
}
// Creates a full chunk backed by the given array (the array is not copied).
public StreamByteBufferChunk(byte[] buf) {
this.size = buf.length;
this.buffer = buf;
this.used = buf.length;
}
// Returns the unread bytes as a ByteBuffer wrapping the chunk's array (no copy) and marks them
// as read. Returns null when there is nothing unread.
public ByteBuffer readToNioBuffer() {
if (pointer < used) {
ByteBuffer result;
if (pointer > 0 || used < size) {
result = ByteBuffer.wrap(buffer, pointer, used - pointer);
} else {
result = ByteBuffer.wrap(buffer);
}
pointer = used;
return result;
}
return null;
}
// Appends one byte; returns false, without writing, when the chunk is full.
public boolean write(byte b) {
if (used < size) {
buffer[used++] = b;
return true;
}
return false;
}
// Appends len bytes. No capacity check is made here; callers in this file limit len to spaceLeft().
public void write(byte[] b, int off, int len) {
System.arraycopy(b, off, buffer, used, len);
used = used + len;
}
// Copies len bytes from the read position into b. No check is made here that len bytes are
// unread; callers in this file limit len to bytesUnread().
public void read(byte[] b, int off, int len) {
System.arraycopy(buffer, pointer, b, off, len);
pointer = pointer + len;
}
// Writes the unread bytes to the target and marks them as read.
public void writeTo(OutputStream target) throws IOException {
if (pointer < used) {
target.write(buffer, pointer, used - pointer);
pointer = used;
}
}
// Rewinds the read position so the written bytes can be read again.
public void reset() {
pointer = 0;
}
public int bytesUsed() {
return used;
}
public int bytesUnread() {
return used - pointer;
}
// Reads one byte as a value in the range 0-255, or returns -1 when nothing is unread.
public int read() {
if (pointer < used) {
return buffer[pointer++] & 0xff;
}
return -1;
}
public int spaceLeft() {
return size - used;
}
// Reads up to len bytes from the stream into the free space of this chunk and returns the
// stream's result (-1 at end of stream).
public int readFrom(InputStream inputStream, int len) throws IOException {
int readBytes = inputStream.read(buffer, used, len);
if(readBytes > 0) {
used += readBytes;
}
return readBytes;
}
// Discards the content by resetting both positions; the array itself is kept.
public void clear() {
used = pointer = 0;
}
// Consumes and returns the unread bytes. A completely full, completely unread chunk hands out its
// backing array itself; otherwise the unread bytes are copied. Returns an empty array when
// nothing is unread.
public byte[] readBuffer() {
if (used == buffer.length && pointer == 0) {
pointer = used;
return buffer;
} else if (pointer < used) {
byte[] buf = new byte[used - pointer];
read(buf, 0, used - pointer);
return buf;
} else {
return new byte[0];
}
}
}
/**
 * The OutputStream side of the enclosing buffer. Closing only records the closed state;
 * writes are not checked against it.
 */
class StreamByteBufferOutputStream extends OutputStream {
private boolean closed;
@Override
public void write(byte[] b, int off, int len) throws IOException {
if (b == null) {
throw new NullPointerException();
}
if ((off < 0) || (off > b.length) || (len < 0)
|| ((off + len) > b.length) || ((off + len) < 0)) {
throw new IndexOutOfBoundsException();
}
if (len == 0) {
return;
}
int bytesLeft = len;
int currentOffset = off;
// Fill the current write chunk, moving on to a fresh chunk whenever it becomes full.
while (bytesLeft > 0) {
int spaceLeft = allocateSpace();
int writeBytes = Math.min(spaceLeft, bytesLeft);
currentWriteChunk.write(b, currentOffset, writeBytes);
bytesLeft -= writeBytes;
currentOffset += writeBytes;
}
}
@Override
public void close() throws IOException {
closed = true;
}
public boolean isClosed() {
return closed;
}
@Override
public void write(int b) throws IOException {
// allocateSpace() guarantees the current write chunk has room for the byte.
allocateSpace();
currentWriteChunk.write((byte) b);
}
public StreamByteBuffer getBuffer() {
return StreamByteBuffer.this;
}
}
/**
 * The InputStream side of the enclosing buffer. Reading consumes the bytes from the buffer.
 */
class StreamByteBufferInputStream extends InputStream {
@Override
public int read() throws IOException {
prepareRead();
return currentReadChunk.read();
}
@Override
public int read(byte[] b, int off, int len) throws IOException {
return readImpl(b, off, len);
}
// Reads up to len bytes across chunk boundaries. Returns the number of bytes read, 0 when len is 0,
// or -1 when no bytes were available.
int readImpl(byte[] b, int off, int len) {
if (b == null) {
throw new NullPointerException();
}
if ((off < 0) || (off > b.length) || (len < 0)
|| ((off + len) > b.length) || ((off + len) < 0)) {
throw new IndexOutOfBoundsException();
}
if (len == 0) {
return 0;
}
int bytesLeft = len;
int currentOffset = off;
int bytesUnread = prepareRead();
int totalBytesRead = 0;
while (bytesLeft > 0 && bytesUnread != -1) {
int readBytes = Math.min(bytesUnread, bytesLeft);
currentReadChunk.read(b, currentOffset, readBytes);
bytesLeft -= readBytes;
currentOffset += readBytes;
totalBytesRead += readBytes;
bytesUnread = prepareRead();
}
if (totalBytesRead > 0) {
return totalBytesRead;
}
return -1;
}
@Override
public int available() throws IOException {
return totalBytesUnread();
}
public StreamByteBuffer getBuffer() {
return StreamByteBuffer.this;
}
// Consumes and returns the unread content of the next chunk, or null when nothing is left to read.
public byte[] readNextBuffer() {
if (prepareRead() != -1) {
return currentReadChunk.readBuffer();
}
return null;
}
}
/**
 * Discards all content: drops the listed chunks and the current read chunk, and empties the
 * current write chunk so it can be reused.
 */
public void clear() {
chunks.clear();
currentReadChunk = null;
totalBytesUnreadInList = 0;
currentWriteChunk.clear();
}
}

View File

@ -0,0 +1,88 @@
/*
* Copyright 2010 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.util.concurrent.Callable;
/**
 * A runtime exception whose only purpose is to carry a checked exception as its cause.
 */
public final class UncheckedException extends RuntimeException {
    private UncheckedException(Throwable cause) {
        super(cause);
    }

    private UncheckedException(String message, Throwable cause) {
        super(message, cause);
    }

    /**
     * Rethrows the failure as an unchecked exception without copying its message.
     *
     * Note: this method never returns normally. The declared return type only exists so that
     * callers can write {@code throw throwAsUncheckedException(t)}.
     */
    public static RuntimeException throwAsUncheckedException(Throwable t) {
        return throwAsUncheckedException(t, false);
    }

    /**
     * Rethrows the failure as an unchecked exception. Runtime exceptions and errors are rethrown
     * as they are, I/O exceptions are wrapped in {@code UncheckedIOException} and everything else
     * in {@code UncheckedException}. The interrupt flag of the current thread is restored when the
     * failure is an {@link InterruptedException}.
     *
     * Note: this method never returns normally. The declared return type only exists so that
     * callers can write {@code throw throwAsUncheckedException(t, preserveMessage)}.
     *
     * @param t the failure to rethrow
     * @param preserveMessage whether the wrapper should reuse the message of {@code t}
     */
    public static RuntimeException throwAsUncheckedException(Throwable t, boolean preserveMessage) {
        if (t instanceof InterruptedException) {
            Thread.currentThread().interrupt();
        }
        if (t instanceof RuntimeException) {
            throw (RuntimeException) t;
        }
        if (t instanceof Error) {
            throw (Error) t;
        }
        if (t instanceof IOException) {
            throw preserveMessage
                ? new UncheckedIOException(t.getMessage(), t)
                : new UncheckedIOException(t);
        }
        throw preserveMessage
            ? new UncheckedException(t.getMessage(), t)
            : new UncheckedException(t);
    }

    /**
     * Invokes the callable and returns its result, rethrowing any exception it raises as an
     * unchecked exception.
     */
    public static <T> T callUnchecked(Callable<T> callable) {
        try {
            return callable.call();
        } catch (Exception failure) {
            throw throwAsUncheckedException(failure);
        }
    }

    /**
     * Rethrows the target exception of the given InvocationTargetException, which keeps the chain
     * of exceptions shorter without losing information.
     *
     * Note: this method never returns normally. The declared return type only exists to keep the
     * compiler happy.
     *
     * @param e to be unwrapped
     * @return an instance of RuntimeException based on the target exception of the parameter.
     */
    public static RuntimeException unwrapAndRethrow(InvocationTargetException e) {
        Throwable target = e.getTargetException();
        return UncheckedException.throwAsUncheckedException(target);
    }
}

View File

@ -0,0 +1,36 @@
/*
* Copyright 2012 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
/**
* <code>UncheckedIOException</code> is used to wrap an {@link java.io.IOException} into an unchecked exception.
*/
public class UncheckedIOException extends RuntimeException {
public UncheckedIOException() {
}
public UncheckedIOException(String message) {
super(message);
}
public UncheckedIOException(String message, Throwable cause) {
super(message, cause);
}
public UncheckedIOException(Throwable cause) {
super(cause);
}
}

View File

@ -0,0 +1,133 @@
/*
* Copyright 2013 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree.serialize;
import javax.annotation.Nullable;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
/**
 * Base class for {@link Decoder} implementations. It supplies an {@link InputStream} view of the
 * decoder and default implementations of the composite read operations in terms of the primitive
 * ones, leaving {@link #maybeReadBytes} and {@link #maybeSkip} to subclasses.
 */
public abstract class AbstractDecoder implements Decoder {
    private DecoderStream stream;

    /** Returns an input stream over this decoder, created on first use and reused afterwards. */
    @Override
    public InputStream getInputStream() {
        DecoderStream current = stream;
        if (current == null) {
            current = new DecoderStream();
            stream = current;
        }
        return current;
    }

    /** Fills the whole of {@code buffer}. */
    @Override
    public void readBytes(byte[] buffer) throws IOException {
        readBytes(buffer, 0, buffer.length);
    }

    /** Reads a length (as a small int) followed by that many bytes. */
    @Override
    public byte[] readBinary() throws EOFException, IOException {
        byte[] data = new byte[readSmallInt()];
        readBytes(data);
        return data;
    }

    /** Reads a small int; this default reads a plain int. */
    @Override
    public int readSmallInt() throws EOFException, IOException {
        return readInt();
    }

    /** Reads a small long; this default reads a plain long. */
    @Override
    public long readSmallLong() throws EOFException, IOException {
        return readLong();
    }

    /** Reads a presence flag followed, when the flag is set, by a small int. */
    @Nullable
    @Override
    public Integer readNullableSmallInt() throws IOException {
        if (!readBoolean()) {
            return null;
        }
        return readSmallInt();
    }

    /** Reads a presence flag followed, when the flag is set, by a string. */
    @Override
    public String readNullableString() throws EOFException, IOException {
        if (!readBoolean()) {
            return null;
        }
        return readString();
    }

    /**
     * Skips exactly {@code count} bytes.
     *
     * @throws EOFException if no further bytes can be skipped before {@code count} is reached
     */
    @Override
    public void skipBytes(long count) throws EOFException, IOException {
        long outstanding = count;
        while (outstanding > 0) {
            long skippedNow = maybeSkip(outstanding);
            if (skippedNow <= 0) {
                throw new EOFException();
            }
            outstanding -= skippedNow;
        }
    }

    /** Not supported by this base class. */
    @Override
    public <T> T decodeChunked(DecodeAction<Decoder, T> decodeAction) throws EOFException, Exception {
        throw new UnsupportedOperationException();
    }

    /** Not supported by this base class. */
    @Override
    public void skipChunked() throws EOFException, IOException {
        throw new UnsupportedOperationException();
    }

    protected abstract int maybeReadBytes(byte[] buffer, int offset, int count) throws IOException;

    protected abstract long maybeSkip(long count) throws IOException;

    /** Input stream that forwards every operation to the enclosing decoder. */
    private class DecoderStream extends InputStream {
        byte[] buffer = new byte[1];

        @Override
        public long skip(long n) throws IOException {
            return maybeSkip(n);
        }

        @Override
        public int read() throws IOException {
            int count = maybeReadBytes(buffer, 0, 1);
            // A non-positive count is passed through unchanged; otherwise return the byte as 0-255.
            return count > 0 ? (buffer[0] & 0xff) : count;
        }

        @Override
        public int read(byte[] buffer) throws IOException {
            return maybeReadBytes(buffer, 0, buffer.length);
        }

        @Override
        public int read(byte[] buffer, int offset, int count) throws IOException {
            return maybeReadBytes(buffer, offset, count);
        }
    }
}

View File

@ -0,0 +1,101 @@
/*
* Copyright 2013 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree.serialize;
import javax.annotation.Nullable;
import java.io.IOException;
import java.io.OutputStream;
/**
 * Base class for {@link Encoder} implementations. It supplies an {@link OutputStream} view of the
 * encoder and default implementations of the composite write operations in terms of the
 * primitive ones.
 */
public abstract class AbstractEncoder implements Encoder {
    private EncoderStream stream;

    /** Returns an output stream over this encoder, created on first use and reused afterwards. */
    @Override
    public OutputStream getOutputStream() {
        EncoderStream current = stream;
        if (current == null) {
            current = new EncoderStream();
            stream = current;
        }
        return current;
    }

    /** Writes the whole array without a length prefix. */
    @Override
    public void writeBytes(byte[] bytes) throws IOException {
        writeBytes(bytes, 0, bytes.length);
    }

    /** Writes the whole array preceded by its length. */
    @Override
    public void writeBinary(byte[] bytes) throws IOException {
        writeBinary(bytes, 0, bytes.length);
    }

    /** Writes {@code count} as a small int followed by that many bytes starting at {@code offset}. */
    @Override
    public void writeBinary(byte[] bytes, int offset, int count) throws IOException {
        writeSmallInt(count);
        writeBytes(bytes, offset, count);
    }

    /** Not supported by this base class. */
    @Override
    public void encodeChunked(EncodeAction<Encoder> writeAction) throws Exception {
        throw new UnsupportedOperationException();
    }

    /** Writes a small int; this default writes a plain int. */
    @Override
    public void writeSmallInt(int value) throws IOException {
        writeInt(value);
    }

    /** Writes a small long; this default writes a plain long. */
    @Override
    public void writeSmallLong(long value) throws IOException {
        writeLong(value);
    }

    /** Writes a presence flag followed, when the value is not null, by the value as a small int. */
    @Override
    public void writeNullableSmallInt(@Nullable Integer value) throws IOException {
        boolean present = value != null;
        writeBoolean(present);
        if (present) {
            writeSmallInt(value);
        }
    }

    /** Writes a presence flag followed, when the value is not null, by its string form. */
    @Override
    public void writeNullableString(@Nullable CharSequence value) throws IOException {
        boolean present = value != null;
        writeBoolean(present);
        if (present) {
            writeString(value.toString());
        }
    }

    /** Output stream that forwards every operation to the enclosing encoder. */
    private class EncoderStream extends OutputStream {
        @Override
        public void write(int b) throws IOException {
            writeByte((byte) b);
        }

        @Override
        public void write(byte[] buffer) throws IOException {
            writeBytes(buffer);
        }

        @Override
        public void write(byte[] buffer, int offset, int length) throws IOException {
            writeBytes(buffer, offset, length);
        }
    }
}

View File

@ -0,0 +1,40 @@
/*
* Copyright 2016 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree.serialize;
import com.google.common.base.Objects;
/**
 * Base class giving {@code Serializer} implementations a default notion of equality: two
 * serializers are equal when they are instances of exactly the same class, and the hash code is
 * derived from that class. This lets serializers be compared when they are used as parameters
 * that decide whether an existing cache instance is compatible and can be reused.
 */
public abstract class AbstractSerializer<T> implements Serializer<T> {
    /** Returns true when {@code obj} is non-null and has exactly the same class as this serializer. */
    @Override
    public boolean equals(Object obj) {
        return obj != null && Objects.equal(obj.getClass(), getClass());
    }

    /** Returns a hash code based solely on the class of this serializer. */
    @Override
    public int hashCode() {
        Class<?> type = getClass();
        return Objects.hashCode(type);
    }
}

View File

@ -0,0 +1,79 @@
/*
* Copyright 2012 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree.serialize;
import javax.annotation.Nullable;
/**
 * Static helpers for casting objects, either with a descriptive failure message or unchecked.
 */
public abstract class Cast {
    /**
     * Casts the given object to the given type, providing a better error message than the default.
     *
     * The standard {@link Class#cast(Object)} method produces unsatisfactory error messages on some platforms
     * when it fails. All this method does is provide a better, consistent, error message.
     *
     * This should be used whenever there is a chance the cast could fail. If in doubt, use this.
     *
     * @param outputType The type to cast the input to
     * @param object The object to be cast (must not be {@code null})
     * @param <O> The type to be cast to
     * @param <I> The type of the object to be cast
     * @return The input object, cast to the output type
     * @throws ClassCastException if the object is not an instance of the output type; the exception raised by
     * {@link Class#cast(Object)} is attached as the cause
     */
    public static <O, I> O cast(Class<O> outputType, I object) {
        try {
            return outputType.cast(object);
        } catch (ClassCastException e) {
            // Class.cast only fails for a non-null argument, so object.getClass() cannot NPE here.
            ClassCastException detailed = new ClassCastException(String.format(
                "Failed to cast object %s of type %s to target type %s", object, object.getClass().getName(), outputType.getName()
            ));
            // ClassCastException has no cause-taking constructor, so attach the original failure explicitly
            // instead of discarding it.
            detailed.initCause(e);
            throw detailed;
        }
    }

    /**
     * Casts the given object to the given type, providing a better error message than the default.
     *
     * The standard {@link Class#cast(Object)} method produces unsatisfactory error messages on some platforms
     * when it fails. All this method does is provide a better, consistent, error message.
     *
     * This should be used whenever there is a chance the cast could fail. If in doubt, use this.
     *
     * @param outputType The type to cast the input to
     * @param object The object to be cast, may be {@code null}
     * @param <O> The type to be cast to
     * @param <I> The type of the object to be cast
     * @return The input object, cast to the output type, or {@code null} if the input was {@code null}
     */
    @Nullable
    public static <O, I> O castNullable(Class<O> outputType, @Nullable I object) {
        if (object == null) {
            return null;
        }
        return cast(outputType, object);
    }

    /**
     * Performs a plain unchecked cast of a possibly-null object to the type inferred at the call site.
     * No runtime type check is made here.
     *
     * @param object The object to cast, may be {@code null}
     * @param <T> The type to cast to
     * @return The same reference, typed as {@code T}
     */
    @SuppressWarnings("unchecked")
    @Nullable
    public static <T> T uncheckedCast(@Nullable Object object) {
        return (T) object;
    }

    /**
     * Same unchecked cast as {@link #uncheckedCast(Object)}, but without the {@code @Nullable} annotations
     * on the parameter and the return value. The argument is not actually checked for {@code null}.
     *
     * @param object The object to cast
     * @param <T> The type to cast to
     * @return The same reference, typed as {@code T}
     */
    @SuppressWarnings("unchecked")
    public static <T> T uncheckedNonnullCast(Object object) {
        return (T) object;
    }
}

View File

@ -0,0 +1,43 @@
/*
* Copyright 2010 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree.serialize;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInputStream;
import java.io.ObjectStreamClass;
/**
 * An {@link ObjectInputStream} that first tries to resolve deserialized classes through a caller-supplied
 * {@link ClassLoader}, falling back to the default resolution when that loader cannot find the class.
 */
public class ClassLoaderObjectInputStream extends ObjectInputStream {
    private final ClassLoader loader;

    /**
     * @param in the stream to deserialize from
     * @param loader the class loader to consult first when resolving classes
     * @throws IOException if the underlying {@link ObjectInputStream} cannot be initialised
     */
    public ClassLoaderObjectInputStream(InputStream in, ClassLoader loader) throws IOException {
        super(in);
        this.loader = loader;
    }

    /**
     * Returns the class loader passed to the constructor.
     */
    public ClassLoader getClassLoader() {
        return loader;
    }

    /**
     * Looks the described class up (without initialising it) in the configured loader; if that lookup
     * fails, defers to {@link ObjectInputStream#resolveClass(ObjectStreamClass)}.
     */
    @Override
    protected Class<?> resolveClass(ObjectStreamClass desc) throws IOException, ClassNotFoundException {
        Class<?> resolved;
        try {
            final String className = desc.getName();
            resolved = Class.forName(className, false, loader);
        } catch (ClassNotFoundException notVisibleToLoader) {
            resolved = super.resolveClass(desc);
        }
        return resolved;
    }
}

View File

@ -0,0 +1,140 @@
/*
* Copyright 2013 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree.serialize;
import javax.annotation.Nullable;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
/**
* Provides a way to decode structured data from a backing byte stream. Implementations may buffer incoming bytes read
* from the backing stream prior to decoding.
*
* Each read method here is documented as the counterpart of a write method on {@link Encoder}.
*/
public interface Decoder {
/**
* Returns an InputStream which can be used to read raw bytes.
*/
InputStream getInputStream();
/**
* Reads a signed 64 bit long value. Can read any value that was written using {@link Encoder#writeLong(long)}.
*
* @throws EOFException when the end of the byte stream is reached before the long value can be fully read.
*/
long readLong() throws EOFException, IOException;
/**
* Reads a signed 64 bit long value. Can read any value that was written using {@link Encoder#writeSmallLong(long)}.
*
* @throws EOFException when the end of the byte stream is reached before the long value can be fully read.
*/
long readSmallLong() throws EOFException, IOException;
/**
* Reads a signed 32 bit int value. Can read any value that was written using {@link Encoder#writeInt(int)}.
*
* @throws EOFException when the end of the byte stream is reached before the int value can be fully read.
*/
int readInt() throws EOFException, IOException;
/**
* Reads a signed 32 bit int value. Can read any value that was written using {@link Encoder#writeSmallInt(int)}.
*
* @throws EOFException when the end of the byte stream is reached before the int value can be fully read.
*/
int readSmallInt() throws EOFException, IOException;
/**
* Reads a nullable signed 32 bit int value.
*
* @return the value read, or {@code null}
* @see #readSmallInt()
*/
@Nullable
Integer readNullableSmallInt() throws EOFException, IOException;
/**
* Reads a boolean value. Can read any value that was written using {@link Encoder#writeBoolean(boolean)}.
*
* @throws EOFException when the end of the byte stream is reached before the boolean value can be fully read.
*/
boolean readBoolean() throws EOFException, IOException;
/**
* Reads a non-null string value. Can read any value that was written using {@link Encoder#writeString(CharSequence)}.
*
* @throws EOFException when the end of the byte stream is reached before the string can be fully read.
*/
String readString() throws EOFException, IOException;
/**
* Reads a nullable string value. Can read any value that was written using {@link Encoder#writeNullableString(CharSequence)}.
*
* @return the string read, or {@code null}
* @throws EOFException when the end of the byte stream is reached before the string can be fully read.
*/
@Nullable
String readNullableString() throws EOFException, IOException;
/**
* Reads a byte value. Can read any byte value that was written using one of the raw byte methods on {@link Encoder}, such as {@link Encoder#writeByte(byte)} or {@link Encoder#getOutputStream()}
*
* @throws EOFException when the end of the byte stream is reached.
*/
byte readByte() throws EOFException, IOException;
/**
* Reads bytes into the given buffer, filling the buffer. Can read any byte values that were written using one of the raw byte methods on {@link Encoder}, such as {@link
* Encoder#writeBytes(byte[])} or {@link Encoder#getOutputStream()}
*
* @throws EOFException when the end of the byte stream is reached before the buffer is full.
*/
void readBytes(byte[] buffer) throws EOFException, IOException;
/**
* Reads the specified number of bytes into the given buffer. Can read any byte values that were written using one of the raw byte methods on {@link Encoder}, such as {@link
* Encoder#writeBytes(byte[])} or {@link Encoder#getOutputStream()}
*
* @param buffer the buffer to read into
* @param offset the position in the buffer at which to start storing bytes
* @param count the number of bytes to read
* @throws EOFException when the end of the byte stream is reached before the specified number of bytes were read.
*/
void readBytes(byte[] buffer, int offset, int count) throws EOFException, IOException;
/**
* Reads a byte array. Can read any byte array written using {@link Encoder#writeBinary(byte[])} or {@link Encoder#writeBinary(byte[], int, int)}.
*
* @throws EOFException when the end of the byte stream is reached before the byte array was fully read.
*/
byte[] readBinary() throws EOFException, IOException;
/**
* Skips the given number of bytes. Can skip over any byte values that were written using one of the raw byte methods on {@link Encoder}.
*
* @param count the number of bytes to skip
*/
void skipBytes(long count) throws EOFException, IOException;
/**
* Reads a byte stream written using {@link Encoder#encodeChunked(Encoder.EncodeAction)}.
*
* @param decodeAction the action that reads from the decoder passed to it and produces the result
* @return the value produced by {@code decodeAction}
*/
<T> T decodeChunked(DecodeAction<Decoder, T> decodeAction) throws EOFException, Exception;
/**
* Skips over a byte stream written using {@link Encoder#encodeChunked(Encoder.EncodeAction)}, discarding its content.
*/
void skipChunked() throws EOFException, IOException;
/**
* Callback used with {@link #decodeChunked(DecodeAction)}: reads from a source of type {@code IN} and produces a value of type {@code OUT}.
*/
interface DecodeAction<IN, OUT> {
/**
* Reads a value from the given source.
*/
OUT read(IN source) throws Exception;
}
}

View File

@ -0,0 +1,73 @@
/*
* Copyright 2009 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree.serialize;
import com.google.common.base.Objects;
import java.io.IOException;
import java.io.ObjectOutputStream;
import java.io.StreamCorruptedException;
/**
 * Serializer that reads and writes values using standard Java object serialization, resolving classes on
 * read through a configurable {@link ClassLoader}. Two instances are equal when they have the same class
 * and the same class loader.
 */
public class DefaultSerializer<T> extends AbstractSerializer<T> {
    private ClassLoader classLoader;

    /**
     * Creates a serializer that uses the class loader of this serializer's own class.
     */
    public DefaultSerializer() {
        this(null);
    }

    /**
     * @param classLoader the loader used to resolve classes on read; when {@code null}, the class loader
     * of this serializer's own class is used instead
     */
    public DefaultSerializer(ClassLoader classLoader) {
        this.classLoader = classLoader != null ? classLoader : getClass().getClassLoader();
    }

    public ClassLoader getClassLoader() {
        return classLoader;
    }

    /**
     * Replaces the class loader used for subsequent reads. The value is stored as given, including
     * {@code null}.
     */
    public void setClassLoader(ClassLoader classLoader) {
        this.classLoader = classLoader;
    }

    /**
     * Deserializes one object from the decoder's raw input stream.
     *
     * @return the deserialized object, or {@code null} if the stream is reported as corrupted
     */
    @Override
    public T read(Decoder decoder) throws Exception {
        Object deserialized;
        try {
            // Opening the object stream reads the stream header, so it stays inside the try as well.
            ClassLoaderObjectInputStream objectIn = new ClassLoaderObjectInputStream(decoder.getInputStream(), classLoader);
            deserialized = objectIn.readObject();
        } catch (StreamCorruptedException corrupted) {
            return null;
        }
        return Cast.uncheckedNonnullCast(deserialized);
    }

    /**
     * Serializes the value to the encoder's raw output stream and flushes the object stream.
     */
    @Override
    public void write(Encoder encoder, T value) throws IOException {
        ObjectOutputStream objectOut = new ObjectOutputStream(encoder.getOutputStream());
        objectOut.writeObject(value);
        objectOut.flush();
    }

    @Override
    public boolean equals(Object obj) {
        // super.equals() is relied on to reject null and other classes before the cast below.
        return super.equals(obj) && Objects.equal(classLoader, ((DefaultSerializer<?>) obj).classLoader);
    }

    @Override
    public int hashCode() {
        return Objects.hashCode(super.hashCode(), classLoader);
    }
}

View File

@ -0,0 +1,110 @@
/*
* Copyright 2013 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree.serialize;
import javax.annotation.Nullable;
import java.io.IOException;
import java.io.OutputStream;
/**
* Provides a way to encode structured data to a backing byte stream. Implementations may buffer outgoing encoded bytes prior
* to writing to the backing byte stream.
*/
public interface Encoder {
/**
* Returns an {@link OutputStream} that can be used to write raw bytes to the stream.
*/
OutputStream getOutputStream();
/**
* Writes a raw byte value to the stream.
*/
void writeByte(byte value) throws IOException;
/**
* Writes the given raw bytes to the stream. Does not encode any length information.
*/
void writeBytes(byte[] bytes) throws IOException;
/**
* Writes the given raw bytes to the stream. Does not encode any length information.
*
* @param bytes the array containing the bytes to write
* @param offset the position in the array of the first byte to write
* @param count the number of bytes to write
*/
void writeBytes(byte[] bytes, int offset, int count) throws IOException;
/**
* Writes the given byte array to the stream. Encodes the bytes and length information.
*/
void writeBinary(byte[] bytes) throws IOException;
/**
* Writes the given byte array to the stream. Encodes the bytes and length information.
*
* @param bytes the array containing the bytes to write
* @param offset the position in the array of the first byte to write
* @param count the number of bytes to write
*/
void writeBinary(byte[] bytes, int offset, int count) throws IOException;
/**
* Appends an encoded stream to this stream. Encodes the stream as a series of chunks with length information.
*
* @param writeAction the action that writes the nested content to the encoder passed to it
*/
void encodeChunked(EncodeAction<Encoder> writeAction) throws Exception;
/**
* Writes a signed 64 bit long value. The implementation may encode the value as a variable number of bytes, not necessarily as 8 bytes.
*/
void writeLong(long value) throws IOException;
/**
* Writes a signed 64 bit long value whose value is likely to be small and positive but may not be. The implementation may encode the value in a way that is more efficient for small positive
* values.
*/
void writeSmallLong(long value) throws IOException;
/**
* Writes a signed 32 bit int value. The implementation may encode the value as a variable number of bytes, not necessarily as 4 bytes.
*/
void writeInt(int value) throws IOException;
/**
* Writes a signed 32 bit int value whose value is likely to be small and positive but may not be. The implementation may encode the value in a way that
* is more efficient for small positive values.
*/
void writeSmallInt(int value) throws IOException;
/**
* Writes a nullable signed 32 bit int value whose value is likely to be small and positive but may not be.
*
* @see #writeSmallInt(int)
*/
void writeNullableSmallInt(@Nullable Integer value) throws IOException;
/**
* Writes a boolean value.
*/
void writeBoolean(boolean value) throws IOException;
/**
* Writes a non-null string value.
*/
void writeString(CharSequence value) throws IOException;
/**
* Writes a nullable string value.
*/
void writeNullableString(@Nullable CharSequence value) throws IOException;
/**
* Callback used with {@link #encodeChunked(EncodeAction)}: writes content to a target of type {@code T}.
*/
interface EncodeAction<T> {
/**
* Writes to the given target.
*/
void write(T target) throws Exception;
}
}

View File

@ -0,0 +1,31 @@
/*
* Copyright 2013 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree.serialize;
import java.io.Flushable;
import java.io.IOException;
/**
* Represents an {@link Encoder} that buffers encoded data prior to writing to the backing stream.
* Combines {@link Encoder} with {@link Flushable} so that the buffered data can be pushed out on demand.
*/
public interface FlushableEncoder extends Encoder, Flushable {
/**
* Ensures that all buffered data has been written to the backing stream. Does not flush the backing stream.
*
* @throws IOException declared as in {@link Flushable#flush()}
*/
@Override
void flush() throws IOException;
}

View File

@ -0,0 +1,28 @@
/*
* Copyright 2012 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree.serialize;
import java.io.EOFException;
/**
* Reads objects of type {@code T} one at a time from a stream. Instances are handed out by
* {@link StatefulSerializer#newReader(Decoder)}.
*/
public interface ObjectReader<T> {
/**
* Reads the next object from the stream.
*
* @return the object that was read
* @throws EOFException When the next object cannot be fully read due to reaching the end of stream.
*/
T read() throws EOFException, Exception;
}

View File

@ -0,0 +1,21 @@
/*
* Copyright 2012 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree.serialize;
/**
* Writes objects of type {@code T} one at a time. Instances are handed out by
* {@link StatefulSerializer#newWriter(Encoder)}.
*/
public interface ObjectWriter<T> {
/**
* Writes the given value.
*
* @param value the object to write
*/
void write(T value) throws Exception;
}

View File

@ -0,0 +1,33 @@
/*
* Copyright 2009 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree.serialize;
import java.io.EOFException;
/**
* Reads and writes objects of type {@code T} using a {@link Decoder} and an {@link Encoder} supplied on each call.
*/
public interface Serializer<T> {
/**
* Reads the next object from the given stream. The implementation must not perform any buffering, so that it reads only those bytes from the input stream that are
* required to deserialize the next object.
*
* @param decoder the decoder to read from
* @return the object that was read
* @throws EOFException When the next object cannot be fully read due to reaching the end of stream.
*/
T read(Decoder decoder) throws EOFException, Exception;
/**
* Writes the given object to the given stream. The implementation must not perform any buffering.
*
* @param encoder the encoder to write to
* @param value the object to write
*/
void write(Encoder encoder, T value) throws Exception;
}

View File

@ -0,0 +1,33 @@
/*
* Copyright 2012 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree.serialize;
/**
* Creates {@link ObjectReader} and {@link ObjectWriter} instances bound to a given {@link Decoder} or {@link Encoder}.
*
* Implementations must allow concurrent reading and writing, so that a thread can read and a thread can write at the same time.
* Implementations do not need to support multiple read threads or multiple write threads.
*/
public interface StatefulSerializer<T> {
/**
* Returns a reader for the given decoder. Should not perform any buffering.
*
* @param decoder the decoder the returned reader is created for
*/
ObjectReader<T> newReader(Decoder decoder);
/**
* Returns a writer for the given encoder. Should not perform any buffering.
*
* @param encoder the encoder the returned writer is created for
*/
ObjectWriter<T> newWriter(Encoder encoder);
}

View File

@ -0,0 +1,210 @@
/*
* Copyright 2013 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree.serialize.kryo;
import com.esotericsoftware.kryo.KryoException;
import com.esotericsoftware.kryo.io.Input;
import seaweedfs.client.btree.serialize.AbstractDecoder;
import seaweedfs.client.btree.serialize.Decoder;
import java.io.Closeable;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
/**
 * {@link Decoder} that reads through a Kryo {@link Input} wrapped around the supplied stream.
 *
 * Note that this decoder uses buffering, so will attempt to read beyond the end of the encoded data. This means you should use this type only when this decoder will be used to decode the entire
 * stream.
 */
public class KryoBackedDecoder extends AbstractDecoder implements Decoder, Closeable {
    private final Input input;
    private final InputStream inputStream;
    // Bytes skipped directly on inputStream, bypassing the Kryo buffer; added to input.total() in getReadPosition().
    private long extraSkipped;
    // Lazily created by decodeChunked() and reused across calls.
    private KryoBackedDecoder nested;

    /**
     * Creates a decoder over the given stream with a 4096 byte buffer.
     */
    public KryoBackedDecoder(InputStream inputStream) {
        this(inputStream, 4096);
    }

    /**
     * @param inputStream the stream to decode from
     * @param bufferSize the buffer size passed to the Kryo {@link Input}
     */
    public KryoBackedDecoder(InputStream inputStream, int bufferSize) {
        this.inputStream = inputStream;
        input = new Input(this.inputStream, bufferSize);
    }

    /**
     * Delegates to {@code Input.read(byte[], int, int)} and returns its result.
     */
    @Override
    protected int maybeReadBytes(byte[] buffer, int offset, int count) {
        return input.read(buffer, offset, count);
    }

    /**
     * Skips up to {@code count} bytes and returns how many were actually skipped. When the Kryo buffer is
     * empty the skip goes straight to the underlying stream (and is recorded in {@code extraSkipped});
     * otherwise only the buffer position is advanced, by at most the number of buffered bytes.
     */
    @Override
    protected long maybeSkip(long count) throws IOException {
        // Work around some bugs in Input.skip()
        int remaining = input.limit() - input.position();
        if (remaining == 0) {
            long skipped = inputStream.skip(count);
            if (skipped > 0) {
                extraSkipped += skipped;
            }
            return skipped;
        } else if (count <= remaining) {
            // count fits in an int here because it is no larger than remaining.
            input.setPosition(input.position() + (int) count);
            return count;
        } else {
            input.setPosition(input.limit());
            return remaining;
        }
    }

    /**
     * Translates a Kryo "Buffer underflow." failure into an {@link EOFException} carrying the original
     * exception as its cause; any other {@link KryoException} is rethrown unchanged. This method never
     * returns normally; the declared return type only lets callers write {@code throw maybeEndOfStream(e)}.
     */
    private RuntimeException maybeEndOfStream(KryoException e) throws EOFException {
        // Constant-first comparison: an exception without a message is rethrown as-is instead of
        // being masked by a NullPointerException.
        if ("Buffer underflow.".equals(e.getMessage())) {
            throw (EOFException) (new EOFException().initCause(e));
        }
        throw e;
    }

    @Override
    public byte readByte() throws EOFException {
        try {
            return input.readByte();
        } catch (KryoException e) {
            throw maybeEndOfStream(e);
        }
    }

    @Override
    public void readBytes(byte[] buffer, int offset, int count) throws EOFException {
        try {
            input.readBytes(buffer, offset, count);
        } catch (KryoException e) {
            throw maybeEndOfStream(e);
        }
    }

    @Override
    public long readLong() throws EOFException {
        try {
            return input.readLong();
        } catch (KryoException e) {
            throw maybeEndOfStream(e);
        }
    }

    @Override
    public long readSmallLong() throws EOFException, IOException {
        try {
            return input.readLong(true);
        } catch (KryoException e) {
            throw maybeEndOfStream(e);
        }
    }

    @Override
    public int readInt() throws EOFException {
        try {
            return input.readInt();
        } catch (KryoException e) {
            throw maybeEndOfStream(e);
        }
    }

    @Override
    public int readSmallInt() throws EOFException {
        try {
            return input.readInt(true);
        } catch (KryoException e) {
            throw maybeEndOfStream(e);
        }
    }

    @Override
    public boolean readBoolean() throws EOFException {
        try {
            return input.readBoolean();
        } catch (KryoException e) {
            throw maybeEndOfStream(e);
        }
    }

    /**
     * Delegates to {@link #readNullableString()}; no null check is applied to the result here.
     */
    @Override
    public String readString() throws EOFException {
        return readNullableString();
    }

    @Override
    public String readNullableString() throws EOFException {
        try {
            return input.readString();
        } catch (KryoException e) {
            throw maybeEndOfStream(e);
        }
    }

    /**
     * Skips a chunked section: repeatedly reads a small-int chunk length and skips that many bytes,
     * stopping at the first chunk length of zero.
     */
    @Override
    public void skipChunked() throws EOFException, IOException {
        while (true) {
            int count = readSmallInt();
            if (count == 0) {
                break;
            }
            skipBytes(count);
        }
    }

    /**
     * Runs the action against a nested decoder whose input is the sequence of length-prefixed chunks read
     * from this decoder, then requires the next small-int read from this decoder to be the zero terminator.
     *
     * @throws IllegalStateException if the value following the action's reads is not zero
     */
    @Override
    public <T> T decodeChunked(DecodeAction<Decoder, T> decodeAction) throws EOFException, Exception {
        if (nested == null) {
            nested = new KryoBackedDecoder(new InputStream() {
                @Override
                public int read() throws IOException {
                    throw new UnsupportedOperationException();
                }

                @Override
                public int read(byte[] buffer, int offset, int length) throws IOException {
                    // Each call consumes exactly one chunk from the enclosing decoder.
                    int count = readSmallInt();
                    if (count == 0) {
                        // End of stream has been reached
                        return -1;
                    }
                    if (count > length) {
                        // For now, assume same size buffers used to read and write
                        throw new UnsupportedOperationException();
                    }
                    readBytes(buffer, offset, count);
                    return count;
                }
            });
        }
        T value = decodeAction.read(nested);
        if (readSmallInt() != 0) {
            throw new IllegalStateException("Expecting the end of nested stream.");
        }
        return value;
    }

    /**
     * Returns the total number of bytes consumed by this decoder. Some additional bytes may also be buffered by this decoder but have not been consumed.
     */
    public long getReadPosition() {
        return input.total() + extraSkipped;
    }

    /**
     * Closes the Kryo input.
     */
    @Override
    public void close() throws IOException {
        input.close();
    }
}

View File

@ -0,0 +1,134 @@
/*
* Copyright 2013 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree.serialize.kryo;
import com.esotericsoftware.kryo.io.Output;
import seaweedfs.client.btree.serialize.AbstractEncoder;
import seaweedfs.client.btree.serialize.Encoder;
import seaweedfs.client.btree.serialize.FlushableEncoder;
import javax.annotation.Nullable;
import java.io.Closeable;
import java.io.IOException;
import java.io.OutputStream;
/**
 * {@link FlushableEncoder} that writes through a Kryo {@link Output} wrapped around the supplied stream.
 */
public class KryoBackedEncoder extends AbstractEncoder implements FlushableEncoder, Closeable {
    private final Output output;
    // Lazily created by encodeChunked() and reused across calls.
    private KryoBackedEncoder nested;

    /**
     * Creates an encoder over the given stream with a 4096 byte buffer.
     */
    public KryoBackedEncoder(OutputStream outputStream) {
        this(outputStream, 4096);
    }

    /**
     * @param outputStream the stream to encode to
     * @param bufferSize the buffer size passed to the Kryo {@link Output}
     */
    public KryoBackedEncoder(OutputStream outputStream, int bufferSize) {
        output = new Output(outputStream, bufferSize);
    }

    @Override
    public void writeByte(byte value) {
        output.writeByte(value);
    }

    @Override
    public void writeBytes(byte[] bytes, int offset, int count) {
        output.writeBytes(bytes, offset, count);
    }

    @Override
    public void writeLong(long value) {
        output.writeLong(value);
    }

    @Override
    public void writeSmallLong(long value) {
        output.writeLong(value, true);
    }

    @Override
    public void writeInt(int value) {
        output.writeInt(value);
    }

    @Override
    public void writeSmallInt(int value) {
        output.writeInt(value, true);
    }

    @Override
    public void writeBoolean(boolean value) {
        output.writeBoolean(value);
    }

    /**
     * Writes a string that must not be {@code null}.
     *
     * @throws IllegalArgumentException if {@code value} is {@code null}
     */
    @Override
    public void writeString(CharSequence value) {
        if (value != null) {
            output.writeString(value);
            return;
        }
        throw new IllegalArgumentException("Cannot encode a null string.");
    }

    @Override
    public void writeNullableString(@Nullable CharSequence value) {
        output.writeString(value);
    }

    /**
     * Runs the action against a nested encoder whose output is written to this encoder as length-prefixed
     * chunks, flushes the nested encoder, then writes a zero small-int as the terminator.
     */
    @Override
    public void encodeChunked(EncodeAction<Encoder> writeAction) throws Exception {
        if (nested == null) {
            nested = new KryoBackedEncoder(newChunkingStream());
        }
        writeAction.write(nested);
        nested.flush();
        writeSmallInt(0);
    }

    /**
     * Builds the stream backing the nested encoder: every non-empty array write is emitted on this encoder
     * as a small-int length followed by the bytes; single-byte writes are not supported.
     */
    private OutputStream newChunkingStream() {
        return new OutputStream() {
            @Override
            public void write(byte[] buffer, int offset, int length) {
                if (length != 0) {
                    KryoBackedEncoder.this.writeSmallInt(length);
                    KryoBackedEncoder.this.writeBytes(buffer, offset, length);
                }
            }

            @Override
            public void write(byte[] buffer) throws IOException {
                write(buffer, 0, buffer.length);
            }

            @Override
            public void write(int b) {
                throw new UnsupportedOperationException();
            }
        };
    }

    /**
     * Returns the total number of bytes written by this encoder, some of which may still be buffered.
     */
    public long getWritePosition() {
        return output.total();
    }

    @Override
    public void flush() {
        output.flush();
    }

    @Override
    public void close() {
        output.close();
    }
}

View File

@ -0,0 +1,188 @@
/*
* Copyright 2018 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree.serialize.kryo;
import com.esotericsoftware.kryo.KryoException;
import com.esotericsoftware.kryo.io.Input;
import seaweedfs.client.btree.serialize.AbstractDecoder;
import seaweedfs.client.btree.serialize.Decoder;
import java.io.Closeable;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
/**
 * {@link Decoder} that reads through a Kryo {@link Input} and decodes strings via an index table, so that
 * a string value appears in the stream only once and later occurrences are resolved from the table.
 *
 * Note that this decoder uses buffering, so will attempt to read beyond the end of the encoded data. This means you should use this type only when this decoder will be used to decode the entire
 * stream.
 */
public class StringDeduplicatingKryoBackedDecoder extends AbstractDecoder implements Decoder, Closeable {
    public static final int INITIAL_CAPACITY = 32;
    private final Input input;
    private final InputStream inputStream;
    // Table of strings seen so far, indexed by the int that precedes each string in the stream.
    // Allocated on first use and released in close().
    private String[] strings;
    // Bytes skipped directly on inputStream, bypassing the Kryo buffer; added to input.total() in getReadPosition().
    private long extraSkipped;

    /**
     * Creates a decoder over the given stream with a 4096 byte buffer.
     */
    public StringDeduplicatingKryoBackedDecoder(InputStream inputStream) {
        this(inputStream, 4096);
    }

    /**
     * @param inputStream the stream to decode from
     * @param bufferSize the buffer size passed to the Kryo {@link Input}
     */
    public StringDeduplicatingKryoBackedDecoder(InputStream inputStream, int bufferSize) {
        this.inputStream = inputStream;
        input = new Input(this.inputStream, bufferSize);
    }

    /**
     * Delegates to {@code Input.read(byte[], int, int)} and returns its result.
     */
    @Override
    protected int maybeReadBytes(byte[] buffer, int offset, int count) {
        return input.read(buffer, offset, count);
    }

    /**
     * Skips up to {@code count} bytes and returns how many were actually skipped. When the Kryo buffer is
     * empty the skip goes straight to the underlying stream (and is recorded in {@code extraSkipped});
     * otherwise only the buffer position is advanced, by at most the number of buffered bytes.
     */
    @Override
    protected long maybeSkip(long count) throws IOException {
        // Work around some bugs in Input.skip()
        int remaining = input.limit() - input.position();
        if (remaining == 0) {
            long skipped = inputStream.skip(count);
            if (skipped > 0) {
                extraSkipped += skipped;
            }
            return skipped;
        } else if (count <= remaining) {
            // count fits in an int here because it is no larger than remaining.
            input.setPosition(input.position() + (int) count);
            return count;
        } else {
            input.setPosition(input.limit());
            return remaining;
        }
    }

    /**
     * Translates a Kryo "Buffer underflow." failure into an {@link EOFException} carrying the original
     * exception as its cause; any other {@link KryoException} is rethrown unchanged. This method never
     * returns normally; the declared return type only lets callers write {@code throw maybeEndOfStream(e)}.
     */
    private RuntimeException maybeEndOfStream(KryoException e) throws EOFException {
        // Constant-first comparison: an exception without a message is rethrown as-is instead of
        // being masked by a NullPointerException.
        if ("Buffer underflow.".equals(e.getMessage())) {
            throw (EOFException) (new EOFException().initCause(e));
        }
        throw e;
    }

    @Override
    public byte readByte() throws EOFException {
        try {
            return input.readByte();
        } catch (KryoException e) {
            throw maybeEndOfStream(e);
        }
    }

    @Override
    public void readBytes(byte[] buffer, int offset, int count) throws EOFException {
        try {
            input.readBytes(buffer, offset, count);
        } catch (KryoException e) {
            throw maybeEndOfStream(e);
        }
    }

    @Override
    public long readLong() throws EOFException {
        try {
            return input.readLong();
        } catch (KryoException e) {
            throw maybeEndOfStream(e);
        }
    }

    @Override
    public long readSmallLong() throws EOFException, IOException {
        try {
            return input.readLong(true);
        } catch (KryoException e) {
            throw maybeEndOfStream(e);
        }
    }

    @Override
    public int readInt() throws EOFException {
        try {
            return input.readInt();
        } catch (KryoException e) {
            throw maybeEndOfStream(e);
        }
    }

    @Override
    public int readSmallInt() throws EOFException {
        try {
            return input.readInt(true);
        } catch (KryoException e) {
            throw maybeEndOfStream(e);
        }
    }

    @Override
    public boolean readBoolean() throws EOFException {
        try {
            return input.readBoolean();
        } catch (KryoException e) {
            throw maybeEndOfStream(e);
        }
    }

    /**
     * Delegates to {@link #readNullableString()}; no null check is applied to the result here.
     */
    @Override
    public String readString() throws EOFException {
        return readNullableString();
    }

    /**
     * Reads an int index followed, only the first time that index is seen, by the string itself.
     * An index of {@code -1} denotes {@code null}. Strings already in the table are returned without
     * reading any further bytes.
     */
    @Override
    public String readNullableString() throws EOFException {
        try {
            int idx = readInt();
            if (idx == -1) {
                return null;
            }
            if (strings == null) {
                strings = new String[INITIAL_CAPACITY];
            }
            String string = null;
            if (idx >= strings.length) {
                // NOTE(review): the table grows by a single 1.5x step, which is only sufficient if new
                // indices arrive densely (each at most one past the previous maximum). Presumably the
                // matching encoder assigns indices sequentially - verify against the writer.
                String[] grow = new String[strings.length * 3 / 2];
                System.arraycopy(strings, 0, grow, 0, strings.length);
                strings = grow;
            } else {
                string = strings[idx];
            }
            if (string == null) {
                // First occurrence of this index: the string payload follows in the stream.
                string = input.readString();
                strings[idx] = string;
            }
            return string;
        } catch (KryoException e) {
            throw maybeEndOfStream(e);
        }
    }

    /**
     * Returns the total number of bytes consumed by this decoder. Some additional bytes may also be buffered by this decoder but have not been consumed.
     */
    public long getReadPosition() {
        return input.total() + extraSkipped;
    }

    /**
     * Drops the string table and closes the Kryo input.
     */
    @Override
    public void close() throws IOException {
        strings = null;
        input.close();
    }
}

View File

@ -0,0 +1,128 @@
/*
* Copyright 2018 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree.serialize.kryo;
import com.esotericsoftware.kryo.io.Output;
import com.google.common.collect.Maps;
import seaweedfs.client.btree.serialize.AbstractEncoder;
import seaweedfs.client.btree.serialize.FlushableEncoder;
import javax.annotation.Nullable;
import java.io.Closeable;
import java.io.OutputStream;
import java.util.Map;
/**
 * Encoder backed by a Kryo {@link Output} that writes every distinct string body only once.
 *
 * <p>Each string is preceded by an int index. The first time a string is seen it is assigned
 * the next free index and the index is followed by the string body; later occurrences write
 * the index alone. A {@code null} string is written as index {@code -1}.
 */
public class StringDeduplicatingKryoBackedEncoder extends AbstractEncoder implements FlushableEncoder, Closeable {
    /** Maps each string written so far to its index; created lazily, dropped by {@link #done()}. */
    private Map<String, Integer> strings;
    private final Output output;

    /**
     * Creates an encoder with a 4096 byte buffer.
     *
     * @param outputStream the stream that receives the encoded bytes
     */
    public StringDeduplicatingKryoBackedEncoder(OutputStream outputStream) {
        this(outputStream, 4096);
    }

    /**
     * Creates an encoder with the given buffer size.
     *
     * @param outputStream the stream that receives the encoded bytes
     * @param bufferSize buffer size handed to the Kryo {@link Output}
     */
    public StringDeduplicatingKryoBackedEncoder(OutputStream outputStream, int bufferSize) {
        this.output = new Output(outputStream, bufferSize);
    }

    @Override
    public void writeByte(byte value) {
        output.writeByte(value);
    }

    @Override
    public void writeBytes(byte[] bytes, int offset, int count) {
        output.writeBytes(bytes, offset, count);
    }

    @Override
    public void writeLong(long value) {
        output.writeLong(value);
    }

    @Override
    public void writeSmallLong(long value) {
        output.writeLong(value, true);
    }

    @Override
    public void writeInt(int value) {
        output.writeInt(value);
    }

    @Override
    public void writeSmallInt(int value) {
        output.writeInt(value, true);
    }

    @Override
    public void writeBoolean(boolean value) {
        output.writeBoolean(value);
    }

    /**
     * Writes a non-null string using the de-duplicating format.
     *
     * @throws IllegalArgumentException when {@code value} is {@code null}
     */
    @Override
    public void writeString(CharSequence value) {
        if (value == null) {
            throw new IllegalArgumentException("Cannot encode a null string.");
        }
        writeNullableString(value);
    }

    /**
     * Writes a string that may be {@code null}: {@code -1} for null, otherwise the string's
     * index, followed by the string body only on its first occurrence.
     */
    @Override
    public void writeNullableString(@Nullable CharSequence value) {
        if (value == null) {
            output.writeInt(-1);
            return;
        }
        if (strings == null) {
            strings = Maps.newHashMapWithExpectedSize(1024);
        }

        final String text = value.toString();
        final Integer known = strings.get(text);
        if (known != null) {
            // Seen before: the index alone identifies the string.
            output.writeInt(known);
            return;
        }

        // First occurrence: assign the next index, then emit the index and the body.
        final Integer assigned = strings.size();
        output.writeInt(assigned);
        strings.put(text, assigned);
        output.writeString(text);
    }

    /**
     * Returns the total number of bytes written by this encoder, some of which may still be buffered.
     */
    public long getWritePosition() {
        return output.total();
    }

    @Override
    public void flush() {
        output.flush();
    }

    @Override
    public void close() {
        output.close();
    }

    /** Discards the string table; a later string write starts a fresh table. */
    public void done() {
        strings = null;
    }
}

View File

@ -0,0 +1,51 @@
/*
* Copyright 2012 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree.serialize.kryo;
import seaweedfs.client.btree.serialize.*;
/**
 * Adapts a {@link StatefulSerializer} for {@code T} to a {@code StatefulSerializer<Object>}.
 *
 * <p>Values handed to the writer are checked with {@link Class#cast(Object)} before they reach
 * the wrapped serializer; values coming from the reader are returned as-is.
 *
 * @param <T> the type handled by the wrapped serializer
 */
public class TypeSafeSerializer<T> implements StatefulSerializer<Object> {
    private final Class<T> type;
    private final StatefulSerializer<T> serializer;

    /**
     * @param type the runtime class used to check values before writing
     * @param serializer the serializer that does the actual work
     */
    public TypeSafeSerializer(Class<T> type, StatefulSerializer<T> serializer) {
        this.type = type;
        this.serializer = serializer;
    }

    /** Wraps the delegate's reader so that it is exposed as a reader of {@code Object}. */
    @Override
    public ObjectReader<Object> newReader(Decoder decoder) {
        final ObjectReader<T> typedReader = serializer.newReader(decoder);
        return new ObjectReader<Object>() {
            @Override
            public Object read() throws Exception {
                final T value = typedReader.read();
                return value;
            }
        };
    }

    /** Wraps the delegate's writer; each value is cast to {@code T} before being written. */
    @Override
    public ObjectWriter<Object> newWriter(Encoder encoder) {
        final ObjectWriter<T> typedWriter = serializer.newWriter(encoder);
        return new ObjectWriter<Object>() {
            @Override
            public void write(Object value) throws Exception {
                // Class#cast throws ClassCastException for a value of the wrong type.
                final T typedValue = type.cast(value);
                typedWriter.write(typedValue);
            }
        };
    }
}

View File

@ -0,0 +1,143 @@
package seaewedfs.mmap;
import org.junit.Test;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
/**
 * Manual experiments with memory-mapped files inside the directory {@link #dir}.
 * The log excerpts kept in the comments below were captured while running against a mounted
 * directory and document what the mount observed at each step.
 */
public class MmapFileTest {
    File dir = new File("/Users/chris/tmp/mm/dev");

    /**
     * Maps an existing (or freshly created) file read-only, releases the channel and file,
     * then overwrites the file through a plain output stream.
     *
     * @throws IOException when any file operation fails, so the failure is reported by the
     *         test runner instead of being printed and ignored
     */
    @Test
    public void testMmap() throws IOException {
        System.out.println("starting ...");
        File f = new File(dir, "mmap_file.txt");
        // try-with-resources closes the channel and then the file even when map() throws.
        try (RandomAccessFile raf = new RandomAccessFile(f, "rw");
             FileChannel fc = raf.getChannel()) {
            fc.map(FileChannel.MapMode.READ_ONLY, 0, fc.size());
        }
        try (FileOutputStream fos = new FileOutputStream(f)) {
            fos.write("abcdefg".getBytes());
        }
        System.out.println("completed!");
    }

    /**
     * Maps 256 KiB of a file read-write and writes a short string at the start of the mapping.
     *
     * @throws IOException when opening or mapping the file fails
     */
    @Test
    public void testBigMmap() throws IOException {
        /*
        // new file
        I0817 09:48:02 25175 dir.go:147] create /dev/mmap_big.txt: OpenReadWrite+OpenCreate
        I0817 09:48:02 25175 wfs.go:116] AcquireHandle /dev/mmap_big.txt uid=502 gid=20
        I0817 09:48:02 25175 file.go:62] file Attr /dev/mmap_big.txt, open:1, size: 0
        I0817 09:48:02 25175 meta_cache_subscribe.go:32] creating /dev/mmap_big.txt
        //get channel
        I0817 09:48:26 25175 file.go:62] file Attr /dev/mmap_big.txt, open:1, size: 0
        I0817 09:48:32 25175 file.go:62] file Attr /dev/mmap_big.txt, open:1, size: 0
        I0817 09:48:32 25175 wfs.go:116] AcquireHandle /dev/mmap_big.txt uid=0 gid=0
        I0817 09:48:32 25175 filehandle.go:160] Release /dev/mmap_big.txt fh 14968871991130164560
        //fileChannel.map
        I0817 09:49:18 25175 file.go:62] file Attr /dev/mmap_big.txt, open:1, size: 0
        I0817 09:49:18 25175 file.go:112] /dev/mmap_big.txt file setattr set size=262144 chunks=0
        I0817 09:49:18 25175 file.go:62] file Attr /dev/mmap_big.txt, open:1, size: 262144
        I0817 09:49:18 25175 file.go:62] file Attr /dev/mmap_big.txt, open:1, size: 262144
        I0817 09:49:18 25175 file.go:62] file Attr /dev/mmap_big.txt, open:1, size: 262144
        // buffer.put
        I0817 09:49:49 25175 filehandle.go:57] /dev/mmap_big.txt read fh 14968871991130164560: [0,32768) size 32768 resp.Data len=0 cap=32768
        I0817 09:49:49 25175 reader_at.go:113] zero2 [0,32768)
        I0817 09:49:50 25175 file.go:62] file Attr /dev/mmap_big.txt, open:1, size: 262144
        I0817 09:49:53 25175 file.go:233] /dev/mmap_big.txt fsync file Fsync [ID=0x4 Node=0xe Uid=0 Gid=0 Pid=0] Handle 0x2 Flags 1
        //close
        I0817 09:50:14 25175 file.go:62] file Attr /dev/mmap_big.txt, open:1, size: 262144
        I0817 09:50:14 25175 dirty_page.go:130] saveToStorage /dev/mmap_big.txt 1,315b69812039e5 [0,4096) of 262144 bytes
        I0817 09:50:14 25175 file.go:274] /dev/mmap_big.txt existing 0 chunks adds 1 more
        I0817 09:50:14 25175 filehandle.go:218] /dev/mmap_big.txt set chunks: 1
        I0817 09:50:14 25175 filehandle.go:220] /dev/mmap_big.txt chunks 0: 1,315b69812039e5 [0,4096)
        I0817 09:50:14 25175 meta_cache_subscribe.go:23] deleting /dev/mmap_big.txt
        I0817 09:50:14 25175 meta_cache_subscribe.go:32] creating /dev/mmap_big.txt
        // end of test
        I0817 09:50:41 25175 file.go:62] file Attr /dev/mmap_big.txt, open:1, size: 262144
        I0817 09:50:41 25175 filehandle.go:160] Release /dev/mmap_big.txt fh 14968871991130164560
        */
        // Create file object
        File file = new File(dir, "mmap_big.txt");
        try (RandomAccessFile randomAccessFile = new RandomAccessFile(file, "rw")) {
            // Get file channel in read-write mode
            FileChannel fileChannel = randomAccessFile.getChannel();
            // Get direct byte buffer access using channel.map() operation
            MappedByteBuffer buffer = fileChannel.map(FileChannel.MapMode.READ_WRITE, 0, 4096 * 8 * 8);
            //Write the content using put methods
            buffer.put("howtodoinjava.com".getBytes());
        }
        /*
        > meta.cat /dev/mmap_big.txt
        {
        "name": "mmap_big.txt",
        "isDirectory": false,
        "chunks": [
        {
        "fileId": "1,315b69812039e5",
        "offset": "0",
        "size": "4096",
        "mtime": "1597683014026365000",
        "eTag": "985ab0ac",
        "sourceFileId": "",
        "fid": {
        "volumeId": 1,
        "fileKey": "3234665",
        "cookie": 2166372837
        },
        "sourceFid": null,
        "cipherKey": null,
        "isCompressed": true,
        "isChunkManifest": false
        }
        ],
        "attributes": {
        "fileSize": "262144",
        "mtime": "1597683014",
        "fileMode": 420,
        "uid": 502,
        "gid": 20,
        "crtime": "1597682882",
        "mime": "application/octet-stream",
        "replication": "",
        "collection": "",
        "ttlSec": 0,
        "userName": "",
        "groupName": [
        ],
        "symlinkTarget": "",
        "md5": null
        },
        "extended": {
        }
        }
        */
    }
}

View File

@ -0,0 +1,476 @@
/*
* Copyright 2010 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package seaweedfs.client.btree;
import seaweedfs.client.btree.serialize.DefaultSerializer;
import seaweedfs.client.btree.serialize.Serializer;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import static org.hamcrest.CoreMatchers.*;
import static org.junit.Assert.assertNull;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.junit.Assert.assertTrue;
/**
 * Tests for {@link BTreePersistentIndexedCache}.
 *
 * <p>Cache files live in the directory returned by {@link #tmpDirFile(String)} and are not
 * deleted between tests or runs.
 */
public class BTreePersistentIndexedCacheTest {
    private final Serializer<String> stringSerializer = new DefaultSerializer<String>();
    private final Serializer<Integer> integerSerializer = new DefaultSerializer<Integer>();
    private BTreePersistentIndexedCache<String, Integer> cache;
    private File cacheFile;

    @Before
    public void setup() {
        cacheFile = tmpDirFile("cache.bin");
    }

    /**
     * Resolves {@code filename} inside the test directory, creating the directory if needed.
     *
     * <p>The directory defaults to the original hard-coded location and can be overridden with
     * the {@code seaweedfs.btree.test.dir} system property, for example
     * {@code -Dseaweedfs.btree.test.dir=/tmp/btree_test}.
     *
     * @param filename name of the file inside the test directory
     * @return the file handle; the file itself is not created
     * @throws IllegalStateException when the test directory cannot be created
     */
    public File tmpDirFile(String filename) {
        File f = new File(System.getProperty("seaweedfs.btree.test.dir", "/Users/chris/tmp/mm/dev/btree_test"));
        // mkdirs() returns false both on failure and when the directory already exists,
        // so the directory is re-checked before giving up.
        if (!f.isDirectory() && !f.mkdirs() && !f.isDirectory()) {
            throw new IllegalStateException("Cannot create test directory " + f);
        }
        return new File(f, filename);
    }

    private void createCache() {
        cache = new BTreePersistentIndexedCache<String, Integer>(cacheFile, stringSerializer, integerSerializer, (short) 4, 100);
    }

    private void verifyAndCloseCache() {
        cache.verify();
        cache.close();
    }

    @Test
    public void getReturnsNullWhenEntryDoesNotExist() {
        createCache();
        assertNull(cache.get("unknown"));
        verifyAndCloseCache();
    }

    @Test
    public void persistsAddedEntries() {
        createCache();
        checkAdds(1, 2, 3, 4, 5);
        verifyAndCloseCache();
    }

    @Test
    public void persistsAddedEntriesInReverseOrder() {
        createCache();
        checkAdds(5, 4, 3, 2, 1);
        verifyAndCloseCache();
    }

    @Test
    public void persistsAddedEntriesOverMultipleIndexBlocks() {
        createCache();
        checkAdds(3, 2, 11, 5, 7, 1, 10, 8, 9, 4, 6, 0);
        verifyAndCloseCache();
    }

    @Test
    public void persistsUpdates() {
        createCache();
        checkUpdates(3, 2, 11, 5, 7, 1, 10, 8, 9, 4, 6, 0);
        verifyAndCloseCache();
    }

    @Test
    public void handlesUpdatesWhenBlockSizeDecreases() {
        BTreePersistentIndexedCache<String, List<Integer>> cache =
                new BTreePersistentIndexedCache<String, List<Integer>>(
                        tmpDirFile("listcache.bin"), stringSerializer,
                        new DefaultSerializer<List<Integer>>(), (short) 4, 100);
        List<Integer> values = Arrays.asList(3, 2, 11, 5, 7, 1, 10, 8, 9, 4, 6, 0);
        Map<Integer, List<Integer>> updated = new LinkedHashMap<Integer, List<Integer>>();
        // Each round stores a shorter list than the previous one under every key.
        for (int i = 10; i > 0; i--) {
            for (Integer value : values) {
                String key = String.format("key_%d", value);
                List<Integer> newValue = new ArrayList<Integer>(i * 2);
                for (int j = 0; j < i * 2; j++) {
                    newValue.add(j);
                }
                cache.put(key, newValue);
                updated.put(value, newValue);
            }
            checkListEntries(cache, updated);
        }
        cache.reset();
        checkListEntries(cache, updated);
        cache.verify();
        cache.close();
    }

    private void checkListEntries(BTreePersistentIndexedCache<String, List<Integer>> cache, Map<Integer, List<Integer>> updated) {
        for (Map.Entry<Integer, List<Integer>> entry : updated.entrySet()) {
            String key = String.format("key_%d", entry.getKey());
            assertThat(cache.get(key), equalTo(entry.getValue()));
        }
    }

    @Test
    public void handlesUpdatesWhenBlockSizeIncreases() {
        BTreePersistentIndexedCache<String, List<Integer>> cache =
                new BTreePersistentIndexedCache<String, List<Integer>>(
                        tmpDirFile("listcache.bin"), stringSerializer,
                        new DefaultSerializer<List<Integer>>(), (short) 4, 100);
        List<Integer> values = Arrays.asList(3, 2, 11, 5, 7, 1, 10, 8, 9, 4, 6, 0);
        Map<Integer, List<Integer>> updated = new LinkedHashMap<Integer, List<Integer>>();
        // Each round stores a longer list than the previous one under every key.
        for (int i = 1; i < 10; i++) {
            for (Integer value : values) {
                String key = String.format("key_%d", value);
                List<Integer> newValue = new ArrayList<Integer>(i * 2);
                for (int j = 0; j < i * 2; j++) {
                    newValue.add(j);
                }
                cache.put(key, newValue);
                updated.put(value, newValue);
            }
            checkListEntries(cache, updated);
        }
        cache.reset();
        checkListEntries(cache, updated);
        cache.verify();
        cache.close();
    }

    @Test
    public void persistsAddedEntriesAfterReopen() {
        createCache();
        checkAdds(1, 2, 3, 4);
        cache.reset();
        checkAdds(5, 6, 7, 8);
        verifyAndCloseCache();
    }

    @Test
    public void persistsReplacedEntries() {
        createCache();
        cache.put("key_1", 1);
        cache.put("key_2", 2);
        cache.put("key_3", 3);
        cache.put("key_4", 4);
        cache.put("key_5", 5);
        cache.put("key_1", 1);
        cache.put("key_4", 12);
        assertThat(cache.get("key_1"), equalTo(1));
        assertThat(cache.get("key_2"), equalTo(2));
        assertThat(cache.get("key_3"), equalTo(3));
        assertThat(cache.get("key_4"), equalTo(12));
        assertThat(cache.get("key_5"), equalTo(5));
        cache.reset();
        assertThat(cache.get("key_1"), equalTo(1));
        assertThat(cache.get("key_2"), equalTo(2));
        assertThat(cache.get("key_3"), equalTo(3));
        assertThat(cache.get("key_4"), equalTo(12));
        assertThat(cache.get("key_5"), equalTo(5));
        verifyAndCloseCache();
    }

    @Test
    public void reusesEmptySpaceWhenPuttingEntries() {
        BTreePersistentIndexedCache<String, String> cache = new BTreePersistentIndexedCache<String, String>(cacheFile, stringSerializer, stringSerializer, (short) 4, 100);
        long beforeLen = cacheFile.length();
        // A non-empty file means a previous run left data behind; show what it stored.
        if (beforeLen > 0) {
            System.out.println(String.format("cache %s: %s", "key_new", cache.get("key_new")));
        }
        cache.put("key_1", "abcd");
        cache.put("key_2", "abcd");
        cache.put("key_3", "abcd");
        cache.put("key_4", "abcd");
        cache.put("key_5", "abcd");
        long len = cacheFile.length();
        assertTrue(len > 0L);
        System.out.println(String.format("cache file size %d => %d", beforeLen, len));
        cache.put("key_1", "1234");
        assertThat(cacheFile.length(), equalTo(len));
        cache.remove("key_1");
        cache.put("key_new", "a1b2");
        assertThat(cacheFile.length(), equalTo(len));
        cache.put("key_new", "longer value assertThat(cacheFile.length(), equalTo(len))");
        System.out.println(String.format("cache file size %d beforeLen %d", cacheFile.length(), len));
        // assertTrue(cacheFile.length() > len);
        len = cacheFile.length();
        cache.put("key_1", "1234");
        assertThat(cacheFile.length(), equalTo(len));
        cache.close();
    }

    @Test
    public void canHandleLargeNumberOfEntries() {
        createCache();
        int count = 2000;
        List<Integer> values = new ArrayList<Integer>();
        for (int i = 0; i < count; i++) {
            values.add(i);
        }
        checkAddsAndRemoves(null, values);
        long len = cacheFile.length();
        checkAddsAndRemoves(Collections.reverseOrder(), values);
        // need to make this better
        assertTrue(cacheFile.length() < (long)(1.4 * len));
        checkAdds(values);
        // need to make this better
        assertTrue(cacheFile.length() < (long) (1.4 * 1.4 * len));
        cache.close();
    }

    @Test
    public void persistsRemovalOfEntries() {
        createCache();
        checkAddsAndRemoves(1, 2, 3, 4, 5);
        verifyAndCloseCache();
    }

    @Test
    public void persistsRemovalOfEntriesInReverse() {
        createCache();
        checkAddsAndRemoves(Collections.<Integer>reverseOrder(), 1, 2, 3, 4, 5);
        verifyAndCloseCache();
    }

    @Test
    public void persistsRemovalOfEntriesOverMultipleIndexBlocks() {
        createCache();
        checkAddsAndRemoves(4, 12, 9, 1, 3, 10, 11, 7, 8, 2, 5, 6);
        verifyAndCloseCache();
    }

    @Test
    public void removalRedistributesRemainingEntriesWithLeftSibling() {
        createCache();
        // Ends up with: 1 2 3 -> 4 <- 5 6
        checkAdds(1, 2, 5, 6, 4, 3);
        cache.verify();
        cache.remove("key_5");
        verifyAndCloseCache();
    }

    @Test
    public void removalMergesRemainingEntriesIntoLeftSibling() {
        createCache();
        // Ends up with: 1 2 -> 3 <- 4 5
        checkAdds(1, 2, 4, 5, 3);
        cache.verify();
        cache.remove("key_4");
        verifyAndCloseCache();
    }

    @Test
    public void removalRedistributesRemainingEntriesWithRightSibling() {
        createCache();
        // Ends up with: 1 2 -> 3 <- 4 5 6
        checkAdds(1, 2, 4, 5, 3, 6);
        cache.verify();
        cache.remove("key_2");
        verifyAndCloseCache();
    }

    @Test
    public void removalMergesRemainingEntriesIntoRightSibling() {
        createCache();
        // Ends up with: 1 2 -> 3 <- 4 5
        checkAdds(1, 2, 4, 5, 3);
        cache.verify();
        cache.remove("key_2");
        verifyAndCloseCache();
    }

    @Test
    public void handlesOpeningATruncatedCacheFile() throws IOException {
        BTreePersistentIndexedCache<String, Integer> cache = new BTreePersistentIndexedCache<String, Integer>(cacheFile, stringSerializer, integerSerializer);
        assertNull(cache.get("key_1"));
        cache.put("key_1", 99);
        // Chop the last 10 bytes off the file; the handle is closed even if setLength fails.
        try (RandomAccessFile file = new RandomAccessFile(cacheFile, "rw")) {
            file.setLength(file.length() - 10);
        }
        cache.reset();
        assertNull(cache.get("key_1"));
        cache.verify();
        cache.close();
    }

    @Test
    public void canUseFileAsKey() {
        BTreePersistentIndexedCache<File, Integer> cache = new BTreePersistentIndexedCache<File, Integer>(cacheFile, new DefaultSerializer<File>(), integerSerializer);
        cache.put(new File("file"), 1);
        cache.put(new File("dir/file"), 2);
        cache.put(new File("File"), 3);
        assertThat(cache.get(new File("file")), equalTo(1));
        assertThat(cache.get(new File("dir/file")), equalTo(2));
        assertThat(cache.get(new File("File")), equalTo(3));
        cache.close();
    }

    @Test
    public void handlesKeysWithSameHashCode() {
        createCache();
        String key1 = new String(new byte[]{2, 31});
        String key2 = new String(new byte[]{1, 62});
        cache.put(key1, 1);
        cache.put(key2, 2);
        assertThat(cache.get(key1), equalTo(1));
        assertThat(cache.get(key2), equalTo(2));
        cache.close();
    }

    private void checkAdds(Integer... values) {
        checkAdds(Arrays.asList(values));
    }

    /**
     * Puts {@code key_<n> -> n} for every value, then checks that each entry can be read back
     * both before and after {@code cache.reset()}.
     *
     * @return the entries that were added, in insertion order
     */
    private Map<String, Integer> checkAdds(Iterable<Integer> values) {
        Map<String, Integer> added = new LinkedHashMap<String, Integer>();
        for (Integer value : values) {
            String key = String.format("key_%d", value);
            cache.put(key, value);
            added.put(key, value);
        }
        for (Map.Entry<String, Integer> entry : added.entrySet()) {
            assertThat(cache.get(entry.getKey()), equalTo(entry.getValue()));
        }
        cache.reset();
        for (Map.Entry<String, Integer> entry : added.entrySet()) {
            assertThat(cache.get(entry.getKey()), equalTo(entry.getValue()));
        }
        return added;
    }

    private void checkUpdates(Integer... values) {
        checkUpdates(Arrays.asList(values));
    }

    /**
     * Overwrites every key ten times with a different value, checking all entries after each
     * round and once more after {@code cache.reset()}.
     *
     * @return the final value stored for each input value
     */
    private Map<Integer, Integer> checkUpdates(Iterable<Integer> values) {
        Map<Integer, Integer> updated = new LinkedHashMap<Integer, Integer>();
        for (int i = 0; i < 10; i++) {
            for (Integer value : values) {
                String key = String.format("key_%d", value);
                int newValue = value + (i * 100);
                cache.put(key, newValue);
                updated.put(value, newValue);
            }
            for (Map.Entry<Integer, Integer> entry : updated.entrySet()) {
                String key = String.format("key_%d", entry.getKey());
                assertThat(cache.get(key), equalTo(entry.getValue()));
            }
        }
        cache.reset();
        for (Map.Entry<Integer, Integer> entry : updated.entrySet()) {
            String key = String.format("key_%d", entry.getKey());
            assertThat(cache.get(key), equalTo(entry.getValue()));
        }
        return updated;
    }

    private void checkAddsAndRemoves(Integer... values) {
        checkAddsAndRemoves(null, values);
    }

    private void checkAddsAndRemoves(Comparator<Integer> comparator, Integer... values) {
        checkAddsAndRemoves(comparator, Arrays.asList(values));
    }

    /**
     * Adds all values, removes them in the order given by {@code comparator}, and checks that
     * they stay removed after {@code cache.reset()}.
     *
     * @param comparator removal order; {@code null} means natural ordering
     */
    private void checkAddsAndRemoves(Comparator<Integer> comparator, Collection<Integer> values) {
        checkAdds(values);
        List<Integer> deleteValues = new ArrayList<Integer>(values);
        Collections.sort(deleteValues, comparator);
        for (Integer value : deleteValues) {
            String key = String.format("key_%d", value);
            assertThat(cache.get(key), notNullValue());
            cache.remove(key);
            assertThat(cache.get(key), nullValue());
        }
        cache.reset();
        cache.verify();
        for (Integer value : deleteValues) {
            String key = String.format("key_%d", value);
            assertThat(cache.get(key), nullValue());
        }
    }
}

View File

@ -61,7 +61,7 @@ func TestCreateBucket(t *testing.T) {
}
func TestListBuckets(t *testing.T) {
func TestPutObject(t *testing.T) {
input := &s3.PutObjectInput{
ACL: aws.String("authenticated-read"),
@ -89,7 +89,7 @@ func TestListBuckets(t *testing.T) {
}
func TestPutObject(t *testing.T) {
func TestListBucket(t *testing.T) {
result, err := svc.ListBuckets(nil)
if err != nil {
@ -105,6 +105,23 @@ func TestPutObject(t *testing.T) {
}
func TestListObjectV2(t *testing.T) {
listObj, err := svc.ListObjectsV2(&s3.ListObjectsV2Input{
Bucket: aws.String(Bucket),
Prefix: aws.String("foo"),
Delimiter: aws.String("/"),
})
if err != nil {
exitErrorf("Unable to list objects, %v", err)
}
for _, content := range listObj.Contents {
fmt.Println(aws.StringValue(content.Key))
}
fmt.Printf("list: %s\n", listObj)
}
func exitErrorf(msg string, args ...interface{}) {
fmt.Fprintf(os.Stderr, msg+"\n", args...)
os.Exit(1)

View File

@ -118,7 +118,7 @@ const (
type needleState struct {
state uint8
size uint32
size types.Size
}
func getVolumeFiles(v uint32, addr string) (map[types.NeedleId]needleState, int64, error) {
@ -154,8 +154,8 @@ func getVolumeFiles(v uint32, addr string) (map[types.NeedleId]needleState, int6
var maxOffset int64
files := map[types.NeedleId]needleState{}
err = idx.WalkIndexFile(idxFile, func(key types.NeedleId, offset types.Offset, size uint32) error {
if offset.IsZero() || size == types.TombstoneFileSize {
err = idx.WalkIndexFile(idxFile, func(key types.NeedleId, offset types.Offset, size types.Size) error {
if offset.IsZero() || size.IsDeleted() {
files[key] = needleState{
state: stateDeleted,
size: size,

View File

@ -98,7 +98,7 @@ func iterateEntries(datBackend backend.BackendStorageFile, idxFile *os.File, vis
// parse index file entry
key := util.BytesToUint64(bytes[0:8])
offsetFromIndex := util.BytesToUint32(bytes[8:12])
sizeFromIndex := util.BytesToUint32(bytes[12:16])
sizeFromIndex := types.BytesToSize(bytes[12:16])
count, _ = idxFile.ReadAt(bytes, readerOffset)
readerOffset += int64(count)
@ -123,7 +123,7 @@ func iterateEntries(datBackend backend.BackendStorageFile, idxFile *os.File, vis
}
}()
if n.Size <= n.DataSize {
if n.Size <= types.Size(n.DataSize) {
continue
}
visitNeedle(n, offset)

View File

@ -1,83 +0,0 @@
package main
import (
"bytes"
"compress/gzip"
"crypto/md5"
"flag"
"io"
"io/ioutil"
"net/http"
"time"
"github.com/chrislusf/seaweedfs/weed/glog"
"github.com/chrislusf/seaweedfs/weed/storage"
"github.com/chrislusf/seaweedfs/weed/storage/needle"
"github.com/chrislusf/seaweedfs/weed/storage/super_block"
"github.com/chrislusf/seaweedfs/weed/util"
)
// VolumeFileScanner4SeeDat is the scanner passed to storage.ScanVolumeFile by main.
// It remembers the needle version announced by the volume's super block.
type VolumeFileScanner4SeeDat struct {
version needle.Version
}
// VisitSuperBlock records the version found in the super block and never fails.
func (scanner *VolumeFileScanner4SeeDat) VisitSuperBlock(superBlock super_block.SuperBlock) error {
scanner.version = superBlock.Version
return nil
}
// ReadNeedleBody always returns true.
// NOTE(review): presumably this asks the volume scan to load needle bodies, which VisitNeedle
// needs in order to inspect n.Data - confirm against storage.ScanVolumeFile.
func (scanner *VolumeFileScanner4SeeDat) ReadNeedleBody() bool {
return true
}
// Running totals for the scan. Only diffbytes is updated (by VisitNeedle); files and
// filebytes are declared but not referenced anywhere else in this file.
var (
files = int64(0)
filebytes = int64(0)
diffbytes = int64(0)
)
// Compresssion estimates how well data compresses by gzipping its first 128 bytes and
// returning compressedLength/128. Inputs of 128 bytes or fewer are not sampled and yield 100.0.
// Any error from util.GzipData is ignored, as in the original tool.
func Compresssion(data []byte) float64 {
	const sampleSize = 128
	if len(data) > sampleSize {
		sample, _ := util.GzipData(data[:sampleSize])
		return float64(len(sample)*10) / 1280.0
	}
	return 100.0
}
// VisitNeedle logs one line describing the needle and, when the needle data is a gzip stream,
// a second line with the stored size, the decompressed size, their difference (also added to
// the running diffbytes total), the detected content type, a compression estimate and the MD5
// of the decompressed data. It always returns nil so the scan continues.
func (scanner *VolumeFileScanner4SeeDat) VisitNeedle(n *needle.Needle, offset int64, needleHeader, needleBody []byte) error {
	t := time.Unix(int64(n.AppendAtNs)/int64(time.Second), int64(n.AppendAtNs)%int64(time.Second))
	glog.V(0).Info("----------------------------------------------------------------------------------")
	glog.V(0).Infof("%d,%s%x offset %d size %d(%s) cookie %x appendedAt %v hasmime[%t] mime[%s] (len: %d)",
		*volumeId, n.Id, n.Cookie, offset, n.Size, util.BytesToHumanReadable(uint64(n.Size)), n.Cookie, t, n.HasMime(), string(n.Mime), len(n.Mime))

	r, err := gzip.NewReader(bytes.NewReader(n.Data))
	if err != nil {
		// Not a gzip stream (or an unreadable header): nothing more to report.
		glog.V(0).Infof("no gzip!")
		return nil
	}
	defer r.Close()

	buf := bytes.Buffer{}
	c, copyErr := io.Copy(&buf, r)
	if copyErr != nil {
		// Keep reporting on the bytes that did decompress, but make the problem visible
		// instead of silently presenting a truncated size.
		glog.V(0).Infof("gzip stream could not be fully decompressed after %d bytes: %v", c, copyErr)
	}
	d := buf.Bytes()

	h := md5.New()
	h.Write(d) // hash.Hash.Write never returns an error

	diff := (int64(n.DataSize) - int64(c))
	diffbytes += diff
	glog.V(0).Infof("was gzip! stored_size: %d orig_size: %d diff: %d(%d) mime:%s compression-of-128: %.2f md5: %x", n.DataSize, c, diff, diffbytes, http.DetectContentType(d), Compresssion(d), h.Sum(nil))
	return nil
}
// Command line flags of the tool.
var (
// Blank reference to ioutil.ReadAll; it keeps the io/ioutil import in use.
_ = ioutil.ReadAll
volumePath = flag.String("dir", "/tmp", "data directory to store files")
volumeCollection = flag.String("collection", "", "the volume collection name")
volumeId = flag.Int("volumeId", -1, "a volume id. The volume should already exist in the dir. The volume index file should not exist.")
)
func main() {
flag.Parse()
vid := needle.VolumeId(*volumeId)
glog.V(0).Info("Starting")
scanner := &VolumeFileScanner4SeeDat{}
err := storage.ScanVolumeFile(*volumePath, *volumeCollection, vid, storage.NeedleMapInMemory, scanner)
if err != nil {
glog.Fatalf("Reading Volume File [ERROR] %s\n", err)
}
}

View File

@ -36,7 +36,7 @@ func main() {
}
defer indexFile.Close()
idx.WalkIndexFile(indexFile, func(key types.NeedleId, offset types.Offset, size uint32) error {
idx.WalkIndexFile(indexFile, func(key types.NeedleId, offset types.Offset, size types.Size) error {
fmt.Printf("key:%v offset:%v size:%v(%v)\n", key, offset, size, util.BytesToHumanReadable(uint64(size)))
return nil
})

19
weed/Makefile Normal file
View File

@ -0,0 +1,19 @@
# Helpers for building the weed binary and running it under a headless Delve debugger.
BINARY = weed

SOURCE_DIR = .

all: debug_mount

# None of these targets produce a file of the same name, so all of them are phony.
.PHONY : all clean debug_mount debug_server

clean:
	go clean $(SOURCE_DIR)
	rm -f $(BINARY)

# Build without optimizations or inlining (-N -l), then run `weed mount` under Delve on :2345.
debug_mount:
	go build -gcflags="all=-N -l"
	dlv --listen=:2345 --headless=true --api-version=2 --accept-multiclient exec weed -- mount -dir=~/tmp/mm

# Same build, but runs `weed server` with the filer and S3 gateway enabled.
debug_server:
	go build -gcflags="all=-N -l"
	dlv --listen=:2345 --headless=true --api-version=2 --accept-multiclient exec weed -- server -dir=/Volumes/mobile_disk/99 -filer -volume.port=8343 -s3 -volume.max=0

View File

@ -72,9 +72,9 @@ var (
func printNeedle(vid needle.VolumeId, n *needle.Needle, version needle.Version, deleted bool) {
key := needle.NewFileIdFromNeedle(vid, n).String()
size := n.DataSize
size := int32(n.DataSize)
if version == needle.Version1 {
size = n.Size
size = int32(n.Size)
}
fmt.Printf("%s\t%s\t%d\t%t\t%s\t%s\t%s\t%t\n",
key,
@ -111,7 +111,7 @@ func (scanner *VolumeFileScanner4Export) VisitNeedle(n *needle.Needle, offset in
nv, ok := needleMap.Get(n.Id)
glog.V(3).Infof("key %d offset %d size %d disk_size %d compressed %v ok %v nv %+v",
n.Id, offset, n.Size, n.DiskSize(scanner.version), n.IsCompressed(), ok, nv)
if ok && nv.Size > 0 && nv.Size != types.TombstoneFileSize && nv.Offset.ToAcutalOffset() == offset {
if ok && nv.Size.IsValid() && nv.Offset.ToAcutalOffset() == offset {
if newerThanUnix >= 0 && n.HasLastModifiedDate() && n.LastModified < uint64(newerThanUnix) {
glog.V(3).Infof("Skipping this file, as it's old enough: LastModified %d vs %d",
n.LastModified, newerThanUnix)

View File

@ -48,7 +48,7 @@ func (scanner *VolumeFileScanner4Fix) ReadNeedleBody() bool {
func (scanner *VolumeFileScanner4Fix) VisitNeedle(n *needle.Needle, offset int64, needleHeader, needleBody []byte) error {
glog.V(2).Infof("key %d offset %d size %d disk_size %d compressed %v", n.Id, offset, n.Size, n.DiskSize(scanner.version), n.IsCompressed())
if n.Size > 0 && n.Size != types.TombstoneFileSize {
if n.Size.IsValid() {
pe := scanner.nm.Set(n.Id, types.ToOffset(offset), n.Size)
glog.V(2).Infof("saved %d with error %v", n.Size, pe)
} else {

View File

@ -96,7 +96,7 @@ func init() {
serverOptions.v.fixJpgOrientation = cmdServer.Flag.Bool("volume.images.fix.orientation", false, "Adjust jpg orientation when uploading.")
serverOptions.v.readRedirect = cmdServer.Flag.Bool("volume.read.redirect", true, "Redirect moved or non-local volumes.")
serverOptions.v.compactionMBPerSecond = cmdServer.Flag.Int("volume.compactionMBps", 0, "limit compaction speed in mega bytes per second")
serverOptions.v.fileSizeLimitMB = cmdServer.Flag.Int("volume.fileSizeLimitMB", 256, "limit file size to avoid out of memory")
serverOptions.v.fileSizeLimitMB = cmdServer.Flag.Int("volume.fileSizeLimitMB", 1024, "limit file size to avoid out of memory")
serverOptions.v.publicUrl = cmdServer.Flag.String("volume.publicUrl", "", "publicly accessible address")
serverOptions.v.pprof = &False

View File

@ -76,7 +76,7 @@ func init() {
v.cpuProfile = cmdVolume.Flag.String("cpuprofile", "", "cpu profile output file")
v.memProfile = cmdVolume.Flag.String("memprofile", "", "memory profile output file")
v.compactionMBPerSecond = cmdVolume.Flag.Int("compactionMBps", 0, "limit background compaction or copying speed in mega bytes per second")
v.fileSizeLimitMB = cmdVolume.Flag.Int("fileSizeLimitMB", 256, "limit file size to avoid out of memory")
v.fileSizeLimitMB = cmdVolume.Flag.Int("fileSizeLimitMB", 1024, "limit file size to avoid out of memory")
v.pprof = cmdVolume.Flag.Bool("pprof", false, "enable pprof http handlers. precludes --memprofile and --cpuprofile")
}

View File

@ -22,6 +22,7 @@ type Attr struct {
GroupNames []string
SymlinkTarget string
Md5 []byte
FileSize uint64
}
func (attr Attr) IsDirectory() bool {
@ -39,7 +40,7 @@ type Entry struct {
}
func (entry *Entry) Size() uint64 {
return TotalSize(entry.Chunks)
return maxUint64(TotalSize(entry.Chunks), entry.FileSize)
}
func (entry *Entry) Timestamp() time.Time {
@ -81,3 +82,10 @@ func FromPbEntry(dir string, entry *filer_pb.Entry) *Entry {
Chunks: entry.Chunks,
}
}
// maxUint64 returns the larger of x and y.
func maxUint64(x, y uint64) uint64 {
	if y >= x {
		return y
	}
	return x
}

View File

@ -53,6 +53,7 @@ func EntryAttributeToPb(entry *Entry) *filer_pb.FuseAttributes {
GroupName: entry.Attr.GroupNames,
SymlinkTarget: entry.Attr.SymlinkTarget,
Md5: entry.Attr.Md5,
FileSize: entry.Attr.FileSize,
}
}
@ -73,6 +74,7 @@ func PbToEntryAttribute(attr *filer_pb.FuseAttributes) Attr {
t.GroupNames = attr.GroupName
t.SymlinkTarget = attr.SymlinkTarget
t.Md5 = attr.Md5
t.FileSize = attr.FileSize
return t
}

View File

@ -64,7 +64,7 @@ func fetchChunk(lookupFileIdFn LookupFileIdFunctionType, fileId string, cipherKe
return nil, err
}
var buffer bytes.Buffer
err = util.ReadUrlAsStream(urlString, cipherKey, isGzipped, true, 0, 0, func(data []byte) {
err = util.ReadUrlAsStream(urlString+"?readDeleted=true", cipherKey, isGzipped, true, 0, 0, func(data []byte) {
buffer.Write(data)
})
if err != nil {

View File

@ -20,6 +20,10 @@ func TotalSize(chunks []*filer_pb.FileChunk) (size uint64) {
return
}
// FileSize returns the logical size of entry: the larger of the size covered by its chunks
// and the FileSize recorded in its attributes. An entry without attributes falls back to the
// chunk-derived size instead of dereferencing a nil pointer.
func FileSize(entry *filer_pb.Entry) (size uint64) {
	chunkSize := TotalSize(entry.Chunks)
	if entry.Attributes == nil {
		return chunkSize
	}
	return maxUint64(chunkSize, entry.Attributes.FileSize)
}
func ETag(entry *filer_pb.Entry) (etag string) {
if entry.Attributes == nil || entry.Attributes.Md5 == nil {
return ETagChunks(entry.Chunks)
@ -100,7 +104,7 @@ type ChunkView struct {
FileId string
Offset int64
Size uint64
LogicOffset int64
LogicOffset int64 // actual offset in the file, for the data specified via [offset, offset+size) in current chunk
ChunkSize uint64
CipherKey []byte
IsGzipped bool
@ -130,17 +134,18 @@ func ViewFromVisibleIntervals(visibles []VisibleInterval, offset int64, size int
for _, chunk := range visibles {
if chunk.start <= offset && offset < chunk.stop && offset < stop {
chunkStart, chunkStop := max(offset, chunk.start), min(stop, chunk.stop)
if chunkStart < chunkStop {
views = append(views, &ChunkView{
FileId: chunk.fileId,
Offset: offset - chunk.start, // offset is the data starting location in this file id
Size: uint64(min(chunk.stop, stop) - offset),
LogicOffset: offset,
Offset: chunkStart - chunk.start + chunk.chunkOffset,
Size: uint64(chunkStop - chunkStart),
LogicOffset: chunkStart,
ChunkSize: chunk.chunkSize,
CipherKey: chunk.cipherKey,
IsGzipped: chunk.isGzipped,
})
offset = min(chunk.stop, stop)
}
}
@ -149,10 +154,11 @@ func ViewFromVisibleIntervals(visibles []VisibleInterval, offset int64, size int
}
func logPrintf(name string, visibles []VisibleInterval) {
/*
log.Printf("%s len %d", name, len(visibles))
glog.V(0).Infof("%s len %d", name, len(visibles))
for _, v := range visibles {
log.Printf("%s: => %+v", name, v)
glog.V(0).Infof("%s: [%d,%d)", name, v.start, v.stop)
}
*/
}
@ -165,7 +171,7 @@ var bufPool = sync.Pool{
func MergeIntoVisibles(visibles, newVisibles []VisibleInterval, chunk *filer_pb.FileChunk) []VisibleInterval {
newV := newVisibleInterval(chunk.Offset, chunk.Offset+int64(chunk.Size), chunk.GetFileIdString(), chunk.Mtime, chunk.Size, chunk.CipherKey, chunk.IsCompressed)
newV := newVisibleInterval(chunk.Offset, chunk.Offset+int64(chunk.Size), chunk.GetFileIdString(), chunk.Mtime, 0, chunk.Size, chunk.CipherKey, chunk.IsCompressed)
length := len(visibles)
if length == 0 {
@ -177,13 +183,13 @@ func MergeIntoVisibles(visibles, newVisibles []VisibleInterval, chunk *filer_pb.
}
logPrintf(" before", visibles)
chunkStop := chunk.Offset + int64(chunk.Size)
for _, v := range visibles {
if v.start < chunk.Offset && chunk.Offset < v.stop {
newVisibles = append(newVisibles, newVisibleInterval(v.start, chunk.Offset, v.fileId, v.modifiedTime, chunk.Size, v.cipherKey, v.isGzipped))
newVisibles = append(newVisibles, newVisibleInterval(v.start, chunk.Offset, v.fileId, v.modifiedTime, v.chunkOffset, v.chunkSize, v.cipherKey, v.isGzipped))
}
chunkStop := chunk.Offset + int64(chunk.Size)
if v.start < chunkStop && chunkStop < v.stop {
newVisibles = append(newVisibles, newVisibleInterval(chunkStop, v.stop, v.fileId, v.modifiedTime, chunk.Size, v.cipherKey, v.isGzipped))
newVisibles = append(newVisibles, newVisibleInterval(chunkStop, v.stop, v.fileId, v.modifiedTime, v.chunkOffset+(chunkStop-v.start), v.chunkSize, v.cipherKey, v.isGzipped))
}
if chunkStop <= v.start || v.stop <= chunk.Offset {
newVisibles = append(newVisibles, v)
@ -219,6 +225,7 @@ func NonOverlappingVisibleIntervals(lookupFileIdFn LookupFileIdFunctionType, chu
var newVisibles []VisibleInterval
for _, chunk := range chunks {
// glog.V(0).Infof("merge [%d,%d)", chunk.Offset, chunk.Offset+int64(chunk.Size))
newVisibles = MergeIntoVisibles(visibles, newVisibles, chunk)
t := visibles[:0]
visibles = newVisibles
@ -239,17 +246,19 @@ type VisibleInterval struct {
stop int64
modifiedTime int64
fileId string
chunkOffset int64
chunkSize uint64
cipherKey []byte
isGzipped bool
}
func newVisibleInterval(start, stop int64, fileId string, modifiedTime int64, chunkSize uint64, cipherKey []byte, isGzipped bool) VisibleInterval {
func newVisibleInterval(start, stop int64, fileId string, modifiedTime int64, chunkOffset int64, chunkSize uint64, cipherKey []byte, isGzipped bool) VisibleInterval {
return VisibleInterval{
start: start,
stop: stop,
fileId: fileId,
modifiedTime: modifiedTime,
chunkOffset: chunkOffset, // the starting position in the chunk
chunkSize: chunkSize,
cipherKey: cipherKey,
isGzipped: isGzipped,
@ -262,3 +271,9 @@ func min(x, y int64) int64 {
}
return y
}
// max returns the larger of the two signed 64-bit values.
func max(x, y int64) int64 {
	if y < x {
		return x
	}
	return y
}

View File

@ -1,10 +1,13 @@
package filer2
import (
"fmt"
"log"
"math"
"testing"
"fmt"
"github.com/stretchr/testify/assert"
"github.com/chrislusf/seaweedfs/weed/pb/filer_pb"
)
@ -91,12 +94,12 @@ func TestIntervalMerging(t *testing.T) {
// case 2: updates overwrite part of previous chunks
{
Chunks: []*filer_pb.FileChunk{
{Offset: 0, Size: 100, FileId: "abc", Mtime: 123},
{Offset: 0, Size: 50, FileId: "asdf", Mtime: 134},
{Offset: 0, Size: 100, FileId: "a", Mtime: 123},
{Offset: 0, Size: 70, FileId: "b", Mtime: 134},
},
Expected: []*VisibleInterval{
{start: 0, stop: 50, fileId: "asdf"},
{start: 50, stop: 100, fileId: "abc"},
{start: 0, stop: 70, fileId: "b"},
{start: 70, stop: 100, fileId: "a", chunkOffset: 70},
},
},
// case 3: updates overwrite full chunks
@ -126,14 +129,14 @@ func TestIntervalMerging(t *testing.T) {
// case 5: updates overwrite full chunks
{
Chunks: []*filer_pb.FileChunk{
{Offset: 0, Size: 100, FileId: "abc", Mtime: 123},
{Offset: 0, Size: 200, FileId: "asdf", Mtime: 184},
{Offset: 70, Size: 150, FileId: "abc", Mtime: 143},
{Offset: 80, Size: 100, FileId: "xxxx", Mtime: 134},
{Offset: 0, Size: 100, FileId: "a", Mtime: 123},
{Offset: 0, Size: 200, FileId: "d", Mtime: 184},
{Offset: 70, Size: 150, FileId: "c", Mtime: 143},
{Offset: 80, Size: 100, FileId: "b", Mtime: 134},
},
Expected: []*VisibleInterval{
{start: 0, stop: 200, fileId: "asdf"},
{start: 200, stop: 220, fileId: "abc"},
{start: 0, stop: 200, fileId: "d"},
{start: 200, stop: 220, fileId: "c", chunkOffset: 130},
},
},
// case 6: same updates
@ -204,6 +207,10 @@ func TestIntervalMerging(t *testing.T) {
t.Fatalf("failed on test case %d, interval %d, chunkId %s, expect %s",
i, x, interval.fileId, testcase.Expected[x].fileId)
}
if interval.chunkOffset != testcase.Expected[x].chunkOffset {
t.Fatalf("failed on test case %d, interval %d, chunkOffset %d, expect %d",
i, x, interval.chunkOffset, testcase.Expected[x].chunkOffset)
}
}
if len(intervals) != len(testcase.Expected) {
t.Fatalf("failed to compact test case %d, len %d expected %d", i, len(intervals), len(testcase.Expected))
@ -251,14 +258,14 @@ func TestChunksReading(t *testing.T) {
// case 2: updates overwrite part of previous chunks
{
Chunks: []*filer_pb.FileChunk{
{Offset: 0, Size: 100, FileId: "abc", Mtime: 123},
{Offset: 0, Size: 50, FileId: "asdf", Mtime: 134},
{Offset: 3, Size: 100, FileId: "a", Mtime: 123},
{Offset: 10, Size: 50, FileId: "b", Mtime: 134},
},
Offset: 25,
Size: 50,
Offset: 30,
Size: 40,
Expected: []*ChunkView{
{Offset: 25, Size: 25, FileId: "asdf", LogicOffset: 25},
{Offset: 0, Size: 25, FileId: "abc", LogicOffset: 50},
{Offset: 20, Size: 30, FileId: "b", LogicOffset: 30},
{Offset: 57, Size: 10, FileId: "a", LogicOffset: 60},
},
},
// case 3: updates overwrite full chunks
@ -286,22 +293,22 @@ func TestChunksReading(t *testing.T) {
Size: 400,
Expected: []*ChunkView{
{Offset: 0, Size: 200, FileId: "asdf", LogicOffset: 0},
// {Offset: 0, Size: 150, FileId: "xxxx"}, // missing intervals should not happen
{Offset: 0, Size: 150, FileId: "xxxx", LogicOffset: 250},
},
},
// case 5: updates overwrite full chunks
{
Chunks: []*filer_pb.FileChunk{
{Offset: 0, Size: 100, FileId: "abc", Mtime: 123},
{Offset: 0, Size: 200, FileId: "asdf", Mtime: 184},
{Offset: 70, Size: 150, FileId: "abc", Mtime: 143},
{Offset: 0, Size: 100, FileId: "a", Mtime: 123},
{Offset: 0, Size: 200, FileId: "c", Mtime: 184},
{Offset: 70, Size: 150, FileId: "b", Mtime: 143},
{Offset: 80, Size: 100, FileId: "xxxx", Mtime: 134},
},
Offset: 0,
Size: 220,
Expected: []*ChunkView{
{Offset: 0, Size: 200, FileId: "asdf", LogicOffset: 0},
{Offset: 0, Size: 20, FileId: "abc", LogicOffset: 200},
{Offset: 0, Size: 200, FileId: "c", LogicOffset: 0},
{Offset: 130, Size: 20, FileId: "b", LogicOffset: 200},
},
},
// case 6: same updates
@ -370,18 +377,21 @@ func TestChunksReading(t *testing.T) {
}
for i, testcase := range testcases {
if i != 2 {
// continue
}
log.Printf("++++++++++ read test case %d ++++++++++++++++++++", i)
chunks := ViewFromChunks(nil, testcase.Chunks, testcase.Offset, testcase.Size)
for x, chunk := range chunks {
log.Printf("read case %d, chunk %d, offset=%d, size=%d, fileId=%s",
i, x, chunk.Offset, chunk.Size, chunk.FileId)
if chunk.Offset != testcase.Expected[x].Offset {
t.Fatalf("failed on read case %d, chunk %d, Offset %d, expect %d",
i, x, chunk.Offset, testcase.Expected[x].Offset)
t.Fatalf("failed on read case %d, chunk %s, Offset %d, expect %d",
i, chunk.FileId, chunk.Offset, testcase.Expected[x].Offset)
}
if chunk.Size != testcase.Expected[x].Size {
t.Fatalf("failed on read case %d, chunk %d, Size %d, expect %d",
i, x, chunk.Size, testcase.Expected[x].Size)
t.Fatalf("failed on read case %d, chunk %s, Size %d, expect %d",
i, chunk.FileId, chunk.Size, testcase.Expected[x].Size)
}
if chunk.FileId != testcase.Expected[x].FileId {
t.Fatalf("failed on read case %d, chunk %d, FileId %s, expect %s",
@ -418,3 +428,74 @@ func BenchmarkCompactFileChunks(b *testing.B) {
CompactFileChunks(nil, chunks)
}
}
// TestViewFromVisibleIntervals requests the range starting at offset 0 with
// size math.MaxInt32 over three visible intervals that have gaps between them,
// and expects exactly one chunk view per visible interval.
func TestViewFromVisibleIntervals(t *testing.T) {
	visibles := []VisibleInterval{
		{start: 0, stop: 25, fileId: "fid1"},
		{start: 4096, stop: 8192, fileId: "fid2"},
		{start: 16384, stop: 18551, fileId: "fid3"},
	}

	views := ViewFromVisibleIntervals(visibles, 0, math.MaxInt32)

	// assert.Equal is a no-op when the lengths match, so no extra guard is needed
	assert.Equal(t, len(visibles), len(views), "ViewFromVisibleIntervals error")
}
// TestViewFromVisibleIntervals2 uses two adjacent visible intervals that both
// start well after the requested offset 0, and expects one chunk view for
// each of them.
func TestViewFromVisibleIntervals2(t *testing.T) {
	visibles := []VisibleInterval{
		{start: 344064, stop: 348160, fileId: "fid1"},
		{start: 348160, stop: 356352, fileId: "fid2"},
	}

	views := ViewFromVisibleIntervals(visibles, 0, math.MaxInt32)

	// assert.Equal is a no-op when the lengths match, so no extra guard is needed
	assert.Equal(t, len(visibles), len(views), "ViewFromVisibleIntervals error")
}
// TestViewFromVisibleIntervals3 issues a request at offset 1700 with size 1500
// against two visible intervals separated by a gap, and expects one chunk
// view for each interval.
func TestViewFromVisibleIntervals3(t *testing.T) {
	visibles := []VisibleInterval{
		{start: 1000, stop: 2000, fileId: "fid1"},
		{start: 3000, stop: 4000, fileId: "fid2"},
	}

	views := ViewFromVisibleIntervals(visibles, 1700, 1500)

	// assert.Equal is a no-op when the lengths match, so no extra guard is needed
	assert.Equal(t, len(visibles), len(views), "ViewFromVisibleIntervals error")
}

View File

@ -9,8 +9,6 @@ import (
"google.golang.org/grpc"
"github.com/karlseguin/ccache"
"github.com/chrislusf/seaweedfs/weed/glog"
"github.com/chrislusf/seaweedfs/weed/pb/filer_pb"
"github.com/chrislusf/seaweedfs/weed/util"
@ -27,7 +25,6 @@ var (
type Filer struct {
Store *FilerStoreWrapper
directoryCache *ccache.Cache
MasterClient *wdclient.MasterClient
fileIdDeletionQueue *util.UnboundedQueue
GrpcDialOption grpc.DialOption
@ -44,7 +41,6 @@ type Filer struct {
func NewFiler(masters []string, grpcDialOption grpc.DialOption,
filerHost string, filerGrpcPort uint32, collection string, replication string, notifyFn func()) *Filer {
f := &Filer{
directoryCache: ccache.New(ccache.Configure().MaxSize(1000).ItemsToPrune(100)),
MasterClient: wdclient.NewMasterClient(grpcDialOption, "filer", filerHost, filerGrpcPort, masters),
fileIdDeletionQueue: util.NewUnboundedQueue(),
GrpcDialOption: grpcDialOption,
@ -77,10 +73,6 @@ func (f *Filer) GetStore() (store FilerStore) {
return f.Store
}
func (f *Filer) DisableDirectoryCache() {
f.directoryCache = nil
}
func (fs *Filer) GetMaster() string {
return fs.MasterClient.GetMaster()
}
@ -117,16 +109,9 @@ func (f *Filer) CreateEntry(ctx context.Context, entry *Entry, o_excl bool, isFr
dirPath := "/" + util.Join(dirParts[:i]...)
// fmt.Printf("%d directory: %+v\n", i, dirPath)
// first check local cache
dirEntry := f.cacheGetDirectory(dirPath)
// not found, check the store directly
if dirEntry == nil {
glog.V(4).Infof("find uncached directory: %s", dirPath)
dirEntry, _ = f.FindEntry(ctx, util.FullPath(dirPath))
} else {
// glog.V(4).Infof("found cached directory: %s", dirPath)
}
// check the store directly
glog.V(4).Infof("find uncached directory: %s", dirPath)
dirEntry, _ := f.FindEntry(ctx, util.FullPath(dirPath))
// no such existing directory
if dirEntry == nil {
@ -166,9 +151,6 @@ func (f *Filer) CreateEntry(ctx context.Context, entry *Entry, o_excl bool, isFr
return fmt.Errorf("%s is a file", dirPath)
}
// cache the directory entry
f.cacheSetDirectory(dirPath, dirEntry, i)
// remember the direct parent directory entry
if i == len(dirParts)-1 {
lastDirectoryEntry = dirEntry
@ -295,45 +277,6 @@ func (f *Filer) doListDirectoryEntries(ctx context.Context, p util.FullPath, sta
return
}
func (f *Filer) cacheDelDirectory(dirpath string) {
if dirpath == "/" {
return
}
if f.directoryCache == nil {
return
}
f.directoryCache.Delete(dirpath)
return
}
func (f *Filer) cacheGetDirectory(dirpath string) *Entry {
if f.directoryCache == nil {
return nil
}
item := f.directoryCache.Get(dirpath)
if item == nil {
return nil
}
return item.Value().(*Entry)
}
func (f *Filer) cacheSetDirectory(dirpath string, dirEntry *Entry, level int) {
if f.directoryCache == nil {
return
}
minutes := 60
if level < 10 {
minutes -= level * 6
}
f.directoryCache.Set(dirpath, dirEntry, time.Duration(minutes)*time.Minute)
}
func (f *Filer) Shutdown() {
f.LocalMetaLogBuffer.Shutdown()
f.Store.Shutdown()

View File

@ -65,6 +65,7 @@ func (f *Filer) doBatchDeleteFolderMetaAndData(ctx context.Context, entry *Entry
}
if lastFileName == "" && !isRecursive && len(entries) > 0 {
// only for first iteration in the loop
glog.Errorf("deleting a folder %s has children: %+v ...", entry.FullPath, entries[0].Name())
return nil, fmt.Errorf("fail to delete non-empty folder: %s", entry.FullPath)
}
@ -73,7 +74,6 @@ func (f *Filer) doBatchDeleteFolderMetaAndData(ctx context.Context, entry *Entry
var dirChunks []*filer_pb.FileChunk
if sub.IsDirectory() {
dirChunks, err = f.doBatchDeleteFolderMetaAndData(ctx, sub, isRecursive, ignoreRecursiveError, shouldDeleteChunks, false)
f.cacheDelDirectory(string(sub.FullPath))
chunks = append(chunks, dirChunks...)
} else {
f.NotifyUpdateEvent(ctx, sub, nil, shouldDeleteChunks, isFromOtherCluster)
@ -107,9 +107,7 @@ func (f *Filer) doDeleteEntryMetaAndData(ctx context.Context, entry *Entry, shou
if storeDeletionErr := f.Store.DeleteEntry(ctx, entry.FullPath); storeDeletionErr != nil {
return fmt.Errorf("filer store delete: %v", storeDeletionErr)
}
if entry.IsDirectory() {
f.cacheDelDirectory(string(entry.FullPath))
} else {
if !entry.IsDirectory() {
f.NotifyUpdateEvent(ctx, entry, nil, shouldDeleteChunks, isFromOtherCluster)
}

View File

@ -1,6 +1,7 @@
package filer2
import (
"strings"
"time"
"github.com/chrislusf/seaweedfs/weed/glog"
@ -50,15 +51,14 @@ func (f *Filer) loopProcessingDeletion() {
fileIds = fileIds[:0]
}
deletionCount = len(toDeleteFileIds)
deleteResults, err := operation.DeleteFilesWithLookupVolumeId(f.GrpcDialOption, toDeleteFileIds, lookupFunc)
_, err := operation.DeleteFilesWithLookupVolumeId(f.GrpcDialOption, toDeleteFileIds, lookupFunc)
if err != nil {
glog.V(0).Infof("deleting fileIds len=%d error: %v", deletionCount, err)
if !strings.Contains(err.Error(), "already deleted") {
glog.V(0).Infof("deleting fileIds len=%d error: %v", deletionCount, err)
}
} else {
glog.V(1).Infof("deleting fileIds len=%d", deletionCount)
}
if len(deleteResults) != deletionCount {
glog.V(0).Infof("delete %d fileIds actual %d", deletionCount, len(deleteResults))
}
}
})

View File

@ -17,7 +17,6 @@ func TestCreateAndFind(t *testing.T) {
store := &LevelDBStore{}
store.initialize(dir)
filer.SetStore(store)
filer.DisableDirectoryCache()
fullpath := util.FullPath("/home/chris/this/is/one/file1.jpg")
@ -72,7 +71,6 @@ func TestEmptyRoot(t *testing.T) {
store := &LevelDBStore{}
store.initialize(dir)
filer.SetStore(store)
filer.DisableDirectoryCache()
ctx := context.Background()

View File

@ -17,7 +17,6 @@ func TestCreateAndFind(t *testing.T) {
store := &LevelDB2Store{}
store.initialize(dir, 2)
filer.SetStore(store)
filer.DisableDirectoryCache()
fullpath := util.FullPath("/home/chris/this/is/one/file1.jpg")
@ -72,7 +71,6 @@ func TestEmptyRoot(t *testing.T) {
store := &LevelDB2Store{}
store.initialize(dir, 2)
filer.SetStore(store)
filer.DisableDirectoryCache()
ctx := context.Background()

View File

@ -15,12 +15,11 @@ import (
type ChunkReadAt struct {
masterClient *wdclient.MasterClient
chunkViews []*ChunkView
buffer []byte
bufferOffset int64
lookupFileId func(fileId string) (targetUrl string, err error)
readerLock sync.Mutex
fileSize int64
chunkCache *chunk_cache.ChunkCache
chunkCache chunk_cache.ChunkCache
}
// var _ = io.ReaderAt(&ChunkReadAt{})
@ -54,13 +53,13 @@ func LookupFn(filerClient filer_pb.FilerClient) LookupFileIdFunctionType {
}
}
func NewChunkReaderAtFromClient(filerClient filer_pb.FilerClient, chunkViews []*ChunkView, chunkCache *chunk_cache.ChunkCache) *ChunkReadAt {
func NewChunkReaderAtFromClient(filerClient filer_pb.FilerClient, chunkViews []*ChunkView, chunkCache chunk_cache.ChunkCache, fileSize int64) *ChunkReadAt {
return &ChunkReadAt{
chunkViews: chunkViews,
lookupFileId: LookupFn(filerClient),
bufferOffset: -1,
chunkCache: chunkCache,
fileSize: fileSize,
}
}
@ -69,75 +68,78 @@ func (c *ChunkReadAt) ReadAt(p []byte, offset int64) (n int, err error) {
c.readerLock.Lock()
defer c.readerLock.Unlock()
for n < len(p) && err == nil {
readCount, readErr := c.doReadAt(p[n:], offset+int64(n))
n += readCount
err = readErr
if readCount == 0 {
return n, io.EOF
}
}
return
glog.V(4).Infof("ReadAt [%d,%d) of total file size %d bytes %d chunk views", offset, offset+int64(len(p)), c.fileSize, len(c.chunkViews))
return c.doReadAt(p[n:], offset+int64(n))
}
func (c *ChunkReadAt) doReadAt(p []byte, offset int64) (n int, err error) {
var found bool
for _, chunk := range c.chunkViews {
if chunk.LogicOffset <= offset && offset < chunk.LogicOffset+int64(chunk.Size) {
found = true
if c.bufferOffset != chunk.LogicOffset {
c.buffer, err = c.fetchChunkData(chunk)
if err != nil {
glog.Errorf("fetching chunk %+v: %v\n", chunk, err)
}
c.bufferOffset = chunk.LogicOffset
}
var buffer []byte
startOffset, remaining := offset, int64(len(p))
for i, chunk := range c.chunkViews {
if remaining <= 0 {
break
}
}
if !found {
return 0, io.EOF
if startOffset < chunk.LogicOffset {
gap := int(chunk.LogicOffset - startOffset)
glog.V(4).Infof("zero [%d,%d)", startOffset, startOffset+int64(gap))
n += int(min(int64(gap), remaining))
startOffset, remaining = chunk.LogicOffset, remaining-int64(gap)
if remaining <= 0 {
break
}
}
// fmt.Printf(">>> doReadAt [%d,%d), chunk[%d,%d)\n", offset, offset+int64(len(p)), chunk.LogicOffset, chunk.LogicOffset+int64(chunk.Size))
chunkStart, chunkStop := max(chunk.LogicOffset, startOffset), min(chunk.LogicOffset+int64(chunk.Size), startOffset+remaining)
if chunkStart >= chunkStop {
continue
}
glog.V(4).Infof("read [%d,%d), %d/%d chunk %s [%d,%d)", chunkStart, chunkStop, i, len(c.chunkViews), chunk.FileId, chunk.LogicOffset-chunk.Offset, chunk.LogicOffset-chunk.Offset+int64(chunk.Size))
buffer, err = c.readFromWholeChunkData(chunk)
if err != nil {
glog.Errorf("fetching chunk %+v: %v\n", chunk, err)
return
}
bufferOffset := chunkStart - chunk.LogicOffset + chunk.Offset
copied := copy(p[startOffset-offset:chunkStop-chunkStart+startOffset-offset], buffer[bufferOffset:bufferOffset+chunkStop-chunkStart])
n += copied
startOffset, remaining = startOffset+int64(copied), remaining-int64(copied)
}
if err == nil {
n = copy(p, c.buffer[offset-c.bufferOffset:])
glog.V(4).Infof("doReadAt [%d,%d), n:%v, err:%v", offset, offset+int64(len(p)), n, err)
if err == nil && remaining > 0 && c.fileSize > startOffset {
delta := int(min(remaining, c.fileSize - startOffset))
glog.V(4).Infof("zero2 [%d,%d) of file size %d bytes", startOffset, startOffset+int64(delta), c.fileSize)
n += delta
}
// fmt.Printf("> doReadAt [%d,%d), buffer:[%d,%d)\n", offset, offset+int64(n), c.bufferOffset, c.bufferOffset+int64(len(c.buffer)))
if err == nil && offset+int64(len(p)) > c.fileSize {
err = io.EOF
}
// fmt.Printf("~~~ filled %d, err: %v\n\n", n, err)
return
}
func (c *ChunkReadAt) fetchChunkData(chunkView *ChunkView) (data []byte, err error) {
func (c *ChunkReadAt) readFromWholeChunkData(chunkView *ChunkView) (chunkData []byte, err error) {
glog.V(4).Infof("fetchChunkData %s [%d,%d)\n", chunkView.FileId, chunkView.LogicOffset, chunkView.LogicOffset+int64(chunkView.Size))
glog.V(4).Infof("readFromWholeChunkData %s offset %d [%d,%d) size at least %d", chunkView.FileId, chunkView.Offset, chunkView.LogicOffset, chunkView.LogicOffset+int64(chunkView.Size), chunkView.ChunkSize)
hasDataInCache := false
chunkData := c.chunkCache.GetChunk(chunkView.FileId, chunkView.ChunkSize)
chunkData = c.chunkCache.GetChunk(chunkView.FileId, chunkView.ChunkSize)
if chunkData != nil {
glog.V(3).Infof("cache hit %s [%d,%d)", chunkView.FileId, chunkView.LogicOffset, chunkView.LogicOffset+int64(chunkView.Size))
hasDataInCache = true
glog.V(5).Infof("cache hit %s [%d,%d)", chunkView.FileId, chunkView.LogicOffset-chunkView.Offset, chunkView.LogicOffset-chunkView.Offset+int64(len(chunkData)))
} else {
glog.V(4).Infof("doFetchFullChunkData %s", chunkView.FileId)
chunkData, err = c.doFetchFullChunkData(chunkView.FileId, chunkView.CipherKey, chunkView.IsGzipped)
if err != nil {
return nil, err
return
}
}
if int64(len(chunkData)) < chunkView.Offset+int64(chunkView.Size) {
glog.Errorf("unexpected larger cached:%v chunk %s [%d,%d) than %d", hasDataInCache, chunkView.FileId, chunkView.Offset, chunkView.Offset+int64(chunkView.Size), len(chunkData))
return nil, fmt.Errorf("unexpected larger cached:%v chunk %s [%d,%d) than %d", hasDataInCache, chunkView.FileId, chunkView.Offset, chunkView.Offset+int64(chunkView.Size), len(chunkData))
}
data = chunkData[chunkView.Offset : chunkView.Offset+int64(chunkView.Size)]
if !hasDataInCache {
c.chunkCache.SetChunk(chunkView.FileId, chunkData)
}
return data, nil
return
}
func (c *ChunkReadAt) doFetchFullChunkData(fileId string, cipherKey []byte, isGzipped bool) ([]byte, error) {

View File

@ -0,0 +1,156 @@
package filer2
import (
"fmt"
"io"
"math"
"strconv"
"sync"
"testing"
)
// mockChunkCache is a stateless stand-in for a chunk cache used by the reader
// tests: every lookup "hits" and returns synthetic data.
type mockChunkCache struct {
}

// GetChunk fabricates a chunk of exactly minSize bytes in which every byte is
// the numeric value of fileId truncated to a byte. A fileId that is not a
// decimal number is treated as 0 because the parse error is ignored.
func (m *mockChunkCache) GetChunk(fileId string, minSize uint64) (data []byte) {
	id, _ := strconv.Atoi(fileId)
	fill := byte(id)
	data = make([]byte, minSize)
	for idx := range data {
		data[idx] = fill
	}
	return data
}

// SetChunk discards the data; the mock keeps no state.
func (m *mockChunkCache) SetChunk(fileId string, data []byte) {
}
// TestReaderAt reads from a 10-byte file whose visible intervals are
// [1,2), [3,4), [5,6), [7,9) and [9,10), leaving holes at [0,1), [2,3),
// [4,5) and [6,7). Each testReadAt call states the offset, buffer size,
// expected byte count and expected error.
func TestReaderAt(t *testing.T) {

	visibles := []VisibleInterval{
		{start: 1, stop: 2, fileId: "1", chunkSize: 9},
		{start: 3, stop: 4, fileId: "3", chunkSize: 1},
		{start: 5, stop: 6, fileId: "5", chunkSize: 2},
		{start: 7, stop: 9, fileId: "7", chunkSize: 2},
		{start: 9, stop: 10, fileId: "9", chunkSize: 2},
	}

	views := ViewFromVisibleIntervals(visibles, 0, math.MaxInt64)
	reader := &ChunkReadAt{
		chunkViews:   views,
		lookupFileId: nil,
		readerLock:   sync.Mutex{},
		fileSize:     10,
		chunkCache:   &mockChunkCache{},
	}

	// whole file
	testReadAt(t, reader, 0, 10, 10, nil)
	// buffer longer than the file: 10 bytes plus io.EOF
	testReadAt(t, reader, 0, 12, 10, io.EOF)
	// reads starting inside the file and ending exactly at, or before, its end
	testReadAt(t, reader, 2, 8, 8, nil)
	testReadAt(t, reader, 3, 6, 6, nil)

}
// testReadAt performs a single ReadAt of size bytes at offset, prints the
// resulting buffer as hex digits, and reports a test error when the returned
// byte count or error differs from expected / expectedErr.
func testReadAt(t *testing.T, readerAt *ChunkReadAt, offset int64, size int, expected int, expectedErr error) {
	buf := make([]byte, size)
	n, err := readerAt.ReadAt(buf, offset)

	// dump the buffer content for manual inspection
	for idx := range buf {
		fmt.Printf("%x", buf[idx])
	}
	fmt.Println()

	if n != expected {
		t.Errorf("unexpected read size: %d, expect: %d", n, expected)
	}
	if expectedErr != err {
		t.Errorf("unexpected read error: %v, expect: %v", err, expectedErr)
	}
}
// TestReaderAt0 reads from a 10-byte file with visible intervals [2,5) and
// [7,9), so the file begins with a hole, has a hole in the middle, and ends
// with a one-byte hole. It also covers reads that start at or beyond the
// file size.
func TestReaderAt0(t *testing.T) {

	visibles := []VisibleInterval{
		{start: 2, stop: 5, fileId: "1", chunkSize: 9},
		{start: 7, stop: 9, fileId: "2", chunkSize: 9},
	}

	views := ViewFromVisibleIntervals(visibles, 0, math.MaxInt64)
	reader := &ChunkReadAt{
		chunkViews:   views,
		lookupFileId: nil,
		readerLock:   sync.Mutex{},
		fileSize:     10,
		chunkCache:   &mockChunkCache{},
	}

	// whole file
	testReadAt(t, reader, 0, 10, 10, nil)
	// buffer extends past the end: remaining 7 bytes plus io.EOF
	testReadAt(t, reader, 3, 16, 7, io.EOF)
	// read fully inside the file
	testReadAt(t, reader, 3, 5, 5, nil)
	// reads starting past, or exactly at, the end of the file
	testReadAt(t, reader, 11, 5, 0, io.EOF)
	testReadAt(t, reader, 10, 5, 0, io.EOF)

}
// TestReaderAt1 reads from a 20-byte file that has a single visible interval
// [2,5); everything else within the file size is a hole. Each testReadAt call
// states the offset, buffer size, expected byte count and expected error.
func TestReaderAt1(t *testing.T) {

	visibles := []VisibleInterval{
		{start: 2, stop: 5, fileId: "1", chunkSize: 9},
	}

	views := ViewFromVisibleIntervals(visibles, 0, math.MaxInt64)
	reader := &ChunkReadAt{
		chunkViews:   views,
		lookupFileId: nil,
		readerLock:   sync.Mutex{},
		fileSize:     20,
		chunkCache:   &mockChunkCache{},
	}

	// whole file
	testReadAt(t, reader, 0, 20, 20, nil)
	testReadAt(t, reader, 1, 7, 7, nil)
	testReadAt(t, reader, 0, 1, 1, nil)
	// buffer extends past the end: 2 bytes plus io.EOF
	testReadAt(t, reader, 18, 4, 2, io.EOF)
	// read entirely after the only visible interval
	testReadAt(t, reader, 12, 4, 4, nil)
	// buffer extends past the end: 16 bytes plus io.EOF
	testReadAt(t, reader, 4, 20, 16, io.EOF)
	testReadAt(t, reader, 4, 10, 10, nil)
	testReadAt(t, reader, 1, 10, 10, nil)

}

View File

@ -32,7 +32,7 @@ func StreamContent(masterClient *wdclient.MasterClient, w io.Writer, chunks []*f
for _, chunkView := range chunkViews {
urlString := fileId2Url[chunkView.FileId]
err := util.ReadUrlAsStream(urlString, chunkView.CipherKey, chunkView.IsGzipped, chunkView.IsFullChunk(), chunkView.Offset, int(chunkView.Size), func(data []byte) {
err := util.ReadUrlAsStream(urlString+"?readDeleted=true", chunkView.CipherKey, chunkView.IsGzipped, chunkView.IsFullChunk(), chunkView.Offset, int(chunkView.Size), func(data []byte) {
w.Write(data)
})
if err != nil {
@ -63,7 +63,7 @@ func ReadAll(masterClient *wdclient.MasterClient, chunks []*filer_pb.FileChunk)
glog.V(1).Infof("operation LookupFileId %s failed, err: %v", chunkView.FileId, err)
return nil, err
}
err = util.ReadUrlAsStream(urlString, chunkView.CipherKey, chunkView.IsGzipped, chunkView.IsFullChunk(), chunkView.Offset, int(chunkView.Size), func(data []byte) {
err = util.ReadUrlAsStream(urlString+"?readDeleted=true", chunkView.CipherKey, chunkView.IsGzipped, chunkView.IsFullChunk(), chunkView.Offset, int(chunkView.Size), func(data []byte) {
buffer.Write(data)
})
if err != nil {
@ -175,7 +175,7 @@ func (c *ChunkStreamReader) fetchChunkToBuffer(chunkView *ChunkView) error {
return err
}
var buffer bytes.Buffer
err = util.ReadUrlAsStream(urlString, chunkView.CipherKey, chunkView.IsGzipped, chunkView.IsFullChunk(), chunkView.Offset, int(chunkView.Size), func(data []byte) {
err = util.ReadUrlAsStream(urlString+"?readDeleted=true", chunkView.CipherKey, chunkView.IsGzipped, chunkView.IsFullChunk(), chunkView.Offset, int(chunkView.Size), func(data []byte) {
buffer.Write(data)
})
if err != nil {

View File

@ -63,7 +63,7 @@ func (dir *Dir) Attr(ctx context.Context, attr *fuse.Attr) error {
attr.Gid = dir.entry.Attributes.Gid
attr.Uid = dir.entry.Attributes.Uid
glog.V(4).Infof("dir Attr %s, attr: %+v", dir.FullPath(), attr)
glog.V(5).Infof("dir Attr %s, attr: %+v", dir.FullPath(), attr)
return nil
}
@ -101,7 +101,7 @@ func (dir *Dir) Fsync(ctx context.Context, req *fuse.FsyncRequest) error {
}
func (dir *Dir) newFile(name string, entry *filer_pb.Entry) fs.Node {
return dir.wfs.fsNodeCache.EnsureFsNode(util.NewFullPath(dir.FullPath(), name), func() fs.Node {
f := dir.wfs.fsNodeCache.EnsureFsNode(util.NewFullPath(dir.FullPath(), name), func() fs.Node {
return &File{
Name: name,
dir: dir,
@ -110,14 +110,17 @@ func (dir *Dir) newFile(name string, entry *filer_pb.Entry) fs.Node {
entryViewCache: nil,
}
})
f.(*File).dir = dir // in case dir node was created later
return f
}
func (dir *Dir) newDirectory(fullpath util.FullPath, entry *filer_pb.Entry) fs.Node {
return dir.wfs.fsNodeCache.EnsureFsNode(fullpath, func() fs.Node {
d := dir.wfs.fsNodeCache.EnsureFsNode(fullpath, func() fs.Node {
return &Dir{name: entry.Name, wfs: dir.wfs, entry: entry, parent: dir}
})
d.(*Dir).parent = dir // in case dir node was created later
return d
}
func (dir *Dir) Create(ctx context.Context, req *fuse.CreateRequest,
@ -218,7 +221,7 @@ func (dir *Dir) Mkdir(ctx context.Context, req *fuse.MkdirRequest) (fs.Node, err
func (dir *Dir) Lookup(ctx context.Context, req *fuse.LookupRequest, resp *fuse.LookupResponse) (node fs.Node, err error) {
glog.V(4).Infof("dir Lookup %s: %s by %s", dir.FullPath(), req.Name, req.Header.String())
glog.V(5).Infof("dir Lookup %s: %s by %s", dir.FullPath(), req.Name, req.Header.String())
fullFilePath := util.NewFullPath(dir.FullPath(), req.Name)
dirPath := util.FullPath(dir.FullPath())
@ -237,7 +240,7 @@ func (dir *Dir) Lookup(ctx context.Context, req *fuse.LookupRequest, resp *fuse.
return nil, fuse.ENOENT
}
} else {
glog.V(4).Infof("dir Lookup cache hit %s", fullFilePath)
glog.V(5).Infof("dir Lookup cache hit %s", fullFilePath)
}
if entry != nil {
@ -265,7 +268,7 @@ func (dir *Dir) Lookup(ctx context.Context, req *fuse.LookupRequest, resp *fuse.
func (dir *Dir) ReadDirAll(ctx context.Context) (ret []fuse.Dirent, err error) {
glog.V(3).Infof("dir ReadDirAll %s", dir.FullPath())
glog.V(5).Infof("dir ReadDirAll %s", dir.FullPath())
processEachEntryFn := func(entry *filer_pb.Entry, isLast bool) error {
fullpath := util.NewFullPath(dir.FullPath(), entry.Name)
@ -314,12 +317,8 @@ func (dir *Dir) removeOneFile(req *fuse.RemoveRequest) error {
return nil
}
dir.wfs.deleteFileChunks(entry.Chunks)
dir.wfs.fsNodeCache.DeleteFsNode(filePath)
dir.wfs.metaCache.DeleteEntry(context.Background(), filePath)
// first, ensure the filer store can correctly delete
glog.V(3).Infof("remove file: %v", req)
err = filer_pb.Remove(dir.wfs, dir.FullPath(), req.Name, false, false, false, false)
if err != nil {
@ -327,34 +326,40 @@ func (dir *Dir) removeOneFile(req *fuse.RemoveRequest) error {
return fuse.ENOENT
}
// then, delete meta cache and fsNode cache
dir.wfs.metaCache.DeleteEntry(context.Background(), filePath)
dir.wfs.fsNodeCache.DeleteFsNode(filePath)
// delete the chunks last
dir.wfs.deleteFileChunks(entry.Chunks)
return nil
}
func (dir *Dir) removeFolder(req *fuse.RemoveRequest) error {
t := util.NewFullPath(dir.FullPath(), req.Name)
dir.wfs.fsNodeCache.DeleteFsNode(t)
dir.wfs.metaCache.DeleteEntry(context.Background(), t)
glog.V(3).Infof("remove directory entry: %v", req)
err := filer_pb.Remove(dir.wfs, dir.FullPath(), req.Name, true, false, false, false)
if err != nil {
glog.V(3).Infof("remove %s/%s: %v", dir.FullPath(), req.Name, err)
glog.V(0).Infof("remove %s/%s: %v", dir.FullPath(), req.Name, err)
if strings.Contains(err.Error(), "non-empty"){
return fuse.EEXIST
}
return fuse.ENOENT
}
t := util.NewFullPath(dir.FullPath(), req.Name)
dir.wfs.metaCache.DeleteEntry(context.Background(), t)
dir.wfs.fsNodeCache.DeleteFsNode(t)
return nil
}
func (dir *Dir) Setattr(ctx context.Context, req *fuse.SetattrRequest, resp *fuse.SetattrResponse) error {
glog.V(3).Infof("%v dir setattr %+v", dir.FullPath(), req)
glog.V(4).Infof("%v dir setattr %+v", dir.FullPath(), req)
if err := dir.maybeLoadEntry(); err != nil {
return err
@ -429,7 +434,7 @@ func (dir *Dir) Listxattr(ctx context.Context, req *fuse.ListxattrRequest, resp
}
func (dir *Dir) Forget() {
glog.V(3).Infof("Forget dir %s", dir.FullPath())
glog.V(5).Infof("Forget dir %s", dir.FullPath())
dir.wfs.fsNodeCache.DeleteFsNode(util.FullPath(dir.FullPath()))
}
@ -460,7 +465,7 @@ func (dir *Dir) saveEntry() error {
glog.V(1).Infof("save dir entry: %v", request)
_, err := client.UpdateEntry(context.Background(), request)
if err != nil {
glog.V(0).Infof("UpdateEntry dir %s/%s: %v", parentDir, name, err)
glog.Errorf("UpdateEntry dir %s/%s: %v", parentDir, name, err)
return fuse.EIO
}

View File

@ -18,7 +18,7 @@ var _ = fs.NodeReadlinker(&File{})
func (dir *Dir) Symlink(ctx context.Context, req *fuse.SymlinkRequest) (fs.Node, error) {
glog.V(3).Infof("Symlink: %v/%v to %v", dir.FullPath(), req.NewName, req.Target)
glog.V(4).Infof("Symlink: %v/%v to %v", dir.FullPath(), req.NewName, req.Target)
request := &filer_pb.CreateEntryRequest{
Directory: dir.FullPath(),
@ -63,7 +63,7 @@ func (file *File) Readlink(ctx context.Context, req *fuse.ReadlinkRequest) (stri
return "", fuse.Errno(syscall.EINVAL)
}
glog.V(3).Infof("Readlink: %v/%v => %v", file.dir.FullPath(), file.Name, file.entry.Attributes.SymlinkTarget)
glog.V(4).Infof("Readlink: %v/%v => %v", file.dir.FullPath(), file.Name, file.entry.Attributes.SymlinkTarget)
return file.entry.Attributes.SymlinkTarget, nil

View File

@ -63,7 +63,17 @@ func (dir *Dir) Rename(ctx context.Context, req *fuse.RenameRequest, newDirector
// fmt.Printf("rename path: %v => %v\n", oldPath, newPath)
dir.wfs.fsNodeCache.Move(oldPath, newPath)
delete(dir.wfs.handles, oldPath.AsInode())
// change file handle
dir.wfs.handlesLock.Lock()
defer dir.wfs.handlesLock.Unlock()
inodeId := oldPath.AsInode()
existingHandle, found := dir.wfs.handles[inodeId]
if !found || existingHandle == nil {
return err
}
delete(dir.wfs.handles, inodeId)
dir.wfs.handles[newPath.AsInode()] = existingHandle
return err
}

View File

@ -25,9 +25,6 @@ func newDirtyPages(file *File) *ContinuousDirtyPages {
}
}
func (pages *ContinuousDirtyPages) releaseResource() {
}
var counter = int32(0)
func (pages *ContinuousDirtyPages) AddPage(offset int64, data []byte) (chunks []*filer_pb.FileChunk, err error) {
@ -35,7 +32,7 @@ func (pages *ContinuousDirtyPages) AddPage(offset int64, data []byte) (chunks []
pages.lock.Lock()
defer pages.lock.Unlock()
glog.V(3).Infof("%s AddPage [%d,%d)", pages.f.fullpath(), offset, offset+int64(len(data)))
glog.V(5).Infof("%s AddPage [%d,%d) of %d bytes", pages.f.fullpath(), offset, offset+int64(len(data)), pages.f.entry.Attributes.FileSize)
if len(data) > int(pages.f.wfs.option.ChunkSizeLimit) {
// this is more than what buffer can hold.
@ -121,14 +118,16 @@ func (pages *ContinuousDirtyPages) saveExistingLargestPageToStorage() (chunk *fi
return nil, false, nil
}
fileSize := int64(pages.f.entry.Attributes.FileSize)
for {
chunk, err = pages.saveToStorage(maxList.ToReader(), maxList.Offset(), maxList.Size())
chunkSize := min(maxList.Size(), fileSize-maxList.Offset())
chunk, err = pages.saveToStorage(maxList.ToReader(), maxList.Offset(), chunkSize)
if err == nil {
hasSavedData = true
glog.V(3).Infof("%s saveToStorage [%d,%d) %s", pages.f.fullpath(), maxList.Offset(), maxList.Offset()+maxList.Size(), chunk.FileId)
glog.V(4).Infof("saveToStorage %s %s [%d,%d) of %d bytes", pages.f.fullpath(), chunk.GetFileIdString(), maxList.Offset(), maxList.Offset()+chunkSize, fileSize)
return
} else {
glog.V(0).Infof("%s saveToStorage [%d,%d): %v", pages.f.fullpath(), maxList.Offset(), maxList.Offset()+maxList.Size(), err)
glog.V(0).Infof("%s saveToStorage [%d,%d): %v", pages.f.fullpath(), maxList.Offset(), maxList.Offset()+chunkSize, err)
time.Sleep(5 * time.Second)
}
}
@ -139,6 +138,7 @@ func (pages *ContinuousDirtyPages) saveToStorage(reader io.Reader, offset int64,
dir, _ := pages.f.fullpath().DirAndName()
reader = io.LimitReader(reader, size)
chunk, collection, replication, err := pages.f.wfs.saveDataAsChunk(dir)(reader, pages.f.Name, offset)
if err != nil {
return nil, err
@ -149,6 +149,13 @@ func (pages *ContinuousDirtyPages) saveToStorage(reader io.Reader, offset int64,
}
// maxUint64 returns the larger of the two unsigned 64-bit values.
func maxUint64(x, y uint64) uint64 {
	if y >= x {
		return y
	}
	return x
}
func max(x, y int64) int64 {
if x > y {
return x
@ -162,11 +169,11 @@ func min(x, y int64) int64 {
return y
}
func (pages *ContinuousDirtyPages) ReadDirtyData(data []byte, startOffset int64) (offset int64, size int) {
func (pages *ContinuousDirtyPages) ReadDirtyDataAt(data []byte, startOffset int64) (maxStop int64) {
pages.lock.Lock()
defer pages.lock.Unlock()
return pages.intervals.ReadData(data, startOffset)
return pages.intervals.ReadDataAt(data, startOffset)
}

View File

@ -3,7 +3,6 @@ package filesys
import (
"bytes"
"io"
"math"
)
type IntervalNode struct {
@ -186,25 +185,15 @@ func (c *ContinuousIntervals) removeList(target *IntervalLinkedList) {
}
func (c *ContinuousIntervals) ReadData(data []byte, startOffset int64) (offset int64, size int) {
var minOffset int64 = math.MaxInt64
var maxStop int64
func (c *ContinuousIntervals) ReadDataAt(data []byte, startOffset int64) (maxStop int64) {
for _, list := range c.lists {
start := max(startOffset, list.Offset())
stop := min(startOffset+int64(len(data)), list.Offset()+list.Size())
if start <= stop {
if start < stop {
list.ReadData(data[start-startOffset:], start, stop)
minOffset = min(minOffset, start)
maxStop = max(maxStop, stop)
}
}
if minOffset == math.MaxInt64 {
return 0, 0
}
offset = minOffset
size = int(maxStop - offset)
return
}

View File

@ -7,12 +7,13 @@ import (
"sort"
"time"
"github.com/seaweedfs/fuse"
"github.com/seaweedfs/fuse/fs"
"github.com/chrislusf/seaweedfs/weed/filer2"
"github.com/chrislusf/seaweedfs/weed/glog"
"github.com/chrislusf/seaweedfs/weed/pb/filer_pb"
"github.com/chrislusf/seaweedfs/weed/util"
"github.com/seaweedfs/fuse"
"github.com/seaweedfs/fuse/fs"
)
const blockSize = 512
@ -35,6 +36,7 @@ type File struct {
entryViewCache []filer2.VisibleInterval
isOpen int
reader io.ReaderAt
dirtyMetadata bool
}
func (file *File) fullpath() util.FullPath {
@ -43,7 +45,7 @@ func (file *File) fullpath() util.FullPath {
func (file *File) Attr(ctx context.Context, attr *fuse.Attr) error {
glog.V(4).Infof("file Attr %s, open:%v, existing attr: %+v", file.fullpath(), file.isOpen, attr)
glog.V(5).Infof("file Attr %s, open:%v, existing attr: %+v", file.fullpath(), file.isOpen, attr)
if file.isOpen <= 0 {
if err := file.maybeLoadEntry(ctx); err != nil {
@ -54,7 +56,7 @@ func (file *File) Attr(ctx context.Context, attr *fuse.Attr) error {
attr.Inode = file.fullpath().AsInode()
attr.Valid = time.Second
attr.Mode = os.FileMode(file.entry.Attributes.FileMode)
attr.Size = filer2.TotalSize(file.entry.Chunks)
attr.Size = filer2.FileSize(file.entry)
if file.isOpen > 0 {
attr.Size = file.entry.Attributes.FileSize
glog.V(4).Infof("file Attr %s, open:%v, size: %d", file.fullpath(), file.isOpen, attr.Size)
@ -91,7 +93,7 @@ func (file *File) Open(ctx context.Context, req *fuse.OpenRequest, resp *fuse.Op
resp.Handle = fuse.HandleID(handle.handle)
glog.V(3).Infof("%v file open handle id = %d", file.fullpath(), handle.handle)
glog.V(4).Infof("%v file open handle id = %d", file.fullpath(), handle.handle)
return handle, nil
@ -99,7 +101,7 @@ func (file *File) Open(ctx context.Context, req *fuse.OpenRequest, resp *fuse.Op
func (file *File) Setattr(ctx context.Context, req *fuse.SetattrRequest, resp *fuse.SetattrResponse) error {
glog.V(3).Infof("%v file setattr %+v, old:%+v", file.fullpath(), req, file.entry.Attributes)
glog.V(5).Infof("%v file setattr %+v", file.fullpath(), req)
if err := file.maybeLoadEntry(ctx); err != nil {
return err
@ -107,49 +109,72 @@ func (file *File) Setattr(ctx context.Context, req *fuse.SetattrRequest, resp *f
if req.Valid.Size() {
glog.V(3).Infof("%v file setattr set size=%v", file.fullpath(), req.Size)
glog.V(4).Infof("%v file setattr set size=%v chunks=%d", file.fullpath(), req.Size, len(file.entry.Chunks))
if req.Size < filer2.TotalSize(file.entry.Chunks) {
// fmt.Printf("truncate %v \n", fullPath)
var chunks []*filer_pb.FileChunk
var truncatedChunks []*filer_pb.FileChunk
for _, chunk := range file.entry.Chunks {
int64Size := int64(chunk.Size)
if chunk.Offset+int64Size > int64(req.Size) {
// this chunk is truncated
int64Size = int64(req.Size) - chunk.Offset
}
if int64Size > 0 {
chunks = append(chunks, chunk)
if int64Size > 0 {
chunks = append(chunks, chunk)
glog.V(4).Infof("truncated chunk %+v from %d to %d\n", chunk.GetFileIdString(), chunk.Size, int64Size)
chunk.Size = uint64(int64Size)
} else {
glog.V(4).Infof("truncated whole chunk %+v\n", chunk.GetFileIdString())
truncatedChunks = append(truncatedChunks, chunk)
}
}
}
file.wfs.deleteFileChunks(truncatedChunks)
file.entry.Chunks = chunks
file.entryViewCache = nil
file.reader = nil
}
file.entry.Attributes.FileSize = req.Size
file.dirtyMetadata = true
}
if req.Valid.Mode() {
file.entry.Attributes.FileMode = uint32(req.Mode)
file.dirtyMetadata = true
}
if req.Valid.Uid() {
file.entry.Attributes.Uid = req.Uid
file.dirtyMetadata = true
}
if req.Valid.Gid() {
file.entry.Attributes.Gid = req.Gid
file.dirtyMetadata = true
}
if req.Valid.Crtime() {
file.entry.Attributes.Crtime = req.Crtime.Unix()
file.dirtyMetadata = true
}
if req.Valid.Mtime() {
file.entry.Attributes.Mtime = req.Mtime.Unix()
file.dirtyMetadata = true
}
if req.Valid.Handle() {
// fmt.Printf("file handle => %d\n", req.Handle)
}
if file.isOpen > 0 {
return nil
}
if !file.dirtyMetadata {
return nil
}
return file.saveEntry()
}
@ -205,14 +230,14 @@ func (file *File) Listxattr(ctx context.Context, req *fuse.ListxattrRequest, res
// Fsync handles a fuse fsync request for this file. As written here it only
// logs the request and returns nil; no chunk data is uploaded in this method.
// NOTE(review): the V(3) and V(4) log statements look like the before/after
// lines of a diff hunk — confirm which one the file actually keeps.
func (file *File) Fsync(ctx context.Context, req *fuse.FsyncRequest) error {
// fsync works at OS level
// write the file chunks to the filerGrpcAddress
glog.V(3).Infof("%s/%s fsync file %+v", file.dir.FullPath(), file.Name, req)
glog.V(4).Infof("%s/%s fsync file %+v", file.dir.FullPath(), file.Name, req)
return nil
}
// Forget removes this file's node from the filesystem node cache. The cache
// key is the full path built from the parent directory's path and the file name.
// NOTE(review): the V(3) and V(5) log statements look like the before/after
// lines of a diff hunk — confirm which one the file actually keeps.
func (file *File) Forget() {
t := util.NewFullPath(file.dir.FullPath(), file.Name)
glog.V(3).Infof("Forget file %s", t)
glog.V(5).Infof("Forget file %s", t)
file.wfs.fsNodeCache.DeleteFsNode(t)
}
@ -246,7 +271,7 @@ func (file *File) addChunks(chunks []*filer_pb.FileChunk) {
file.reader = nil
glog.V(3).Infof("%s existing %d chunks adds %d more", file.fullpath(), len(file.entry.Chunks), len(chunks))
glog.V(4).Infof("%s existing %d chunks adds %d more", file.fullpath(), len(file.entry.Chunks), len(chunks))
file.entry.Chunks = append(file.entry.Chunks, chunks...)
}
@ -265,10 +290,10 @@ func (file *File) saveEntry() error {
Entry: file.entry,
}
glog.V(1).Infof("save file entry: %v", request)
glog.V(4).Infof("save file entry: %v", request)
_, err := client.UpdateEntry(context.Background(), request)
if err != nil {
glog.V(0).Infof("UpdateEntry file %s/%s: %v", file.dir.FullPath(), file.Name, err)
glog.Errorf("UpdateEntry file %s/%s: %v", file.dir.FullPath(), file.Name, err)
return fuse.EIO
}

View File

@ -19,10 +19,9 @@ import (
type FileHandle struct {
// cache file has been written to
dirtyPages *ContinuousDirtyPages
contentType string
dirtyMetadata bool
handle uint64
dirtyPages *ContinuousDirtyPages
contentType string
handle uint64
f *File
RequestId fuse.RequestID // unique ID for request
@ -40,7 +39,7 @@ func newFileHandle(file *File, uid, gid uint32) *FileHandle {
Gid: gid,
}
if fh.f.entry != nil {
fh.f.entry.Attributes.FileSize = filer2.TotalSize(fh.f.entry.Chunks)
fh.f.entry.Attributes.FileSize = filer2.FileSize(fh.f.entry)
}
return fh
}
@ -55,38 +54,45 @@ var _ = fs.HandleReleaser(&FileHandle{})
func (fh *FileHandle) Read(ctx context.Context, req *fuse.ReadRequest, resp *fuse.ReadResponse) error {
glog.V(4).Infof("%s read fh %d: [%d,%d)", fh.f.fullpath(), fh.handle, req.Offset, req.Offset+int64(req.Size))
glog.V(4).Infof("%s read fh %d: [%d,%d) size %d resp.Data cap=%d", fh.f.fullpath(), fh.handle, req.Offset, req.Offset+int64(req.Size), req.Size, cap(resp.Data))
buff := make([]byte, req.Size)
buff := resp.Data[:cap(resp.Data)]
if req.Size > cap(resp.Data) {
// should not happen
buff = make([]byte, req.Size)
}
totalRead, err := fh.readFromChunks(buff, req.Offset)
if err == nil {
dirtyOffset, dirtySize := fh.readFromDirtyPages(buff, req.Offset)
if totalRead+req.Offset < dirtyOffset+int64(dirtySize) {
totalRead = dirtyOffset + int64(dirtySize) - req.Offset
}
maxStop := fh.readFromDirtyPages(buff, req.Offset)
totalRead = max(maxStop - req.Offset, totalRead)
}
resp.Data = buff[:totalRead]
if err != nil {
glog.Errorf("file handle read %s: %v", fh.f.fullpath(), err)
return fuse.EIO
}
if totalRead > int64(len(buff)) {
glog.Warningf("%s FileHandle Read %d: [%d,%d) size %d totalRead %d", fh.f.fullpath(), fh.handle, req.Offset, req.Offset+int64(req.Size), req.Size, totalRead)
totalRead = min(int64(len(buff)), totalRead)
}
resp.Data = buff[:totalRead]
return err
}
func (fh *FileHandle) readFromDirtyPages(buff []byte, startOffset int64) (offset int64, size int) {
return fh.dirtyPages.ReadDirtyData(buff, startOffset)
func (fh *FileHandle) readFromDirtyPages(buff []byte, startOffset int64) (maxStop int64) {
return fh.dirtyPages.ReadDirtyDataAt(buff, startOffset)
}
func (fh *FileHandle) readFromChunks(buff []byte, offset int64) (int64, error) {
// this value should come from the filer instead of the old f
if len(fh.f.entry.Chunks) == 0 {
fileSize := int64(filer2.FileSize(fh.f.entry))
if fileSize == 0 {
glog.V(1).Infof("empty fh %v", fh.f.fullpath())
return 0, nil
return 0, io.EOF
}
var chunkResolveErr error
@ -99,8 +105,8 @@ func (fh *FileHandle) readFromChunks(buff []byte, offset int64) (int64, error) {
}
if fh.f.reader == nil {
chunkViews := filer2.ViewFromVisibleIntervals(fh.f.entryViewCache, 0, math.MaxInt32)
fh.f.reader = filer2.NewChunkReaderAtFromClient(fh.f.wfs, chunkViews, fh.f.wfs.chunkCache)
chunkViews := filer2.ViewFromVisibleIntervals(fh.f.entryViewCache, 0, math.MaxInt64)
fh.f.reader = filer2.NewChunkReaderAtFromClient(fh.f.wfs, chunkViews, fh.f.wfs.chunkCache, fileSize)
}
totalRead, err := fh.f.reader.ReadAt(buff, offset)
@ -113,7 +119,7 @@ func (fh *FileHandle) readFromChunks(buff []byte, offset int64) (int64, error) {
glog.Errorf("file handle read %s: %v", fh.f.fullpath(), err)
}
// glog.V(0).Infof("file handle read %s [%d,%d] %d : %v", fh.f.fullpath(), offset, offset+int64(totalRead), totalRead, err)
glog.V(4).Infof("file handle read %s [%d,%d] %d : %v", fh.f.fullpath(), offset, offset+int64(totalRead), totalRead, err)
return int64(totalRead), err
}
@ -126,7 +132,7 @@ func (fh *FileHandle) Write(ctx context.Context, req *fuse.WriteRequest, resp *f
copy(data, req.Data)
fh.f.entry.Attributes.FileSize = uint64(max(req.Offset+int64(len(data)), int64(fh.f.entry.Attributes.FileSize)))
// glog.V(0).Infof("%v write [%d,%d)", fh.f.fullpath(), req.Offset, req.Offset+int64(len(req.Data)))
glog.V(4).Infof("%v write [%d,%d) %d", fh.f.fullpath(), req.Offset, req.Offset+int64(len(req.Data)), len(req.Data))
chunks, err := fh.dirtyPages.AddPage(req.Offset, data)
if err != nil {
@ -139,14 +145,14 @@ func (fh *FileHandle) Write(ctx context.Context, req *fuse.WriteRequest, resp *f
if req.Offset == 0 {
// detect mime type
fh.contentType = http.DetectContentType(data)
fh.dirtyMetadata = true
fh.f.dirtyMetadata = true
}
if len(chunks) > 0 {
fh.f.addChunks(chunks)
fh.dirtyMetadata = true
fh.f.dirtyMetadata = true
}
return nil
@ -154,24 +160,28 @@ func (fh *FileHandle) Write(ctx context.Context, req *fuse.WriteRequest, resp *f
func (fh *FileHandle) Release(ctx context.Context, req *fuse.ReleaseRequest) error {
glog.V(4).Infof("%v release fh %d", fh.f.fullpath(), fh.handle)
glog.V(4).Infof("Release %v fh %d", fh.f.fullpath(), fh.handle)
fh.f.isOpen--
if fh.f.isOpen <= 0 {
fh.dirtyPages.releaseResource()
fh.doFlush(ctx, req.Header)
fh.f.wfs.ReleaseHandle(fh.f.fullpath(), fuse.HandleID(fh.handle))
fh.f.entryViewCache = nil
fh.f.reader = nil
}
fh.f.entryViewCache = nil
fh.f.reader = nil
return nil
}
// Flush handles a fuse flush request by delegating to doFlush, passing along
// the request's header, and returns whatever doFlush returns.
func (fh *FileHandle) Flush(ctx context.Context, req *fuse.FlushRequest) error {
return fh.doFlush(ctx, req.Header)
}
func (fh *FileHandle) doFlush(ctx context.Context, header fuse.Header) error {
// fflush works at fh level
// send the data to the OS
glog.V(4).Infof("%s fh %d flush %v", fh.f.fullpath(), fh.handle, req)
glog.V(4).Infof("doFlush %s fh %d %v", fh.f.fullpath(), fh.handle, header)
chunks, err := fh.dirtyPages.FlushToStorage()
if err != nil {
@ -181,10 +191,10 @@ func (fh *FileHandle) Flush(ctx context.Context, req *fuse.FlushRequest) error {
if len(chunks) > 0 {
fh.f.addChunks(chunks)
fh.dirtyMetadata = true
fh.f.dirtyMetadata = true
}
if !fh.dirtyMetadata {
if !fh.f.dirtyMetadata {
return nil
}
@ -193,10 +203,10 @@ func (fh *FileHandle) Flush(ctx context.Context, req *fuse.FlushRequest) error {
if fh.f.entry.Attributes != nil {
fh.f.entry.Attributes.Mime = fh.contentType
if fh.f.entry.Attributes.Uid == 0 {
fh.f.entry.Attributes.Uid = req.Uid
fh.f.entry.Attributes.Uid = header.Uid
}
if fh.f.entry.Attributes.Gid == 0 {
fh.f.entry.Attributes.Gid = req.Gid
fh.f.entry.Attributes.Gid = header.Gid
}
if fh.f.entry.Attributes.Crtime == 0 {
fh.f.entry.Attributes.Crtime = time.Now().Unix()
@ -212,9 +222,9 @@ func (fh *FileHandle) Flush(ctx context.Context, req *fuse.FlushRequest) error {
Entry: fh.f.entry,
}
glog.V(3).Infof("%s set chunks: %v", fh.f.fullpath(), len(fh.f.entry.Chunks))
glog.V(4).Infof("%s set chunks: %v", fh.f.fullpath(), len(fh.f.entry.Chunks))
for i, chunk := range fh.f.entry.Chunks {
glog.V(3).Infof("%s chunks %d: %v [%d,%d)", fh.f.fullpath(), i, chunk.FileId, chunk.Offset, chunk.Offset+int64(chunk.Size))
glog.V(4).Infof("%s chunks %d: %v [%d,%d)", fh.f.fullpath(), i, chunk.GetFileIdString(), chunk.Offset, chunk.Offset+int64(chunk.Size))
}
chunks, garbages := filer2.CompactFileChunks(filer2.LookupFn(fh.f.wfs), fh.f.entry.Chunks)
@ -239,14 +249,14 @@ func (fh *FileHandle) Flush(ctx context.Context, req *fuse.FlushRequest) error {
fh.f.wfs.deleteFileChunks(garbages)
for i, chunk := range garbages {
glog.V(3).Infof("garbage %s chunks %d: %v [%d,%d)", fh.f.fullpath(), i, chunk.FileId, chunk.Offset, chunk.Offset+int64(chunk.Size))
glog.V(4).Infof("garbage %s chunks %d: %v [%d,%d)", fh.f.fullpath(), i, chunk.GetFileIdString(), chunk.Offset, chunk.Offset+int64(chunk.Size))
}
return nil
})
if err == nil {
fh.dirtyMetadata = false
fh.f.dirtyMetadata = false
}
if err != nil {

View File

@ -3,8 +3,9 @@ package filesys
import (
"sync"
"github.com/chrislusf/seaweedfs/weed/util"
"github.com/seaweedfs/fuse/fs"
"github.com/chrislusf/seaweedfs/weed/util"
)
type FsCache struct {
@ -118,7 +119,6 @@ func (c *FsCache) Move(oldPath util.FullPath, newPath util.FullPath) *FsNode {
target = target.ensureChild(p)
}
parent := target.parent
src.name = target.name
if dir, ok := src.node.(*Dir); ok {
dir.name = target.name // target is not Dir, but a shortcut
}
@ -132,6 +132,7 @@ func (c *FsCache) Move(oldPath util.FullPath, newPath util.FullPath) *FsNode {
target.deleteSelf()
src.name = target.name
src.connectToParent(parent)
return src
@ -144,10 +145,14 @@ func (n *FsNode) connectToParent(parent *FsNode) {
oldNode.deleteSelf()
}
if dir, ok := n.node.(*Dir); ok {
dir.parent = parent.node.(*Dir)
if parent.node != nil {
dir.parent = parent.node.(*Dir)
}
}
if f, ok := n.node.(*File); ok {
f.dir = parent.node.(*Dir)
if parent.node != nil {
f.dir = parent.node.(*Dir)
}
}
n.childrenLock.Lock()
parent.children[n.name] = n

View File

@ -94,3 +94,24 @@ func TestFsCacheMove(t *testing.T) {
}
}
// TestFsCacheMove2 registers two file nodes, moves the first onto the path of
// the second, and checks that a *File node is found at the destination path
// with Name "e".
func TestFsCacheMove2(t *testing.T) {
	cache := newFsCache(nil)

	cache.SetFsNode(util.FullPath("/a/b/d"), &File{Name: "dd"})
	cache.SetFsNode(util.FullPath("/a/b/e"), &File{Name: "ee"})

	cache.Move(util.FullPath("/a/b/d"), util.FullPath("/a/b/e"))

	d := cache.GetFsNode(util.FullPath("/a/b/e"))
	if d == nil {
		// Stop here: the type assertion below would panic on a nil node
		// and hide the real failure behind a runtime crash.
		t.Fatalf("unexpected nil node!")
	}

	// Checked assertion so a wrong node type is reported as a test failure
	// instead of a panic.
	f, ok := d.(*File)
	if !ok {
		t.Fatalf("unexpected node type %T", d)
	}
	if f.Name != "e" {
		t.Errorf("unexpected node!")
	}
}

View File

@ -61,8 +61,13 @@ func (mc *MetaCache) AtomicUpdateEntry(ctx context.Context, oldPath util.FullPat
oldDir, _ := oldPath.DirAndName()
if mc.visitedBoundary.HasVisited(util.FullPath(oldDir)) {
if oldPath != "" {
if err := mc.actualStore.DeleteEntry(ctx, oldPath); err != nil {
return err
if newEntry != nil && oldPath == newEntry.FullPath {
// skip the unnecessary deletion
// leave the update to the following InsertEntry operation
} else {
if err := mc.actualStore.DeleteEntry(ctx, oldPath); err != nil {
return err
}
}
}
} else {

View File

@ -14,7 +14,7 @@ func EnsureVisited(mc *MetaCache, client filer_pb.FilerClient, dirPath util.Full
mc.visitedBoundary.EnsureVisited(dirPath, func(path util.FullPath) (childDirectories []string, err error) {
glog.V(2).Infof("ReadDirAllEntries %s ...", path)
glog.V(5).Infof("ReadDirAllEntries %s ...", path)
err = filer_pb.ReadDirAllEntries(client, dirPath, "", func(pbEntry *filer_pb.Entry, isLast bool) error {
entry := filer2.FromPbEntry(string(dirPath), pbEntry)

View File

@ -65,7 +65,7 @@ type WFS struct {
root fs.Node
fsNodeCache *FsCache
chunkCache *chunk_cache.ChunkCache
chunkCache *chunk_cache.TieredChunkCache
metaCache *meta_cache.MetaCache
}
type statsCache struct {
@ -87,10 +87,7 @@ func NewSeaweedFileSystem(option *Option) *WFS {
cacheDir := path.Join(option.CacheDir, cacheUniqueId)
if option.CacheSizeMB > 0 {
os.MkdirAll(cacheDir, 0755)
wfs.chunkCache = chunk_cache.NewChunkCache(256, cacheDir, option.CacheSizeMB)
grace.OnInterrupt(func() {
wfs.chunkCache.Shutdown()
})
wfs.chunkCache = chunk_cache.NewTieredChunkCache(256, cacheDir, option.CacheSizeMB)
}
wfs.metaCache = meta_cache.NewMetaCache(path.Join(cacheDir, "meta"))
@ -113,7 +110,7 @@ func (wfs *WFS) Root() (fs.Node, error) {
func (wfs *WFS) AcquireHandle(file *File, uid, gid uint32) (fileHandle *FileHandle) {
fullpath := file.fullpath()
glog.V(4).Infof("%s AcquireHandle uid=%d gid=%d", fullpath, uid, gid)
glog.V(4).Infof("AcquireHandle %s uid=%d gid=%d", fullpath, uid, gid)
wfs.handlesLock.Lock()
defer wfs.handlesLock.Unlock()
@ -127,7 +124,6 @@ func (wfs *WFS) AcquireHandle(file *File, uid, gid uint32) (fileHandle *FileHand
fileHandle = newFileHandle(file, uid, gid)
wfs.handles[inodeId] = fileHandle
fileHandle.handle = inodeId
glog.V(4).Infof("%s new fh %d", fullpath, fileHandle.handle)
return
}
@ -146,7 +142,7 @@ func (wfs *WFS) ReleaseHandle(fullpath util.FullPath, handleId fuse.HandleID) {
// Statfs is called to obtain file system metadata. Implements fuse.FSStatfser
func (wfs *WFS) Statfs(ctx context.Context, req *fuse.StatfsRequest, resp *fuse.StatfsResponse) error {
glog.V(4).Infof("reading fs stats: %+v", req)
glog.V(5).Infof("reading fs stats: %+v", req)
if wfs.stats.lastChecked < time.Now().Unix()-20 {
@ -158,13 +154,13 @@ func (wfs *WFS) Statfs(ctx context.Context, req *fuse.StatfsRequest, resp *fuse.
Ttl: fmt.Sprintf("%ds", wfs.option.TtlSec),
}
glog.V(4).Infof("reading filer stats: %+v", request)
glog.V(5).Infof("reading filer stats: %+v", request)
resp, err := client.Statistics(context.Background(), request)
if err != nil {
glog.V(0).Infof("reading filer stats %v: %v", request, err)
return err
}
glog.V(4).Infof("read filer stats: %+v", resp)
glog.V(5).Infof("read filer stats: %+v", resp)
wfs.stats.TotalSize = resp.TotalSize
wfs.stats.UsedSize = resp.UsedSize

View File

@ -38,7 +38,7 @@ func (wfs *WFS) deleteFileIds(grpcDialOption grpc.DialOption, client filer_pb.Se
m := make(map[string]operation.LookupResult)
glog.V(4).Infof("remove file lookup volume id locations: %v", vids)
glog.V(5).Infof("deleteFileIds lookup volume id locations: %v", vids)
resp, err := client.LookupVolume(context.Background(), &filer_pb.LookupVolumeRequest{
VolumeIds: vids,
})

View File

@ -33,6 +33,7 @@ type UploadResult struct {
}
func (uploadResult *UploadResult) ToPbFileChunk(fileId string, offset int64) *filer_pb.FileChunk {
fid, _ := filer_pb.ToFileIdObject(fileId)
return &filer_pb.FileChunk{
FileId: fileId,
Offset: offset,
@ -41,6 +42,7 @@ func (uploadResult *UploadResult) ToPbFileChunk(fileId string, offset int64) *fi
ETag: uploadResult.ETag,
CipherKey: uploadResult.CipherKey,
IsCompressed: uploadResult.Gzip > 0,
Fid: fid,
}
}
@ -63,7 +65,7 @@ var fileNameEscaper = strings.NewReplacer("\\", "\\\\", "\"", "\\\"")
// Upload sends a POST request to a volume server to upload the content with adjustable compression level
func UploadData(uploadUrl string, filename string, cipher bool, data []byte, isInputCompressed bool, mtype string, pairMap map[string]string, jwt security.EncodedJwt) (uploadResult *UploadResult, err error) {
uploadResult, err = doUploadData(uploadUrl, filename, cipher, data, isInputCompressed, mtype, pairMap, jwt)
uploadResult, err = retriedUploadData(uploadUrl, filename, cipher, data, isInputCompressed, mtype, pairMap, jwt)
return
}
@ -79,10 +81,22 @@ func doUpload(uploadUrl string, filename string, cipher bool, reader io.Reader,
err = fmt.Errorf("read input: %v", err)
return
}
uploadResult, uploadErr := doUploadData(uploadUrl, filename, cipher, data, isInputCompressed, mtype, pairMap, jwt)
uploadResult, uploadErr := retriedUploadData(uploadUrl, filename, cipher, data, isInputCompressed, mtype, pairMap, jwt)
return uploadResult, uploadErr, data
}
// retriedUploadData calls doUploadData up to three times and stops at the
// first attempt that returns no error. Every failed attempt is logged as a
// warning. If all attempts fail, the result and error of the last attempt
// are returned.
func retriedUploadData(uploadUrl string, filename string, cipher bool, data []byte, isInputCompressed bool, mtype string, pairMap map[string]string, jwt security.EncodedJwt) (uploadResult *UploadResult, err error) {
	for attempt := 0; attempt < 3; attempt++ {
		uploadResult, err = doUploadData(uploadUrl, filename, cipher, data, isInputCompressed, mtype, pairMap, jwt)
		if err != nil {
			glog.Warningf("uploading to %s: %v", uploadUrl, err)
			continue
		}
		break
	}
	return uploadResult, err
}
func doUploadData(uploadUrl string, filename string, cipher bool, data []byte, isInputCompressed bool, mtype string, pairMap map[string]string, jwt security.EncodedJwt) (uploadResult *UploadResult, err error) {
contentIsGzipped := isInputCompressed
shouldGzipNow := false

View File

@ -7,6 +7,7 @@ import (
"io"
"math"
"os"
"strings"
"time"
"github.com/chrislusf/seaweedfs/weed/glog"
@ -82,7 +83,7 @@ func doList(filerClient FilerClient, fullDirPath util.FullPath, prefix string, f
InclusiveStartFrom: inclusive,
}
glog.V(3).Infof("read directory: %v", request)
glog.V(5).Infof("read directory: %v", request)
ctx, cancel := context.WithCancel(context.Background())
stream, err := client.ListEntries(ctx, request)
if err != nil {
@ -224,9 +225,15 @@ func Remove(filerClient FilerClient, parentDirectoryPath, name string, isDeleteD
IgnoreRecursiveError: ignoreRecursiveErr,
IsFromOtherCluster: isFromOtherCluster,
}); err != nil {
if strings.Contains(err.Error(), ErrNotFound.Error()){
return nil
}
return err
} else {
if resp.Error != "" {
if strings.Contains(resp.Error, ErrNotFound.Error()){
return nil
}
return errors.New(resp.Error)
}
}

View File

@ -10,7 +10,7 @@ import (
"github.com/chrislusf/seaweedfs/weed/storage/needle"
)
func toFileIdObject(fileIdStr string) (*FileId, error) {
func ToFileIdObject(fileIdStr string) (*FileId, error) {
t, err := needle.ParseFileIdFromString(fileIdStr)
if err != nil {
return nil, err
@ -43,14 +43,14 @@ func BeforeEntrySerialization(chunks []*FileChunk) {
for _, chunk := range chunks {
if chunk.FileId != "" {
if fid, err := toFileIdObject(chunk.FileId); err == nil {
if fid, err := ToFileIdObject(chunk.FileId); err == nil {
chunk.Fid = fid
chunk.FileId = ""
}
}
if chunk.SourceFileId != "" {
if fid, err := toFileIdObject(chunk.SourceFileId); err == nil {
if fid, err := ToFileIdObject(chunk.SourceFileId); err == nil {
chunk.SourceFid = fid
chunk.SourceFileId = ""
}
@ -81,7 +81,7 @@ func CreateEntry(client SeaweedFilerClient, request *CreateEntryRequest) error {
return fmt.Errorf("CreateEntry: %v", err)
}
if resp.Error != "" {
glog.V(1).Infof("create entry %s/%s %v: %v", request.Directory, request.Entry.Name, request.OExcl, err)
glog.V(1).Infof("create entry %s/%s %v: %v", request.Directory, request.Entry.Name, request.OExcl, resp.Error)
return fmt.Errorf("CreateEntry : %v", resp.Error)
}
return nil

View File

@ -9,7 +9,7 @@ import (
func TestFileIdSize(t *testing.T) {
fileIdStr := "11745,0293434534cbb9892b"
fid, _ := toFileIdObject(fileIdStr)
fid, _ := ToFileIdObject(fileIdStr)
bytes, _ := proto.Marshal(fid)
println(len(fileIdStr))

View File

@ -37,8 +37,12 @@ service VolumeServer {
}
rpc VolumeMarkReadonly (VolumeMarkReadonlyRequest) returns (VolumeMarkReadonlyResponse) {
}
rpc VolumeMarkWritable (VolumeMarkWritableRequest) returns (VolumeMarkWritableResponse) {
}
rpc VolumeConfigure (VolumeConfigureRequest) returns (VolumeConfigureResponse) {
}
rpc VolumeStatus (VolumeStatusRequest) returns (VolumeStatusResponse) {
}
// copy the .idx .dat files, and mount this volume
rpc VolumeCopy (VolumeCopyRequest) returns (VolumeCopyResponse) {
@ -200,6 +204,12 @@ message VolumeMarkReadonlyRequest {
message VolumeMarkReadonlyResponse {
}
// Request message of the VolumeMarkWritable rpc.
message VolumeMarkWritableRequest {
// id of the volume the request refers to
uint32 volume_id = 1;
}
// Response message of the VolumeMarkWritable rpc; it declares no fields.
message VolumeMarkWritableResponse {
}
message VolumeConfigureRequest {
uint32 volume_id = 1;
string replication = 2;
@ -208,6 +218,13 @@ message VolumeConfigureResponse {
string error = 1;
}
// Request message of the VolumeStatus rpc.
message VolumeStatusRequest {
// id of the volume the request refers to
uint32 volume_id = 1;
}
// Response message of the VolumeStatus rpc.
message VolumeStatusResponse {
// presumably true when the queried volume is read-only — confirm against the server implementation
bool is_read_only = 1;
}
message VolumeCopyRequest {
uint32 volume_id = 1;
string collection = 2;

File diff suppressed because it is too large Load Diff

View File

@ -95,7 +95,7 @@ func (g *AzureSink) CreateEntry(key string, entry *filer_pb.Entry) error {
return nil
}
totalSize := filer2.TotalSize(entry.Chunks)
totalSize := filer2.FileSize(entry)
chunkViews := filer2.ViewFromChunks(g.filerSource.LookupFileId, entry.Chunks, 0, int64(totalSize))
// Create a URL that references a to-be-created blob in your
@ -115,7 +115,7 @@ func (g *AzureSink) CreateEntry(key string, entry *filer_pb.Entry) error {
}
var writeErr error
readErr := util.ReadUrlAsStream(fileUrl, nil, false, chunk.IsFullChunk(), chunk.Offset, int(chunk.Size), func(data []byte) {
readErr := util.ReadUrlAsStream(fileUrl+"?readDeleted=true", nil, false, chunk.IsFullChunk(), chunk.Offset, int(chunk.Size), func(data []byte) {
_, writeErr = appendBlobURL.AppendBlock(context.Background(), bytes.NewReader(data), azblob.AppendBlobAccessConditions{}, nil)
})

View File

@ -84,7 +84,7 @@ func (g *B2Sink) CreateEntry(key string, entry *filer_pb.Entry) error {
return nil
}
totalSize := filer2.TotalSize(entry.Chunks)
totalSize := filer2.FileSize(entry)
chunkViews := filer2.ViewFromChunks(g.filerSource.LookupFileId, entry.Chunks, 0, int64(totalSize))
bucket, err := g.client.Bucket(context.Background(), g.bucket)
@ -103,7 +103,7 @@ func (g *B2Sink) CreateEntry(key string, entry *filer_pb.Entry) error {
}
var writeErr error
readErr := util.ReadUrlAsStream(fileUrl, nil, false, chunk.IsFullChunk(), chunk.Offset, int(chunk.Size), func(data []byte) {
readErr := util.ReadUrlAsStream(fileUrl+"?readDeleted=true", nil, false, chunk.IsFullChunk(), chunk.Offset, int(chunk.Size), func(data []byte) {
_, err := writer.Write(data)
if err != nil {
writeErr = err

View File

@ -89,7 +89,7 @@ func (g *GcsSink) CreateEntry(key string, entry *filer_pb.Entry) error {
return nil
}
totalSize := filer2.TotalSize(entry.Chunks)
totalSize := filer2.FileSize(entry)
chunkViews := filer2.ViewFromChunks(g.filerSource.LookupFileId, entry.Chunks, 0, int64(totalSize))
wc := g.client.Bucket(g.bucket).Object(key).NewWriter(context.Background())
@ -101,7 +101,7 @@ func (g *GcsSink) CreateEntry(key string, entry *filer_pb.Entry) error {
return err
}
err = util.ReadUrlAsStream(fileUrl, nil, false, chunk.IsFullChunk(), chunk.Offset, int(chunk.Size), func(data []byte) {
err = util.ReadUrlAsStream(fileUrl+"?readDeleted=true", nil, false, chunk.IsFullChunk(), chunk.Offset, int(chunk.Size), func(data []byte) {
wc.Write(data)
})

View File

@ -107,7 +107,7 @@ func (s3sink *S3Sink) CreateEntry(key string, entry *filer_pb.Entry) error {
return err
}
totalSize := filer2.TotalSize(entry.Chunks)
totalSize := filer2.FileSize(entry)
chunkViews := filer2.ViewFromChunks(s3sink.filerSource.LookupFileId, entry.Chunks, 0, int64(totalSize))
parts := make([]*s3.CompletedPart, len(chunkViews))

Some files were not shown because too many files have changed in this diff Show More