mirror of
https://github.com/google/leveldb.git
synced 2025-01-21 16:43:16 +08:00
Merge branch 'master' into master
This commit is contained in:
commit
63d5315e1c
35
.appveyor.yml
Normal file
35
.appveyor.yml
Normal file
@ -0,0 +1,35 @@
|
||||
# Build matrix / environment variables are explained on:
|
||||
# https://www.appveyor.com/docs/appveyor-yml/
|
||||
# This file can be validated on: https://ci.appveyor.com/tools/validate-yaml
|
||||
|
||||
version: "{build}"
|
||||
|
||||
environment:
|
||||
matrix:
|
||||
# AppVeyor currently has no custom job name feature.
|
||||
# http://help.appveyor.com/discussions/questions/1623-can-i-provide-a-friendly-name-for-jobs
|
||||
- JOB: Visual Studio 2017
|
||||
APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017
|
||||
CMAKE_GENERATOR: Visual Studio 15 2017
|
||||
|
||||
platform:
|
||||
- x86
|
||||
- x64
|
||||
|
||||
configuration:
|
||||
- RelWithDebInfo
|
||||
- Debug
|
||||
|
||||
build_script:
|
||||
- git submodule update --init --recursive
|
||||
- mkdir build
|
||||
- cd build
|
||||
- if "%platform%"=="x64" set CMAKE_GENERATOR=%CMAKE_GENERATOR% Win64
|
||||
- cmake --version
|
||||
- cmake .. -G "%CMAKE_GENERATOR%"
|
||||
-DCMAKE_CONFIGURATION_TYPES="%CONFIGURATION%"
|
||||
- cmake --build . --config "%CONFIGURATION%"
|
||||
- cd ..
|
||||
|
||||
test_script:
|
||||
- cd build && ctest --verbose --build-config "%CONFIGURATION%" && cd ..
|
18
.clang-format
Normal file
18
.clang-format
Normal file
@ -0,0 +1,18 @@
|
||||
# Run manually to reformat a file:
|
||||
# clang-format -i --style=file <file>
|
||||
# find . -iname '*.cc' -o -iname '*.h' -o -iname '*.h.in' | xargs clang-format -i --style=file
|
||||
BasedOnStyle: Google
|
||||
DerivePointerAlignment: false
|
||||
|
||||
# Public headers are in a different location in the internal Google repository.
|
||||
# Order them so that when imported to the authoritative repository they will be
|
||||
# in correct alphabetical order.
|
||||
IncludeCategories:
|
||||
- Regex: '^(<|"(benchmarks|db|helpers)/)'
|
||||
Priority: 1
|
||||
- Regex: '^"(leveldb)/'
|
||||
Priority: 2
|
||||
- Regex: '^(<|"(issues|port|table|third_party|util)/)'
|
||||
Priority: 3
|
||||
- Regex: '.*'
|
||||
Priority: 4
|
17
.gitignore
vendored
17
.gitignore
vendored
@ -1,9 +1,8 @@
|
||||
build_config.mk
|
||||
*.a
|
||||
*.o
|
||||
*.dylib*
|
||||
*.so
|
||||
*.so.*
|
||||
*_test
|
||||
db_bench
|
||||
leveldbutil
|
||||
# Editors.
|
||||
*.sw*
|
||||
.vscode
|
||||
.DS_Store
|
||||
|
||||
# Build directory.
|
||||
build/
|
||||
out/
|
||||
|
79
.travis.yml
Normal file
79
.travis.yml
Normal file
@ -0,0 +1,79 @@
|
||||
# Build matrix / environment variables are explained on:
|
||||
# http://about.travis-ci.org/docs/user/build-configuration/
|
||||
# This file can be validated on: http://lint.travis-ci.org/
|
||||
|
||||
language: cpp
|
||||
dist: xenial
|
||||
osx_image: xcode10.2
|
||||
|
||||
compiler:
|
||||
- gcc
|
||||
- clang
|
||||
os:
|
||||
- linux
|
||||
- osx
|
||||
|
||||
env:
|
||||
- BUILD_TYPE=Debug
|
||||
- BUILD_TYPE=RelWithDebInfo
|
||||
|
||||
addons:
|
||||
apt:
|
||||
sources:
|
||||
- llvm-toolchain-xenial-8
|
||||
- ubuntu-toolchain-r-test
|
||||
packages:
|
||||
- clang-8
|
||||
- cmake
|
||||
- gcc-8
|
||||
- g++-8
|
||||
- libgoogle-perftools-dev
|
||||
- libkyotocabinet-dev
|
||||
- libsnappy-dev
|
||||
- libsqlite3-dev
|
||||
- ninja-build
|
||||
homebrew:
|
||||
packages:
|
||||
- cmake
|
||||
- crc32c
|
||||
- gcc@8
|
||||
- gperftools
|
||||
- kyotocabinet
|
||||
- llvm@8
|
||||
- ninja
|
||||
- snappy
|
||||
- sqlite3
|
||||
update: true
|
||||
|
||||
install:
|
||||
# The following Homebrew packages aren't linked by default, and need to be
|
||||
# prepended to the path explicitly.
|
||||
- if [ "$TRAVIS_OS_NAME" == "osx" ]; then
|
||||
export PATH="$(brew --prefix llvm)/bin:$PATH";
|
||||
fi
|
||||
# /usr/bin/gcc points to an older compiler on both Linux and macOS.
|
||||
- if [ "$CXX" = "g++" ]; then export CXX="g++-8" CC="gcc-8"; fi
|
||||
# /usr/bin/clang points to an older compiler on both Linux and macOS.
|
||||
#
|
||||
# Homebrew's llvm package doesn't ship a versioned clang++ binary, so the values
|
||||
# below don't work on macOS. Fortunately, the path change above makes the
|
||||
# default values (clang and clang++) resolve to the correct compiler on macOS.
|
||||
- if [ "$TRAVIS_OS_NAME" == "linux" ]; then
|
||||
if [ "$CXX" = "clang++" ]; then export CXX="clang++-8" CC="clang-8"; fi;
|
||||
fi
|
||||
- echo ${CC}
|
||||
- echo ${CXX}
|
||||
- ${CXX} --version
|
||||
- cmake --version
|
||||
|
||||
before_script:
|
||||
- mkdir -p build && cd build
|
||||
- cmake .. -G Ninja -DCMAKE_BUILD_TYPE=$BUILD_TYPE
|
||||
- cmake --build .
|
||||
- cd ..
|
||||
|
||||
script:
|
||||
- cd build && ctest --verbose && cd ..
|
||||
- "if [ -f build/db_bench ] ; then build/db_bench ; fi"
|
||||
- "if [ -f build/db_bench_sqlite3 ] ; then build/db_bench_sqlite3 ; fi"
|
||||
- "if [ -f build/db_bench_tree_db ] ; then build/db_bench_tree_db ; fi"
|
454
CMakeLists.txt
Normal file
454
CMakeLists.txt
Normal file
@ -0,0 +1,454 @@
|
||||
# Copyright 2017 The LevelDB Authors. All rights reserved.
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
cmake_minimum_required(VERSION 3.9)
|
||||
# Keep the version below in sync with the one in db.h
|
||||
project(leveldb VERSION 1.22.0 LANGUAGES C CXX)
|
||||
|
||||
# This project can use C11, but will gracefully decay down to C89.
|
||||
set(CMAKE_C_STANDARD 11)
|
||||
set(CMAKE_C_STANDARD_REQUIRED OFF)
|
||||
set(CMAKE_C_EXTENSIONS OFF)
|
||||
|
||||
# This project requires C++11.
|
||||
set(CMAKE_CXX_STANDARD 11)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
set(CMAKE_CXX_EXTENSIONS OFF)
|
||||
|
||||
if (WIN32)
|
||||
set(LEVELDB_PLATFORM_NAME LEVELDB_PLATFORM_WINDOWS)
|
||||
# TODO(cmumford): Make UNICODE configurable for Windows.
|
||||
add_definitions(-D_UNICODE -DUNICODE)
|
||||
else (WIN32)
|
||||
set(LEVELDB_PLATFORM_NAME LEVELDB_PLATFORM_POSIX)
|
||||
endif (WIN32)
|
||||
|
||||
option(LEVELDB_BUILD_TESTS "Build LevelDB's unit tests" ON)
|
||||
option(LEVELDB_BUILD_BENCHMARKS "Build LevelDB's benchmarks" ON)
|
||||
option(LEVELDB_INSTALL "Install LevelDB's header and library" ON)
|
||||
|
||||
include(TestBigEndian)
|
||||
test_big_endian(LEVELDB_IS_BIG_ENDIAN)
|
||||
|
||||
include(CheckIncludeFile)
|
||||
check_include_file("unistd.h" HAVE_UNISTD_H)
|
||||
|
||||
include(CheckLibraryExists)
|
||||
check_library_exists(crc32c crc32c_value "" HAVE_CRC32C)
|
||||
check_library_exists(snappy snappy_compress "" HAVE_SNAPPY)
|
||||
check_library_exists(tcmalloc malloc "" HAVE_TCMALLOC)
|
||||
|
||||
include(CheckCXXSymbolExists)
|
||||
# Using check_cxx_symbol_exists() instead of check_c_symbol_exists() because
|
||||
# we're including the header from C++, and feature detection should use the same
|
||||
# compiler language that the project will use later. Principles aside, some
|
||||
# versions of libc do not expose fdatasync() in <unistd.h> in standard C mode
|
||||
# (-std=c11), but do expose the function in standard C++ mode (-std=c++11).
|
||||
check_cxx_symbol_exists(fdatasync "unistd.h" HAVE_FDATASYNC)
|
||||
check_cxx_symbol_exists(F_FULLFSYNC "fcntl.h" HAVE_FULLFSYNC)
|
||||
check_cxx_symbol_exists(O_CLOEXEC "fcntl.h" HAVE_O_CLOEXEC)
|
||||
|
||||
include(CheckCXXSourceCompiles)
|
||||
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wstrict-prototypes")
|
||||
|
||||
# Test whether -Wthread-safety is available. See
|
||||
# https://clang.llvm.org/docs/ThreadSafetyAnalysis.html
|
||||
# -Werror is necessary because unknown attributes only generate warnings.
|
||||
set(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
|
||||
list(APPEND CMAKE_REQUIRED_FLAGS -Werror -Wthread-safety)
|
||||
check_cxx_source_compiles("
|
||||
struct __attribute__((lockable)) Lock {
|
||||
void Acquire() __attribute__((exclusive_lock_function()));
|
||||
void Release() __attribute__((unlock_function()));
|
||||
};
|
||||
struct ThreadSafeType {
|
||||
Lock lock_;
|
||||
int data_ __attribute__((guarded_by(lock_)));
|
||||
};
|
||||
int main() { return 0; }
|
||||
" HAVE_CLANG_THREAD_SAFETY)
|
||||
set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS})
|
||||
|
||||
# Test whether C++17 __has_include is available.
|
||||
check_cxx_source_compiles("
|
||||
#if defined(__has_include) && __has_include(<string>)
|
||||
#include <string>
|
||||
#endif
|
||||
int main() { std::string str; return 0; }
|
||||
" HAVE_CXX17_HAS_INCLUDE)
|
||||
|
||||
set(LEVELDB_PUBLIC_INCLUDE_DIR "include/leveldb")
|
||||
set(LEVELDB_PORT_CONFIG_DIR "include/port")
|
||||
|
||||
configure_file(
|
||||
"${PROJECT_SOURCE_DIR}/port/port_config.h.in"
|
||||
"${PROJECT_BINARY_DIR}/${LEVELDB_PORT_CONFIG_DIR}/port_config.h"
|
||||
)
|
||||
|
||||
include_directories(
|
||||
"${PROJECT_BINARY_DIR}/include"
|
||||
"${PROJECT_SOURCE_DIR}"
|
||||
)
|
||||
|
||||
if(BUILD_SHARED_LIBS)
|
||||
# Only export LEVELDB_EXPORT symbols from the shared library.
|
||||
add_compile_options(-fvisibility=hidden)
|
||||
endif(BUILD_SHARED_LIBS)
|
||||
|
||||
add_library(leveldb "")
|
||||
target_sources(leveldb
|
||||
PRIVATE
|
||||
"${PROJECT_BINARY_DIR}/${LEVELDB_PORT_CONFIG_DIR}/port_config.h"
|
||||
"${PROJECT_SOURCE_DIR}/db/builder.cc"
|
||||
"${PROJECT_SOURCE_DIR}/db/builder.h"
|
||||
"${PROJECT_SOURCE_DIR}/db/c.cc"
|
||||
"${PROJECT_SOURCE_DIR}/db/db_impl.cc"
|
||||
"${PROJECT_SOURCE_DIR}/db/db_impl.h"
|
||||
"${PROJECT_SOURCE_DIR}/db/db_iter.cc"
|
||||
"${PROJECT_SOURCE_DIR}/db/db_iter.h"
|
||||
"${PROJECT_SOURCE_DIR}/db/dbformat.cc"
|
||||
"${PROJECT_SOURCE_DIR}/db/dbformat.h"
|
||||
"${PROJECT_SOURCE_DIR}/db/dumpfile.cc"
|
||||
"${PROJECT_SOURCE_DIR}/db/filename.cc"
|
||||
"${PROJECT_SOURCE_DIR}/db/filename.h"
|
||||
"${PROJECT_SOURCE_DIR}/db/log_format.h"
|
||||
"${PROJECT_SOURCE_DIR}/db/log_reader.cc"
|
||||
"${PROJECT_SOURCE_DIR}/db/log_reader.h"
|
||||
"${PROJECT_SOURCE_DIR}/db/log_writer.cc"
|
||||
"${PROJECT_SOURCE_DIR}/db/log_writer.h"
|
||||
"${PROJECT_SOURCE_DIR}/db/memtable.cc"
|
||||
"${PROJECT_SOURCE_DIR}/db/memtable.h"
|
||||
"${PROJECT_SOURCE_DIR}/db/repair.cc"
|
||||
"${PROJECT_SOURCE_DIR}/db/skiplist.h"
|
||||
"${PROJECT_SOURCE_DIR}/db/snapshot.h"
|
||||
"${PROJECT_SOURCE_DIR}/db/table_cache.cc"
|
||||
"${PROJECT_SOURCE_DIR}/db/table_cache.h"
|
||||
"${PROJECT_SOURCE_DIR}/db/version_edit.cc"
|
||||
"${PROJECT_SOURCE_DIR}/db/version_edit.h"
|
||||
"${PROJECT_SOURCE_DIR}/db/version_set.cc"
|
||||
"${PROJECT_SOURCE_DIR}/db/version_set.h"
|
||||
"${PROJECT_SOURCE_DIR}/db/write_batch_internal.h"
|
||||
"${PROJECT_SOURCE_DIR}/db/write_batch.cc"
|
||||
"${PROJECT_SOURCE_DIR}/port/port_stdcxx.h"
|
||||
"${PROJECT_SOURCE_DIR}/port/port.h"
|
||||
"${PROJECT_SOURCE_DIR}/port/thread_annotations.h"
|
||||
"${PROJECT_SOURCE_DIR}/table/block_builder.cc"
|
||||
"${PROJECT_SOURCE_DIR}/table/block_builder.h"
|
||||
"${PROJECT_SOURCE_DIR}/table/block.cc"
|
||||
"${PROJECT_SOURCE_DIR}/table/block.h"
|
||||
"${PROJECT_SOURCE_DIR}/table/filter_block.cc"
|
||||
"${PROJECT_SOURCE_DIR}/table/filter_block.h"
|
||||
"${PROJECT_SOURCE_DIR}/table/format.cc"
|
||||
"${PROJECT_SOURCE_DIR}/table/format.h"
|
||||
"${PROJECT_SOURCE_DIR}/table/iterator_wrapper.h"
|
||||
"${PROJECT_SOURCE_DIR}/table/iterator.cc"
|
||||
"${PROJECT_SOURCE_DIR}/table/merger.cc"
|
||||
"${PROJECT_SOURCE_DIR}/table/merger.h"
|
||||
"${PROJECT_SOURCE_DIR}/table/table_builder.cc"
|
||||
"${PROJECT_SOURCE_DIR}/table/table.cc"
|
||||
"${PROJECT_SOURCE_DIR}/table/two_level_iterator.cc"
|
||||
"${PROJECT_SOURCE_DIR}/table/two_level_iterator.h"
|
||||
"${PROJECT_SOURCE_DIR}/util/arena.cc"
|
||||
"${PROJECT_SOURCE_DIR}/util/arena.h"
|
||||
"${PROJECT_SOURCE_DIR}/util/bloom.cc"
|
||||
"${PROJECT_SOURCE_DIR}/util/cache.cc"
|
||||
"${PROJECT_SOURCE_DIR}/util/coding.cc"
|
||||
"${PROJECT_SOURCE_DIR}/util/coding.h"
|
||||
"${PROJECT_SOURCE_DIR}/util/comparator.cc"
|
||||
"${PROJECT_SOURCE_DIR}/util/crc32c.cc"
|
||||
"${PROJECT_SOURCE_DIR}/util/crc32c.h"
|
||||
"${PROJECT_SOURCE_DIR}/util/env.cc"
|
||||
"${PROJECT_SOURCE_DIR}/util/filter_policy.cc"
|
||||
"${PROJECT_SOURCE_DIR}/util/hash.cc"
|
||||
"${PROJECT_SOURCE_DIR}/util/hash.h"
|
||||
"${PROJECT_SOURCE_DIR}/util/logging.cc"
|
||||
"${PROJECT_SOURCE_DIR}/util/logging.h"
|
||||
"${PROJECT_SOURCE_DIR}/util/mutexlock.h"
|
||||
"${PROJECT_SOURCE_DIR}/util/no_destructor.h"
|
||||
"${PROJECT_SOURCE_DIR}/util/options.cc"
|
||||
"${PROJECT_SOURCE_DIR}/util/random.h"
|
||||
"${PROJECT_SOURCE_DIR}/util/status.cc"
|
||||
|
||||
# Only CMake 3.3+ supports PUBLIC sources in targets exported by "install".
|
||||
$<$<VERSION_GREATER:CMAKE_VERSION,3.2>:PUBLIC>
|
||||
"${LEVELDB_PUBLIC_INCLUDE_DIR}/c.h"
|
||||
"${LEVELDB_PUBLIC_INCLUDE_DIR}/cache.h"
|
||||
"${LEVELDB_PUBLIC_INCLUDE_DIR}/comparator.h"
|
||||
"${LEVELDB_PUBLIC_INCLUDE_DIR}/db.h"
|
||||
"${LEVELDB_PUBLIC_INCLUDE_DIR}/dumpfile.h"
|
||||
"${LEVELDB_PUBLIC_INCLUDE_DIR}/env.h"
|
||||
"${LEVELDB_PUBLIC_INCLUDE_DIR}/export.h"
|
||||
"${LEVELDB_PUBLIC_INCLUDE_DIR}/filter_policy.h"
|
||||
"${LEVELDB_PUBLIC_INCLUDE_DIR}/iterator.h"
|
||||
"${LEVELDB_PUBLIC_INCLUDE_DIR}/options.h"
|
||||
"${LEVELDB_PUBLIC_INCLUDE_DIR}/slice.h"
|
||||
"${LEVELDB_PUBLIC_INCLUDE_DIR}/status.h"
|
||||
"${LEVELDB_PUBLIC_INCLUDE_DIR}/table_builder.h"
|
||||
"${LEVELDB_PUBLIC_INCLUDE_DIR}/table.h"
|
||||
"${LEVELDB_PUBLIC_INCLUDE_DIR}/write_batch.h"
|
||||
)
|
||||
|
||||
if (WIN32)
|
||||
target_sources(leveldb
|
||||
PRIVATE
|
||||
"${PROJECT_SOURCE_DIR}/util/env_windows.cc"
|
||||
"${PROJECT_SOURCE_DIR}/util/windows_logger.h"
|
||||
)
|
||||
else (WIN32)
|
||||
target_sources(leveldb
|
||||
PRIVATE
|
||||
"${PROJECT_SOURCE_DIR}/util/env_posix.cc"
|
||||
"${PROJECT_SOURCE_DIR}/util/posix_logger.h"
|
||||
)
|
||||
endif (WIN32)
|
||||
|
||||
# MemEnv is not part of the interface and could be pulled to a separate library.
|
||||
target_sources(leveldb
|
||||
PRIVATE
|
||||
"${PROJECT_SOURCE_DIR}/helpers/memenv/memenv.cc"
|
||||
"${PROJECT_SOURCE_DIR}/helpers/memenv/memenv.h"
|
||||
)
|
||||
|
||||
target_include_directories(leveldb
|
||||
PUBLIC
|
||||
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
|
||||
)
|
||||
|
||||
set_target_properties(leveldb
|
||||
PROPERTIES VERSION ${PROJECT_VERSION} SOVERSION ${PROJECT_VERSION_MAJOR})
|
||||
|
||||
target_compile_definitions(leveldb
|
||||
PRIVATE
|
||||
# Used by include/export.h when building shared libraries.
|
||||
LEVELDB_COMPILE_LIBRARY
|
||||
# Used by port/port.h.
|
||||
${LEVELDB_PLATFORM_NAME}=1
|
||||
)
|
||||
if (NOT HAVE_CXX17_HAS_INCLUDE)
|
||||
target_compile_definitions(leveldb
|
||||
PRIVATE
|
||||
LEVELDB_HAS_PORT_CONFIG_H=1
|
||||
)
|
||||
endif(NOT HAVE_CXX17_HAS_INCLUDE)
|
||||
|
||||
if(BUILD_SHARED_LIBS)
|
||||
target_compile_definitions(leveldb
|
||||
PUBLIC
|
||||
# Used by include/export.h.
|
||||
LEVELDB_SHARED_LIBRARY
|
||||
)
|
||||
endif(BUILD_SHARED_LIBS)
|
||||
|
||||
if(HAVE_CLANG_THREAD_SAFETY)
|
||||
target_compile_options(leveldb
|
||||
PUBLIC
|
||||
-Werror -Wthread-safety)
|
||||
endif(HAVE_CLANG_THREAD_SAFETY)
|
||||
|
||||
if(HAVE_CRC32C)
|
||||
target_link_libraries(leveldb crc32c)
|
||||
endif(HAVE_CRC32C)
|
||||
if(HAVE_SNAPPY)
|
||||
target_link_libraries(leveldb snappy)
|
||||
endif(HAVE_SNAPPY)
|
||||
if(HAVE_TCMALLOC)
|
||||
target_link_libraries(leveldb tcmalloc)
|
||||
endif(HAVE_TCMALLOC)
|
||||
|
||||
# Needed by port_stdcxx.h
|
||||
find_package(Threads REQUIRED)
|
||||
target_link_libraries(leveldb Threads::Threads)
|
||||
|
||||
add_executable(leveldbutil
|
||||
"${PROJECT_SOURCE_DIR}/db/leveldbutil.cc"
|
||||
)
|
||||
target_link_libraries(leveldbutil leveldb)
|
||||
|
||||
if(LEVELDB_BUILD_TESTS)
|
||||
enable_testing()
|
||||
|
||||
function(leveldb_test test_file)
|
||||
get_filename_component(test_target_name "${test_file}" NAME_WE)
|
||||
|
||||
add_executable("${test_target_name}" "")
|
||||
target_sources("${test_target_name}"
|
||||
PRIVATE
|
||||
"${PROJECT_BINARY_DIR}/${LEVELDB_PORT_CONFIG_DIR}/port_config.h"
|
||||
"${PROJECT_SOURCE_DIR}/util/testharness.cc"
|
||||
"${PROJECT_SOURCE_DIR}/util/testharness.h"
|
||||
"${PROJECT_SOURCE_DIR}/util/testutil.cc"
|
||||
"${PROJECT_SOURCE_DIR}/util/testutil.h"
|
||||
|
||||
"${test_file}"
|
||||
)
|
||||
target_link_libraries("${test_target_name}" leveldb)
|
||||
target_compile_definitions("${test_target_name}"
|
||||
PRIVATE
|
||||
${LEVELDB_PLATFORM_NAME}=1
|
||||
)
|
||||
if (NOT HAVE_CXX17_HAS_INCLUDE)
|
||||
target_compile_definitions("${test_target_name}"
|
||||
PRIVATE
|
||||
LEVELDB_HAS_PORT_CONFIG_H=1
|
||||
)
|
||||
endif(NOT HAVE_CXX17_HAS_INCLUDE)
|
||||
|
||||
add_test(NAME "${test_target_name}" COMMAND "${test_target_name}")
|
||||
endfunction(leveldb_test)
|
||||
|
||||
leveldb_test("${PROJECT_SOURCE_DIR}/db/c_test.c")
|
||||
leveldb_test("${PROJECT_SOURCE_DIR}/db/fault_injection_test.cc")
|
||||
|
||||
leveldb_test("${PROJECT_SOURCE_DIR}/issues/issue178_test.cc")
|
||||
leveldb_test("${PROJECT_SOURCE_DIR}/issues/issue200_test.cc")
|
||||
leveldb_test("${PROJECT_SOURCE_DIR}/issues/issue320_test.cc")
|
||||
|
||||
leveldb_test("${PROJECT_SOURCE_DIR}/util/env_test.cc")
|
||||
leveldb_test("${PROJECT_SOURCE_DIR}/util/status_test.cc")
|
||||
leveldb_test("${PROJECT_SOURCE_DIR}/util/no_destructor_test.cc")
|
||||
|
||||
if(NOT BUILD_SHARED_LIBS)
|
||||
leveldb_test("${PROJECT_SOURCE_DIR}/db/autocompact_test.cc")
|
||||
leveldb_test("${PROJECT_SOURCE_DIR}/db/corruption_test.cc")
|
||||
leveldb_test("${PROJECT_SOURCE_DIR}/db/db_test.cc")
|
||||
leveldb_test("${PROJECT_SOURCE_DIR}/db/dbformat_test.cc")
|
||||
leveldb_test("${PROJECT_SOURCE_DIR}/db/filename_test.cc")
|
||||
leveldb_test("${PROJECT_SOURCE_DIR}/db/log_test.cc")
|
||||
leveldb_test("${PROJECT_SOURCE_DIR}/db/recovery_test.cc")
|
||||
leveldb_test("${PROJECT_SOURCE_DIR}/db/skiplist_test.cc")
|
||||
leveldb_test("${PROJECT_SOURCE_DIR}/db/version_edit_test.cc")
|
||||
leveldb_test("${PROJECT_SOURCE_DIR}/db/version_set_test.cc")
|
||||
leveldb_test("${PROJECT_SOURCE_DIR}/db/write_batch_test.cc")
|
||||
|
||||
leveldb_test("${PROJECT_SOURCE_DIR}/helpers/memenv/memenv_test.cc")
|
||||
|
||||
leveldb_test("${PROJECT_SOURCE_DIR}/table/filter_block_test.cc")
|
||||
leveldb_test("${PROJECT_SOURCE_DIR}/table/table_test.cc")
|
||||
|
||||
leveldb_test("${PROJECT_SOURCE_DIR}/util/arena_test.cc")
|
||||
leveldb_test("${PROJECT_SOURCE_DIR}/util/bloom_test.cc")
|
||||
leveldb_test("${PROJECT_SOURCE_DIR}/util/cache_test.cc")
|
||||
leveldb_test("${PROJECT_SOURCE_DIR}/util/coding_test.cc")
|
||||
leveldb_test("${PROJECT_SOURCE_DIR}/util/crc32c_test.cc")
|
||||
leveldb_test("${PROJECT_SOURCE_DIR}/util/hash_test.cc")
|
||||
leveldb_test("${PROJECT_SOURCE_DIR}/util/logging_test.cc")
|
||||
|
||||
# TODO(costan): This test also uses
|
||||
# "${PROJECT_SOURCE_DIR}/util/env_{posix|windows}_test_helper.h"
|
||||
if (WIN32)
|
||||
leveldb_test("${PROJECT_SOURCE_DIR}/util/env_windows_test.cc")
|
||||
else (WIN32)
|
||||
leveldb_test("${PROJECT_SOURCE_DIR}/util/env_posix_test.cc")
|
||||
endif (WIN32)
|
||||
endif(NOT BUILD_SHARED_LIBS)
|
||||
endif(LEVELDB_BUILD_TESTS)
|
||||
|
||||
if(LEVELDB_BUILD_BENCHMARKS)
|
||||
function(leveldb_benchmark bench_file)
|
||||
get_filename_component(bench_target_name "${bench_file}" NAME_WE)
|
||||
|
||||
add_executable("${bench_target_name}" "")
|
||||
target_sources("${bench_target_name}"
|
||||
PRIVATE
|
||||
"${PROJECT_BINARY_DIR}/${LEVELDB_PORT_CONFIG_DIR}/port_config.h"
|
||||
"${PROJECT_SOURCE_DIR}/util/histogram.cc"
|
||||
"${PROJECT_SOURCE_DIR}/util/histogram.h"
|
||||
"${PROJECT_SOURCE_DIR}/util/testharness.cc"
|
||||
"${PROJECT_SOURCE_DIR}/util/testharness.h"
|
||||
"${PROJECT_SOURCE_DIR}/util/testutil.cc"
|
||||
"${PROJECT_SOURCE_DIR}/util/testutil.h"
|
||||
|
||||
"${bench_file}"
|
||||
)
|
||||
target_link_libraries("${bench_target_name}" leveldb)
|
||||
target_compile_definitions("${bench_target_name}"
|
||||
PRIVATE
|
||||
${LEVELDB_PLATFORM_NAME}=1
|
||||
)
|
||||
if (NOT HAVE_CXX17_HAS_INCLUDE)
|
||||
target_compile_definitions("${bench_target_name}"
|
||||
PRIVATE
|
||||
LEVELDB_HAS_PORT_CONFIG_H=1
|
||||
)
|
||||
endif(NOT HAVE_CXX17_HAS_INCLUDE)
|
||||
endfunction(leveldb_benchmark)
|
||||
|
||||
if(NOT BUILD_SHARED_LIBS)
|
||||
leveldb_benchmark("${PROJECT_SOURCE_DIR}/benchmarks/db_bench.cc")
|
||||
endif(NOT BUILD_SHARED_LIBS)
|
||||
|
||||
check_library_exists(sqlite3 sqlite3_open "" HAVE_SQLITE3)
|
||||
if(HAVE_SQLITE3)
|
||||
leveldb_benchmark("${PROJECT_SOURCE_DIR}/benchmarks/db_bench_sqlite3.cc")
|
||||
target_link_libraries(db_bench_sqlite3 sqlite3)
|
||||
endif(HAVE_SQLITE3)
|
||||
|
||||
# check_library_exists is insufficient here because the library's symbols have
|
||||
# different manglings when compiled with clang or gcc, at least when installed
|
||||
# with Homebrew on Mac.
|
||||
set(OLD_CMAKE_REQURED_LIBRARIES ${CMAKE_REQUIRED_LIBRARIES})
|
||||
list(APPEND CMAKE_REQUIRED_LIBRARIES kyotocabinet)
|
||||
check_cxx_source_compiles("
|
||||
#include <kcpolydb.h>
|
||||
|
||||
int main() {
|
||||
kyotocabinet::TreeDB* db = new kyotocabinet::TreeDB();
|
||||
delete db;
|
||||
return 0;
|
||||
}
|
||||
" HAVE_KYOTOCABINET)
|
||||
set(CMAKE_REQUIRED_LIBRARIES ${OLD_CMAKE_REQURED_LIBRARIES})
|
||||
if(HAVE_KYOTOCABINET)
|
||||
leveldb_benchmark("${PROJECT_SOURCE_DIR}/benchmarks/db_bench_tree_db.cc")
|
||||
target_link_libraries(db_bench_tree_db kyotocabinet)
|
||||
endif(HAVE_KYOTOCABINET)
|
||||
endif(LEVELDB_BUILD_BENCHMARKS)
|
||||
|
||||
if(LEVELDB_INSTALL)
|
||||
include(GNUInstallDirs)
|
||||
install(TARGETS leveldb
|
||||
EXPORT leveldbTargets
|
||||
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
|
||||
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
)
|
||||
install(
|
||||
FILES
|
||||
"${PROJECT_SOURCE_DIR}/${LEVELDB_PUBLIC_INCLUDE_DIR}/c.h"
|
||||
"${PROJECT_SOURCE_DIR}/${LEVELDB_PUBLIC_INCLUDE_DIR}/cache.h"
|
||||
"${PROJECT_SOURCE_DIR}/${LEVELDB_PUBLIC_INCLUDE_DIR}/comparator.h"
|
||||
"${PROJECT_SOURCE_DIR}/${LEVELDB_PUBLIC_INCLUDE_DIR}/db.h"
|
||||
"${PROJECT_SOURCE_DIR}/${LEVELDB_PUBLIC_INCLUDE_DIR}/dumpfile.h"
|
||||
"${PROJECT_SOURCE_DIR}/${LEVELDB_PUBLIC_INCLUDE_DIR}/env.h"
|
||||
"${PROJECT_SOURCE_DIR}/${LEVELDB_PUBLIC_INCLUDE_DIR}/export.h"
|
||||
"${PROJECT_SOURCE_DIR}/${LEVELDB_PUBLIC_INCLUDE_DIR}/filter_policy.h"
|
||||
"${PROJECT_SOURCE_DIR}/${LEVELDB_PUBLIC_INCLUDE_DIR}/iterator.h"
|
||||
"${PROJECT_SOURCE_DIR}/${LEVELDB_PUBLIC_INCLUDE_DIR}/options.h"
|
||||
"${PROJECT_SOURCE_DIR}/${LEVELDB_PUBLIC_INCLUDE_DIR}/slice.h"
|
||||
"${PROJECT_SOURCE_DIR}/${LEVELDB_PUBLIC_INCLUDE_DIR}/status.h"
|
||||
"${PROJECT_SOURCE_DIR}/${LEVELDB_PUBLIC_INCLUDE_DIR}/table_builder.h"
|
||||
"${PROJECT_SOURCE_DIR}/${LEVELDB_PUBLIC_INCLUDE_DIR}/table.h"
|
||||
"${PROJECT_SOURCE_DIR}/${LEVELDB_PUBLIC_INCLUDE_DIR}/write_batch.h"
|
||||
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/leveldb
|
||||
)
|
||||
|
||||
include(CMakePackageConfigHelpers)
|
||||
write_basic_package_version_file(
|
||||
"${PROJECT_BINARY_DIR}/leveldbConfigVersion.cmake"
|
||||
COMPATIBILITY SameMajorVersion
|
||||
)
|
||||
install(
|
||||
EXPORT leveldbTargets
|
||||
NAMESPACE leveldb::
|
||||
DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/leveldb"
|
||||
)
|
||||
install(
|
||||
FILES
|
||||
"${PROJECT_SOURCE_DIR}/cmake/leveldbConfig.cmake"
|
||||
"${PROJECT_BINARY_DIR}/leveldbConfigVersion.cmake"
|
||||
DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/leveldb"
|
||||
)
|
||||
endif(LEVELDB_INSTALL)
|
@ -31,6 +31,6 @@ the CLA.
|
||||
|
||||
## Writing Code ##
|
||||
|
||||
If your contribution contains code, please make sure that it follows
|
||||
[the style guide](http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml).
|
||||
If your contribution contains code, please make sure that it follows
|
||||
[the style guide](http://google.github.io/styleguide/cppguide.html).
|
||||
Otherwise we will have to ask you to make changes, and that's no fun for anyone.
|
||||
|
231
Makefile
231
Makefile
@ -1,231 +0,0 @@
|
||||
# Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#-----------------------------------------------
|
||||
# Uncomment exactly one of the lines labelled (A), (B), and (C) below
|
||||
# to switch between compilation modes.
|
||||
|
||||
# (A) Production use (optimized mode)
|
||||
OPT ?= -O2 -DNDEBUG
|
||||
# (B) Debug mode, w/ full line-level debugging symbols
|
||||
# OPT ?= -g2
|
||||
# (C) Profiling mode: opt, but w/debugging symbols
|
||||
# OPT ?= -O2 -g2 -DNDEBUG
|
||||
#-----------------------------------------------
|
||||
|
||||
# detect what platform we're building on
|
||||
$(shell CC="$(CC)" CXX="$(CXX)" TARGET_OS="$(TARGET_OS)" \
|
||||
./build_detect_platform build_config.mk ./)
|
||||
# this file is generated by the previous line to set build flags and sources
|
||||
include build_config.mk
|
||||
|
||||
CFLAGS += -I. -I./include $(PLATFORM_CCFLAGS) $(OPT)
|
||||
CXXFLAGS += -I. -I./include $(PLATFORM_CXXFLAGS) $(OPT)
|
||||
|
||||
LDFLAGS += $(PLATFORM_LDFLAGS)
|
||||
LIBS += $(PLATFORM_LIBS)
|
||||
|
||||
LIBOBJECTS = $(SOURCES:.cc=.o)
|
||||
MEMENVOBJECTS = $(MEMENV_SOURCES:.cc=.o)
|
||||
|
||||
TESTUTIL = ./util/testutil.o
|
||||
TESTHARNESS = ./util/testharness.o $(TESTUTIL)
|
||||
|
||||
# Note: iOS should probably be using libtool, not ar.
|
||||
ifeq ($(PLATFORM), IOS)
|
||||
AR=xcrun ar
|
||||
endif
|
||||
|
||||
TESTS = \
|
||||
arena_test \
|
||||
autocompact_test \
|
||||
bloom_test \
|
||||
c_test \
|
||||
cache_test \
|
||||
coding_test \
|
||||
corruption_test \
|
||||
crc32c_test \
|
||||
db_test \
|
||||
dbformat_test \
|
||||
env_test \
|
||||
fault_injection_test \
|
||||
filename_test \
|
||||
filter_block_test \
|
||||
hash_test \
|
||||
issue178_test \
|
||||
issue200_test \
|
||||
log_test \
|
||||
memenv_test \
|
||||
skiplist_test \
|
||||
table_test \
|
||||
version_edit_test \
|
||||
version_set_test \
|
||||
write_batch_test
|
||||
|
||||
PROGRAMS = db_bench leveldbutil $(TESTS)
|
||||
BENCHMARKS = db_bench_sqlite3 db_bench_tree_db
|
||||
|
||||
LIBRARY = libleveldb.a
|
||||
MEMENVLIBRARY = libmemenv.a
|
||||
|
||||
default: all
|
||||
|
||||
# Should we build shared libraries?
|
||||
ifneq ($(PLATFORM_SHARED_EXT),)
|
||||
|
||||
ifneq ($(PLATFORM_SHARED_VERSIONED),true)
|
||||
SHARED1 = libleveldb.$(PLATFORM_SHARED_EXT)
|
||||
SHARED2 = $(SHARED1)
|
||||
SHARED3 = $(SHARED1)
|
||||
SHARED = $(SHARED1)
|
||||
else
|
||||
# Update db.h if you change these.
|
||||
SHARED_MAJOR = 1
|
||||
SHARED_MINOR = 18
|
||||
SHARED1 = libleveldb.$(PLATFORM_SHARED_EXT)
|
||||
SHARED2 = $(SHARED1).$(SHARED_MAJOR)
|
||||
SHARED3 = $(SHARED1).$(SHARED_MAJOR).$(SHARED_MINOR)
|
||||
SHARED = $(SHARED1) $(SHARED2) $(SHARED3)
|
||||
$(SHARED1): $(SHARED3)
|
||||
ln -fs $(SHARED3) $(SHARED1)
|
||||
$(SHARED2): $(SHARED3)
|
||||
ln -fs $(SHARED3) $(SHARED2)
|
||||
endif
|
||||
|
||||
$(SHARED3):
|
||||
$(CXX) $(LDFLAGS) $(PLATFORM_SHARED_LDFLAGS)$(SHARED2) $(CXXFLAGS) $(PLATFORM_SHARED_CFLAGS) $(SOURCES) -o $(SHARED3) $(LIBS)
|
||||
|
||||
endif # PLATFORM_SHARED_EXT
|
||||
|
||||
all: $(SHARED) $(LIBRARY)
|
||||
|
||||
check: all $(PROGRAMS) $(TESTS)
|
||||
for t in $(TESTS); do echo "***** Running $$t"; ./$$t || exit 1; done
|
||||
|
||||
clean:
|
||||
-rm -f $(PROGRAMS) $(BENCHMARKS) $(LIBRARY) $(SHARED) $(MEMENVLIBRARY) */*.o */*/*.o ios-x86/*/*.o ios-arm/*/*.o build_config.mk
|
||||
-rm -rf ios-x86/* ios-arm/*
|
||||
|
||||
$(LIBRARY): $(LIBOBJECTS)
|
||||
rm -f $@
|
||||
$(AR) -rs $@ $(LIBOBJECTS)
|
||||
|
||||
db_bench: db/db_bench.o $(LIBOBJECTS) $(TESTUTIL)
|
||||
$(CXX) $(LDFLAGS) db/db_bench.o $(LIBOBJECTS) $(TESTUTIL) -o $@ $(LIBS)
|
||||
|
||||
db_bench_sqlite3: doc/bench/db_bench_sqlite3.o $(LIBOBJECTS) $(TESTUTIL)
|
||||
$(CXX) $(LDFLAGS) doc/bench/db_bench_sqlite3.o $(LIBOBJECTS) $(TESTUTIL) -o $@ -lsqlite3 $(LIBS)
|
||||
|
||||
db_bench_tree_db: doc/bench/db_bench_tree_db.o $(LIBOBJECTS) $(TESTUTIL)
|
||||
$(CXX) $(LDFLAGS) doc/bench/db_bench_tree_db.o $(LIBOBJECTS) $(TESTUTIL) -o $@ -lkyotocabinet $(LIBS)
|
||||
|
||||
leveldbutil: db/leveldb_main.o $(LIBOBJECTS)
|
||||
$(CXX) $(LDFLAGS) db/leveldb_main.o $(LIBOBJECTS) -o $@ $(LIBS)
|
||||
|
||||
arena_test: util/arena_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||
$(CXX) $(LDFLAGS) util/arena_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
|
||||
|
||||
autocompact_test: db/autocompact_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||
$(CXX) $(LDFLAGS) db/autocompact_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
|
||||
|
||||
bloom_test: util/bloom_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||
$(CXX) $(LDFLAGS) util/bloom_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
|
||||
|
||||
c_test: db/c_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||
$(CXX) $(LDFLAGS) db/c_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
|
||||
|
||||
cache_test: util/cache_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||
$(CXX) $(LDFLAGS) util/cache_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
|
||||
|
||||
coding_test: util/coding_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||
$(CXX) $(LDFLAGS) util/coding_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
|
||||
|
||||
corruption_test: db/corruption_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||
$(CXX) $(LDFLAGS) db/corruption_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
|
||||
|
||||
crc32c_test: util/crc32c_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||
$(CXX) $(LDFLAGS) util/crc32c_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
|
||||
|
||||
db_test: db/db_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||
$(CXX) $(LDFLAGS) db/db_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
|
||||
|
||||
dbformat_test: db/dbformat_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||
$(CXX) $(LDFLAGS) db/dbformat_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
|
||||
|
||||
env_test: util/env_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||
$(CXX) $(LDFLAGS) util/env_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
|
||||
|
||||
fault_injection_test: db/fault_injection_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||
$(CXX) $(LDFLAGS) db/fault_injection_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
|
||||
|
||||
filename_test: db/filename_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||
$(CXX) $(LDFLAGS) db/filename_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
|
||||
|
||||
filter_block_test: table/filter_block_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||
$(CXX) $(LDFLAGS) table/filter_block_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
|
||||
|
||||
hash_test: util/hash_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||
$(CXX) $(LDFLAGS) util/hash_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
|
||||
|
||||
issue178_test: issues/issue178_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||
$(CXX) $(LDFLAGS) issues/issue178_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
|
||||
|
||||
issue200_test: issues/issue200_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||
$(CXX) $(LDFLAGS) issues/issue200_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
|
||||
|
||||
log_test: db/log_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||
$(CXX) $(LDFLAGS) db/log_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
|
||||
|
||||
table_test: table/table_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||
$(CXX) $(LDFLAGS) table/table_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
|
||||
|
||||
skiplist_test: db/skiplist_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||
$(CXX) $(LDFLAGS) db/skiplist_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
|
||||
|
||||
version_edit_test: db/version_edit_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||
$(CXX) $(LDFLAGS) db/version_edit_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
|
||||
|
||||
version_set_test: db/version_set_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||
$(CXX) $(LDFLAGS) db/version_set_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
|
||||
|
||||
write_batch_test: db/write_batch_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||
$(CXX) $(LDFLAGS) db/write_batch_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
|
||||
|
||||
$(MEMENVLIBRARY) : $(MEMENVOBJECTS)
|
||||
rm -f $@
|
||||
$(AR) -rs $@ $(MEMENVOBJECTS)
|
||||
|
||||
memenv_test : helpers/memenv/memenv_test.o $(MEMENVLIBRARY) $(LIBRARY) $(TESTHARNESS)
|
||||
$(CXX) $(LDFLAGS) helpers/memenv/memenv_test.o $(MEMENVLIBRARY) $(LIBRARY) $(TESTHARNESS) -o $@ $(LIBS)
|
||||
|
||||
ifeq ($(PLATFORM), IOS)
|
||||
# For iOS, create universal object files to be used on both the simulator and
|
||||
# a device.
|
||||
PLATFORMSROOT=/Applications/Xcode.app/Contents/Developer/Platforms
|
||||
SIMULATORROOT=$(PLATFORMSROOT)/iPhoneSimulator.platform/Developer
|
||||
DEVICEROOT=$(PLATFORMSROOT)/iPhoneOS.platform/Developer
|
||||
IOSVERSION=$(shell defaults read $(PLATFORMSROOT)/iPhoneOS.platform/version CFBundleShortVersionString)
|
||||
IOSARCH=-arch armv6 -arch armv7 -arch armv7s -arch arm64
|
||||
|
||||
.cc.o:
|
||||
mkdir -p ios-x86/$(dir $@)
|
||||
xcrun -sdk iphonesimulator $(CXX) $(CXXFLAGS) -isysroot $(SIMULATORROOT)/SDKs/iPhoneSimulator$(IOSVERSION).sdk -arch i686 -arch x86_64 -c $< -o ios-x86/$@
|
||||
mkdir -p ios-arm/$(dir $@)
|
||||
xcrun -sdk iphoneos $(CXX) $(CXXFLAGS) -isysroot $(DEVICEROOT)/SDKs/iPhoneOS$(IOSVERSION).sdk $(IOSARCH) -c $< -o ios-arm/$@
|
||||
xcrun lipo ios-x86/$@ ios-arm/$@ -create -output $@
|
||||
|
||||
.c.o:
|
||||
mkdir -p ios-x86/$(dir $@)
|
||||
xcrun -sdk iphonesimulator $(CC) $(CFLAGS) -isysroot $(SIMULATORROOT)/SDKs/iPhoneSimulator$(IOSVERSION).sdk -arch i686 -arch x86_64 -c $< -o ios-x86/$@
|
||||
mkdir -p ios-arm/$(dir $@)
|
||||
xcrun -sdk iphoneos $(CC) $(CFLAGS) -isysroot $(DEVICEROOT)/SDKs/iPhoneOS$(IOSVERSION).sdk $(IOSARCH) -c $< -o ios-arm/$@
|
||||
xcrun lipo ios-x86/$@ ios-arm/$@ -create -output $@
|
||||
|
||||
else
|
||||
.cc.o:
|
||||
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||
|
||||
.c.o:
|
||||
$(CC) $(CFLAGS) -c $< -o $@
|
||||
endif
|
113
README.md
113
README.md
@ -1,8 +1,12 @@
|
||||
**LevelDB is a fast key-value storage library written at Google that provides an ordered mapping from string keys to string values.**
|
||||
|
||||
[![Build Status](https://travis-ci.org/google/leveldb.svg?branch=master)](https://travis-ci.org/google/leveldb)
|
||||
[![Build status](https://ci.appveyor.com/api/projects/status/g2j5j4rfkda6eyw5/branch/master?svg=true)](https://ci.appveyor.com/project/pwnall/leveldb)
|
||||
|
||||
Authors: Sanjay Ghemawat (sanjay@google.com) and Jeff Dean (jeff@google.com)
|
||||
|
||||
# Features
|
||||
|
||||
* Keys and values are arbitrary byte arrays.
|
||||
* Data is stored sorted by key.
|
||||
* Callers can provide a custom comparison function to override the sort order.
|
||||
@ -10,16 +14,98 @@ Authors: Sanjay Ghemawat (sanjay@google.com) and Jeff Dean (jeff@google.com)
|
||||
* Multiple changes can be made in one atomic batch.
|
||||
* Users can create a transient snapshot to get a consistent view of data.
|
||||
* Forward and backward iteration is supported over the data.
|
||||
* Data is automatically compressed using the [Snappy compression library](http://code.google.com/p/snappy).
|
||||
* Data is automatically compressed using the [Snappy compression library](http://google.github.io/snappy/).
|
||||
* External activity (file system operations etc.) is relayed through a virtual interface so users can customize the operating system interactions.
|
||||
* [Detailed documentation](http://htmlpreview.github.io/?https://github.com/google/leveldb/blob/master/doc/index.html) about how to use the library is included with the source code.
|
||||
|
||||
# Documentation
|
||||
|
||||
[LevelDB library documentation](https://github.com/google/leveldb/blob/master/doc/index.md) is online and bundled with the source code.
|
||||
|
||||
# Limitations
|
||||
|
||||
* This is not a SQL database. It does not have a relational data model, it does not support SQL queries, and it has no support for indexes.
|
||||
* Only a single process (possibly multi-threaded) can access a particular database at a time.
|
||||
* There is no client-server support built into the library. An application that needs such support will have to wrap its own server around the library.
|
||||
|
||||
# Building
|
||||
|
||||
This project supports [CMake](https://cmake.org/) out of the box.
|
||||
|
||||
### Build for POSIX
|
||||
|
||||
Quick start:
|
||||
|
||||
```bash
|
||||
mkdir -p build && cd build
|
||||
cmake -DCMAKE_BUILD_TYPE=Release .. && cmake --build .
|
||||
```
|
||||
|
||||
### Building for Windows
|
||||
|
||||
First generate the Visual Studio 2017 project/solution files:
|
||||
|
||||
```cmd
|
||||
mkdir build
|
||||
cd build
|
||||
cmake -G "Visual Studio 15" ..
|
||||
```
|
||||
The default will build for x86. For 64-bit run:
|
||||
|
||||
```cmd
|
||||
cmake -G "Visual Studio 15 Win64" ..
|
||||
```
|
||||
|
||||
To compile the Windows solution from the command-line:
|
||||
|
||||
```cmd
|
||||
devenv /build Debug leveldb.sln
|
||||
```
|
||||
|
||||
or open leveldb.sln in Visual Studio and build from within.
|
||||
|
||||
Please see the CMake documentation and `CMakeLists.txt` for more advanced usage.
|
||||
|
||||
# Contributing to the leveldb Project
|
||||
|
||||
The leveldb project welcomes contributions. leveldb's primary goal is to be
|
||||
a reliable and fast key/value store. Changes that are in line with the
|
||||
features/limitations outlined above, and meet the requirements below,
|
||||
will be considered.
|
||||
|
||||
Contribution requirements:
|
||||
|
||||
1. **Tested platforms only**. We _generally_ will only accept changes for
|
||||
platforms that are compiled and tested. This means POSIX (for Linux and
|
||||
macOS) or Windows. Very small changes will sometimes be accepted, but
|
||||
consider that more of an exception than the rule.
|
||||
|
||||
2. **Stable API**. We strive very hard to maintain a stable API. Changes that
|
||||
require changes for projects using leveldb _might_ be rejected without
|
||||
sufficient benefit to the project.
|
||||
|
||||
3. **Tests**: All changes must be accompanied by a new (or changed) test, or
|
||||
a sufficient explanation as to why a new (or changed) test is not required.
|
||||
|
||||
4. **Consistent Style**: This project conforms to the
|
||||
[Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html).
|
||||
To ensure your changes are properly formatted please run:
|
||||
|
||||
```
|
||||
clang-format -i --style=file <file>
|
||||
```
|
||||
|
||||
## Submitting a Pull Request
|
||||
|
||||
Before any pull request will be accepted the author must first sign a
|
||||
Contributor License Agreement (CLA) at https://cla.developers.google.com/.
|
||||
|
||||
In order to keep the commit timeline linear
|
||||
[squash](https://git-scm.com/book/en/v2/Git-Tools-Rewriting-History#Squashing-Commits)
|
||||
your changes down to a single commit and [rebase](https://git-scm.com/docs/git-rebase)
|
||||
on google/leveldb/master. This keeps the commit timeline linear and more easily sync'ed
|
||||
with the internal repository at Google. More information at GitHub's
|
||||
[About Git rebase](https://help.github.com/articles/about-git-rebase/) page.
|
||||
|
||||
# Performance
|
||||
|
||||
Here is a performance report (with explanations) from the run of the
|
||||
@ -78,29 +164,30 @@ by the one or two disk seeks needed to fetch the data from disk.
|
||||
Write performance will be mostly unaffected by whether or not the
|
||||
working set fits in memory.
|
||||
|
||||
readrandom : 16.677 micros/op; (approximately 60,000 reads per second)
|
||||
readseq : 0.476 micros/op; 232.3 MB/s
|
||||
readreverse : 0.724 micros/op; 152.9 MB/s
|
||||
readrandom : 16.677 micros/op; (approximately 60,000 reads per second)
|
||||
readseq : 0.476 micros/op; 232.3 MB/s
|
||||
readreverse : 0.724 micros/op; 152.9 MB/s
|
||||
|
||||
LevelDB compacts its underlying storage data in the background to
|
||||
improve read performance. The results listed above were done
|
||||
immediately after a lot of random writes. The results after
|
||||
compactions (which are usually triggered automatically) are better.
|
||||
|
||||
readrandom : 11.602 micros/op; (approximately 85,000 reads per second)
|
||||
readseq : 0.423 micros/op; 261.8 MB/s
|
||||
readreverse : 0.663 micros/op; 166.9 MB/s
|
||||
readrandom : 11.602 micros/op; (approximately 85,000 reads per second)
|
||||
readseq : 0.423 micros/op; 261.8 MB/s
|
||||
readreverse : 0.663 micros/op; 166.9 MB/s
|
||||
|
||||
Some of the high cost of reads comes from repeated decompression of blocks
|
||||
read from disk. If we supply enough cache to the leveldb so it can hold the
|
||||
uncompressed blocks in memory, the read performance improves again:
|
||||
|
||||
readrandom : 9.775 micros/op; (approximately 100,000 reads per second before compaction)
|
||||
readrandom : 5.215 micros/op; (approximately 190,000 reads per second after compaction)
|
||||
readrandom : 9.775 micros/op; (approximately 100,000 reads per second before compaction)
|
||||
readrandom : 5.215 micros/op; (approximately 190,000 reads per second after compaction)
|
||||
|
||||
## Repository contents
|
||||
|
||||
See doc/index.html for more explanation. See doc/impl.html for a brief overview of the implementation.
|
||||
See [doc/index.md](doc/index.md) for more explanation. See
|
||||
[doc/impl.md](doc/impl.md) for a brief overview of the implementation.
|
||||
|
||||
The public interface is in include/*.h. Callers should not include or
|
||||
rely on the details of any other header files in this package. Those
|
||||
@ -113,7 +200,7 @@ Guide to header files:
|
||||
* **include/options.h**: Control over the behavior of an entire database,
|
||||
and also control over the behavior of individual reads and writes.
|
||||
|
||||
* **include/comparator.h**: Abstraction for user-specified comparison function.
|
||||
* **include/comparator.h**: Abstraction for user-specified comparison function.
|
||||
If you want just bytewise comparison of keys, you can use the default
|
||||
comparator, but clients can write their own comparator implementations if they
|
||||
want custom ordering (e.g. to handle different character encodings, etc.)
|
||||
@ -130,7 +217,7 @@ length into some other byte array.
|
||||
* **include/status.h**: Status is returned from many of the public interfaces
|
||||
and is used to report success and various kinds of errors.
|
||||
|
||||
* **include/env.h**:
|
||||
* **include/env.h**:
|
||||
Abstraction of the OS environment. A posix implementation of this interface is
|
||||
in util/env_posix.cc
|
||||
|
||||
|
@ -2,14 +2,14 @@
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "db/db_impl.h"
|
||||
#include "db/version_set.h"
|
||||
#include <sys/types.h>
|
||||
|
||||
#include "leveldb/cache.h"
|
||||
#include "leveldb/db.h"
|
||||
#include "leveldb/env.h"
|
||||
#include "leveldb/filter_policy.h"
|
||||
#include "leveldb/write_batch.h"
|
||||
#include "port/port.h"
|
||||
#include "util/crc32c.h"
|
||||
@ -35,7 +35,6 @@
|
||||
// seekrandom -- N random seeks
|
||||
// open -- cost of opening a DB
|
||||
// crc32c -- repeated crc32c of 4K of data
|
||||
// acquireload -- load N*1000 times
|
||||
// Meta operations:
|
||||
// compact -- Compact the entire DB
|
||||
// stats -- Print DB stats
|
||||
@ -57,9 +56,7 @@ static const char* FLAGS_benchmarks =
|
||||
"fill100K,"
|
||||
"crc32c,"
|
||||
"snappycomp,"
|
||||
"snappyuncomp,"
|
||||
"acquireload,"
|
||||
;
|
||||
"snappyuncomp,";
|
||||
|
||||
// Number of key/values to place in database
|
||||
static int FLAGS_num = 1000000;
|
||||
@ -84,6 +81,14 @@ static bool FLAGS_histogram = false;
|
||||
// (initialized to default value by "main")
|
||||
static int FLAGS_write_buffer_size = 0;
|
||||
|
||||
// Number of bytes written to each file.
|
||||
// (initialized to default value by "main")
|
||||
static int FLAGS_max_file_size = 0;
|
||||
|
||||
// Approximate size of user data packed per block (before compression).
|
||||
// (initialized to default value by "main")
|
||||
static int FLAGS_block_size = 0;
|
||||
|
||||
// Number of bytes to use as a cache of uncompressed data.
|
||||
// Negative means use default settings.
|
||||
static int FLAGS_cache_size = -1;
|
||||
@ -100,12 +105,16 @@ static int FLAGS_bloom_bits = -1;
|
||||
// benchmark will fail.
|
||||
static bool FLAGS_use_existing_db = false;
|
||||
|
||||
// If true, reuse existing log/MANIFEST files when re-opening a database.
|
||||
static bool FLAGS_reuse_logs = false;
|
||||
|
||||
// Use the db with the following name.
|
||||
static const char* FLAGS_db = NULL;
|
||||
static const char* FLAGS_db = nullptr;
|
||||
|
||||
namespace leveldb {
|
||||
|
||||
namespace {
|
||||
leveldb::Env* g_env = nullptr;
|
||||
|
||||
// Helper for quickly generating random data.
|
||||
class RandomGenerator {
|
||||
@ -139,17 +148,19 @@ class RandomGenerator {
|
||||
}
|
||||
};
|
||||
|
||||
#if defined(__linux)
|
||||
static Slice TrimSpace(Slice s) {
|
||||
size_t start = 0;
|
||||
while (start < s.size() && isspace(s[start])) {
|
||||
start++;
|
||||
}
|
||||
size_t limit = s.size();
|
||||
while (limit > start && isspace(s[limit-1])) {
|
||||
while (limit > start && isspace(s[limit - 1])) {
|
||||
limit--;
|
||||
}
|
||||
return Slice(s.data() + start, limit - start);
|
||||
}
|
||||
#endif
|
||||
|
||||
static void AppendWithSpace(std::string* str, Slice msg) {
|
||||
if (msg.empty()) return;
|
||||
@ -176,14 +187,12 @@ class Stats {
|
||||
|
||||
void Start() {
|
||||
next_report_ = 100;
|
||||
last_op_finish_ = start_;
|
||||
hist_.Clear();
|
||||
done_ = 0;
|
||||
bytes_ = 0;
|
||||
seconds_ = 0;
|
||||
start_ = Env::Default()->NowMicros();
|
||||
finish_ = start_;
|
||||
message_.clear();
|
||||
start_ = finish_ = last_op_finish_ = g_env->NowMicros();
|
||||
}
|
||||
|
||||
void Merge(const Stats& other) {
|
||||
@ -199,17 +208,15 @@ class Stats {
|
||||
}
|
||||
|
||||
void Stop() {
|
||||
finish_ = Env::Default()->NowMicros();
|
||||
finish_ = g_env->NowMicros();
|
||||
seconds_ = (finish_ - start_) * 1e-6;
|
||||
}
|
||||
|
||||
void AddMessage(Slice msg) {
|
||||
AppendWithSpace(&message_, msg);
|
||||
}
|
||||
void AddMessage(Slice msg) { AppendWithSpace(&message_, msg); }
|
||||
|
||||
void FinishedSingleOp() {
|
||||
if (FLAGS_histogram) {
|
||||
double now = Env::Default()->NowMicros();
|
||||
double now = g_env->NowMicros();
|
||||
double micros = now - last_op_finish_;
|
||||
hist_.Add(micros);
|
||||
if (micros > 20000) {
|
||||
@ -221,21 +228,26 @@ class Stats {
|
||||
|
||||
done_++;
|
||||
if (done_ >= next_report_) {
|
||||
if (next_report_ < 1000) next_report_ += 100;
|
||||
else if (next_report_ < 5000) next_report_ += 500;
|
||||
else if (next_report_ < 10000) next_report_ += 1000;
|
||||
else if (next_report_ < 50000) next_report_ += 5000;
|
||||
else if (next_report_ < 100000) next_report_ += 10000;
|
||||
else if (next_report_ < 500000) next_report_ += 50000;
|
||||
else next_report_ += 100000;
|
||||
if (next_report_ < 1000)
|
||||
next_report_ += 100;
|
||||
else if (next_report_ < 5000)
|
||||
next_report_ += 500;
|
||||
else if (next_report_ < 10000)
|
||||
next_report_ += 1000;
|
||||
else if (next_report_ < 50000)
|
||||
next_report_ += 5000;
|
||||
else if (next_report_ < 100000)
|
||||
next_report_ += 10000;
|
||||
else if (next_report_ < 500000)
|
||||
next_report_ += 50000;
|
||||
else
|
||||
next_report_ += 100000;
|
||||
fprintf(stderr, "... finished %d ops%30s\r", done_, "");
|
||||
fflush(stderr);
|
||||
}
|
||||
}
|
||||
|
||||
void AddBytes(int64_t n) {
|
||||
bytes_ += n;
|
||||
}
|
||||
void AddBytes(int64_t n) { bytes_ += n; }
|
||||
|
||||
void Report(const Slice& name) {
|
||||
// Pretend at least one op was done in case we are running a benchmark
|
||||
@ -254,11 +266,8 @@ class Stats {
|
||||
}
|
||||
AppendWithSpace(&extra, message_);
|
||||
|
||||
fprintf(stdout, "%-12s : %11.3f micros/op;%s%s\n",
|
||||
name.ToString().c_str(),
|
||||
seconds_ * 1e6 / done_,
|
||||
(extra.empty() ? "" : " "),
|
||||
extra.c_str());
|
||||
fprintf(stdout, "%-12s : %11.3f micros/op;%s%s\n", name.ToString().c_str(),
|
||||
seconds_ * 1e6 / done_, (extra.empty() ? "" : " "), extra.c_str());
|
||||
if (FLAGS_histogram) {
|
||||
fprintf(stdout, "Microseconds per op:\n%s\n", hist_.ToString().c_str());
|
||||
}
|
||||
@ -269,8 +278,8 @@ class Stats {
|
||||
// State shared by all concurrent executions of the same benchmark.
|
||||
struct SharedState {
|
||||
port::Mutex mu;
|
||||
port::CondVar cv;
|
||||
int total;
|
||||
port::CondVar cv GUARDED_BY(mu);
|
||||
int total GUARDED_BY(mu);
|
||||
|
||||
// Each thread goes through the following states:
|
||||
// (1) initializing
|
||||
@ -278,24 +287,22 @@ struct SharedState {
|
||||
// (3) running
|
||||
// (4) done
|
||||
|
||||
int num_initialized;
|
||||
int num_done;
|
||||
bool start;
|
||||
int num_initialized GUARDED_BY(mu);
|
||||
int num_done GUARDED_BY(mu);
|
||||
bool start GUARDED_BY(mu);
|
||||
|
||||
SharedState() : cv(&mu) { }
|
||||
SharedState(int total)
|
||||
: cv(&mu), total(total), num_initialized(0), num_done(0), start(false) {}
|
||||
};
|
||||
|
||||
// Per-thread state for concurrent executions of the same benchmark.
|
||||
struct ThreadState {
|
||||
int tid; // 0..n-1 when running in n threads
|
||||
Random rand; // Has different seeds for different threads
|
||||
int tid; // 0..n-1 when running in n threads
|
||||
Random rand; // Has different seeds for different threads
|
||||
Stats stats;
|
||||
SharedState* shared;
|
||||
|
||||
ThreadState(int index)
|
||||
: tid(index),
|
||||
rand(1000 + index) {
|
||||
}
|
||||
ThreadState(int index) : tid(index), rand(1000 + index), shared(nullptr) {}
|
||||
};
|
||||
|
||||
} // namespace
|
||||
@ -321,20 +328,20 @@ class Benchmark {
|
||||
static_cast<int>(FLAGS_value_size * FLAGS_compression_ratio + 0.5));
|
||||
fprintf(stdout, "Entries: %d\n", num_);
|
||||
fprintf(stdout, "RawSize: %.1f MB (estimated)\n",
|
||||
((static_cast<int64_t>(kKeySize + FLAGS_value_size) * num_)
|
||||
/ 1048576.0));
|
||||
((static_cast<int64_t>(kKeySize + FLAGS_value_size) * num_) /
|
||||
1048576.0));
|
||||
fprintf(stdout, "FileSize: %.1f MB (estimated)\n",
|
||||
(((kKeySize + FLAGS_value_size * FLAGS_compression_ratio) * num_)
|
||||
/ 1048576.0));
|
||||
(((kKeySize + FLAGS_value_size * FLAGS_compression_ratio) * num_) /
|
||||
1048576.0));
|
||||
PrintWarnings();
|
||||
fprintf(stdout, "------------------------------------------------\n");
|
||||
}
|
||||
|
||||
void PrintWarnings() {
|
||||
#if defined(__GNUC__) && !defined(__OPTIMIZE__)
|
||||
fprintf(stdout,
|
||||
"WARNING: Optimization is disabled: benchmarks unnecessarily slow\n"
|
||||
);
|
||||
fprintf(
|
||||
stdout,
|
||||
"WARNING: Optimization is disabled: benchmarks unnecessarily slow\n");
|
||||
#endif
|
||||
#ifndef NDEBUG
|
||||
fprintf(stdout,
|
||||
@ -352,22 +359,22 @@ class Benchmark {
|
||||
}
|
||||
|
||||
void PrintEnvironment() {
|
||||
fprintf(stderr, "LevelDB: version %d.%d\n",
|
||||
kMajorVersion, kMinorVersion);
|
||||
fprintf(stderr, "LevelDB: version %d.%d\n", kMajorVersion,
|
||||
kMinorVersion);
|
||||
|
||||
#if defined(__linux)
|
||||
time_t now = time(NULL);
|
||||
time_t now = time(nullptr);
|
||||
fprintf(stderr, "Date: %s", ctime(&now)); // ctime() adds newline
|
||||
|
||||
FILE* cpuinfo = fopen("/proc/cpuinfo", "r");
|
||||
if (cpuinfo != NULL) {
|
||||
if (cpuinfo != nullptr) {
|
||||
char line[1000];
|
||||
int num_cpus = 0;
|
||||
std::string cpu_type;
|
||||
std::string cache_size;
|
||||
while (fgets(line, sizeof(line), cpuinfo) != NULL) {
|
||||
while (fgets(line, sizeof(line), cpuinfo) != nullptr) {
|
||||
const char* sep = strchr(line, ':');
|
||||
if (sep == NULL) {
|
||||
if (sep == nullptr) {
|
||||
continue;
|
||||
}
|
||||
Slice key = TrimSpace(Slice(line, sep - 1 - line));
|
||||
@ -388,21 +395,21 @@ class Benchmark {
|
||||
|
||||
public:
|
||||
Benchmark()
|
||||
: cache_(FLAGS_cache_size >= 0 ? NewLRUCache(FLAGS_cache_size) : NULL),
|
||||
filter_policy_(FLAGS_bloom_bits >= 0
|
||||
? NewBloomFilterPolicy(FLAGS_bloom_bits)
|
||||
: NULL),
|
||||
db_(NULL),
|
||||
num_(FLAGS_num),
|
||||
value_size_(FLAGS_value_size),
|
||||
entries_per_batch_(1),
|
||||
reads_(FLAGS_reads < 0 ? FLAGS_num : FLAGS_reads),
|
||||
heap_counter_(0) {
|
||||
: cache_(FLAGS_cache_size >= 0 ? NewLRUCache(FLAGS_cache_size) : nullptr),
|
||||
filter_policy_(FLAGS_bloom_bits >= 0
|
||||
? NewBloomFilterPolicy(FLAGS_bloom_bits)
|
||||
: nullptr),
|
||||
db_(nullptr),
|
||||
num_(FLAGS_num),
|
||||
value_size_(FLAGS_value_size),
|
||||
entries_per_batch_(1),
|
||||
reads_(FLAGS_reads < 0 ? FLAGS_num : FLAGS_reads),
|
||||
heap_counter_(0) {
|
||||
std::vector<std::string> files;
|
||||
Env::Default()->GetChildren(FLAGS_db, &files);
|
||||
g_env->GetChildren(FLAGS_db, &files);
|
||||
for (size_t i = 0; i < files.size(); i++) {
|
||||
if (Slice(files[i]).starts_with("heap-")) {
|
||||
Env::Default()->DeleteFile(std::string(FLAGS_db) + "/" + files[i]);
|
||||
g_env->DeleteFile(std::string(FLAGS_db) + "/" + files[i]);
|
||||
}
|
||||
}
|
||||
if (!FLAGS_use_existing_db) {
|
||||
@ -421,12 +428,12 @@ class Benchmark {
|
||||
Open();
|
||||
|
||||
const char* benchmarks = FLAGS_benchmarks;
|
||||
while (benchmarks != NULL) {
|
||||
while (benchmarks != nullptr) {
|
||||
const char* sep = strchr(benchmarks, ',');
|
||||
Slice name;
|
||||
if (sep == NULL) {
|
||||
if (sep == nullptr) {
|
||||
name = benchmarks;
|
||||
benchmarks = NULL;
|
||||
benchmarks = nullptr;
|
||||
} else {
|
||||
name = Slice(benchmarks, sep - benchmarks);
|
||||
benchmarks = sep + 1;
|
||||
@ -439,7 +446,7 @@ class Benchmark {
|
||||
entries_per_batch_ = 1;
|
||||
write_options_ = WriteOptions();
|
||||
|
||||
void (Benchmark::*method)(ThreadState*) = NULL;
|
||||
void (Benchmark::*method)(ThreadState*) = nullptr;
|
||||
bool fresh_db = false;
|
||||
int num_threads = FLAGS_threads;
|
||||
|
||||
@ -496,8 +503,6 @@ class Benchmark {
|
||||
method = &Benchmark::Compact;
|
||||
} else if (name == Slice("crc32c")) {
|
||||
method = &Benchmark::Crc32c;
|
||||
} else if (name == Slice("acquireload")) {
|
||||
method = &Benchmark::AcquireLoad;
|
||||
} else if (name == Slice("snappycomp")) {
|
||||
method = &Benchmark::SnappyCompress;
|
||||
} else if (name == Slice("snappyuncomp")) {
|
||||
@ -509,7 +514,7 @@ class Benchmark {
|
||||
} else if (name == Slice("sstables")) {
|
||||
PrintStats("leveldb.sstables");
|
||||
} else {
|
||||
if (name != Slice()) { // No error message for empty name
|
||||
if (!name.empty()) { // No error message for empty name
|
||||
fprintf(stderr, "unknown benchmark '%s'\n", name.ToString().c_str());
|
||||
}
|
||||
}
|
||||
@ -518,16 +523,16 @@ class Benchmark {
|
||||
if (FLAGS_use_existing_db) {
|
||||
fprintf(stdout, "%-12s : skipped (--use_existing_db is true)\n",
|
||||
name.ToString().c_str());
|
||||
method = NULL;
|
||||
method = nullptr;
|
||||
} else {
|
||||
delete db_;
|
||||
db_ = NULL;
|
||||
db_ = nullptr;
|
||||
DestroyDB(FLAGS_db, Options());
|
||||
Open();
|
||||
}
|
||||
}
|
||||
|
||||
if (method != NULL) {
|
||||
if (method != nullptr) {
|
||||
RunBenchmark(num_threads, name, method);
|
||||
}
|
||||
}
|
||||
@ -571,11 +576,7 @@ class Benchmark {
|
||||
|
||||
void RunBenchmark(int n, Slice name,
|
||||
void (Benchmark::*method)(ThreadState*)) {
|
||||
SharedState shared;
|
||||
shared.total = n;
|
||||
shared.num_initialized = 0;
|
||||
shared.num_done = 0;
|
||||
shared.start = false;
|
||||
SharedState shared(n);
|
||||
|
||||
ThreadArg* arg = new ThreadArg[n];
|
||||
for (int i = 0; i < n; i++) {
|
||||
@ -584,7 +585,7 @@ class Benchmark {
|
||||
arg[i].shared = &shared;
|
||||
arg[i].thread = new ThreadState(i);
|
||||
arg[i].thread->shared = &shared;
|
||||
Env::Default()->StartThread(ThreadBody, &arg[i]);
|
||||
g_env->StartThread(ThreadBody, &arg[i]);
|
||||
}
|
||||
|
||||
shared.mu.Lock();
|
||||
@ -629,22 +630,6 @@ class Benchmark {
|
||||
thread->stats.AddMessage(label);
|
||||
}
|
||||
|
||||
void AcquireLoad(ThreadState* thread) {
|
||||
int dummy;
|
||||
port::AtomicPointer ap(&dummy);
|
||||
int count = 0;
|
||||
void *ptr = NULL;
|
||||
thread->stats.AddMessage("(each op is 1000 loads)");
|
||||
while (count < 100000) {
|
||||
for (int i = 0; i < 1000; i++) {
|
||||
ptr = ap.Acquire_Load();
|
||||
}
|
||||
count++;
|
||||
thread->stats.FinishedSingleOp();
|
||||
}
|
||||
if (ptr == NULL) exit(1); // Disable unused variable warning.
|
||||
}
|
||||
|
||||
void SnappyCompress(ThreadState* thread) {
|
||||
RandomGenerator gen;
|
||||
Slice input = gen.Generate(Options().block_size);
|
||||
@ -678,8 +663,8 @@ class Benchmark {
|
||||
int64_t bytes = 0;
|
||||
char* uncompressed = new char[input.size()];
|
||||
while (ok && bytes < 1024 * 1048576) { // Compress 1G
|
||||
ok = port::Snappy_Uncompress(compressed.data(), compressed.size(),
|
||||
uncompressed);
|
||||
ok = port::Snappy_Uncompress(compressed.data(), compressed.size(),
|
||||
uncompressed);
|
||||
bytes += input.size();
|
||||
thread->stats.FinishedSingleOp();
|
||||
}
|
||||
@ -693,13 +678,17 @@ class Benchmark {
|
||||
}
|
||||
|
||||
void Open() {
|
||||
assert(db_ == NULL);
|
||||
assert(db_ == nullptr);
|
||||
Options options;
|
||||
options.env = g_env;
|
||||
options.create_if_missing = !FLAGS_use_existing_db;
|
||||
options.block_cache = cache_;
|
||||
options.write_buffer_size = FLAGS_write_buffer_size;
|
||||
options.max_file_size = FLAGS_max_file_size;
|
||||
options.block_size = FLAGS_block_size;
|
||||
options.max_open_files = FLAGS_open_files;
|
||||
options.filter_policy = filter_policy_;
|
||||
options.reuse_logs = FLAGS_reuse_logs;
|
||||
Status s = DB::Open(options, FLAGS_db, &db_);
|
||||
if (!s.ok()) {
|
||||
fprintf(stderr, "open error: %s\n", s.ToString().c_str());
|
||||
@ -715,13 +704,9 @@ class Benchmark {
|
||||
}
|
||||
}
|
||||
|
||||
void WriteSeq(ThreadState* thread) {
|
||||
DoWrite(thread, true);
|
||||
}
|
||||
void WriteSeq(ThreadState* thread) { DoWrite(thread, true); }
|
||||
|
||||
void WriteRandom(ThreadState* thread) {
|
||||
DoWrite(thread, false);
|
||||
}
|
||||
void WriteRandom(ThreadState* thread) { DoWrite(thread, false); }
|
||||
|
||||
void DoWrite(ThreadState* thread, bool seq) {
|
||||
if (num_ != FLAGS_num) {
|
||||
@ -737,7 +722,7 @@ class Benchmark {
|
||||
for (int i = 0; i < num_; i += entries_per_batch_) {
|
||||
batch.Clear();
|
||||
for (int j = 0; j < entries_per_batch_; j++) {
|
||||
const int k = seq ? i+j : (thread->rand.Next() % FLAGS_num);
|
||||
const int k = seq ? i + j : (thread->rand.Next() % FLAGS_num);
|
||||
char key[100];
|
||||
snprintf(key, sizeof(key), "%016d", k);
|
||||
batch.Put(key, gen.Generate(value_size_));
|
||||
@ -847,7 +832,7 @@ class Benchmark {
|
||||
for (int i = 0; i < num_; i += entries_per_batch_) {
|
||||
batch.Clear();
|
||||
for (int j = 0; j < entries_per_batch_; j++) {
|
||||
const int k = seq ? i+j : (thread->rand.Next() % FLAGS_num);
|
||||
const int k = seq ? i + j : (thread->rand.Next() % FLAGS_num);
|
||||
char key[100];
|
||||
snprintf(key, sizeof(key), "%016d", k);
|
||||
batch.Delete(key);
|
||||
@ -861,13 +846,9 @@ class Benchmark {
|
||||
}
|
||||
}
|
||||
|
||||
void DeleteSeq(ThreadState* thread) {
|
||||
DoDelete(thread, true);
|
||||
}
|
||||
void DeleteSeq(ThreadState* thread) { DoDelete(thread, true); }
|
||||
|
||||
void DeleteRandom(ThreadState* thread) {
|
||||
DoDelete(thread, false);
|
||||
}
|
||||
void DeleteRandom(ThreadState* thread) { DoDelete(thread, false); }
|
||||
|
||||
void ReadWhileWriting(ThreadState* thread) {
|
||||
if (thread->tid > 0) {
|
||||
@ -899,9 +880,7 @@ class Benchmark {
|
||||
}
|
||||
}
|
||||
|
||||
void Compact(ThreadState* thread) {
|
||||
db_->CompactRange(NULL, NULL);
|
||||
}
|
||||
void Compact(ThreadState* thread) { db_->CompactRange(nullptr, nullptr); }
|
||||
|
||||
void PrintStats(const char* key) {
|
||||
std::string stats;
|
||||
@ -919,7 +898,7 @@ class Benchmark {
|
||||
char fname[100];
|
||||
snprintf(fname, sizeof(fname), "%s/heap-%04d", FLAGS_db, ++heap_counter_);
|
||||
WritableFile* file;
|
||||
Status s = Env::Default()->NewWritableFile(fname, &file);
|
||||
Status s = g_env->NewWritableFile(fname, &file);
|
||||
if (!s.ok()) {
|
||||
fprintf(stderr, "%s\n", s.ToString().c_str());
|
||||
return;
|
||||
@ -928,7 +907,7 @@ class Benchmark {
|
||||
delete file;
|
||||
if (!ok) {
|
||||
fprintf(stderr, "heap profiling not supported\n");
|
||||
Env::Default()->DeleteFile(fname);
|
||||
g_env->DeleteFile(fname);
|
||||
}
|
||||
}
|
||||
};
|
||||
@ -937,6 +916,8 @@ class Benchmark {
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
FLAGS_write_buffer_size = leveldb::Options().write_buffer_size;
|
||||
FLAGS_max_file_size = leveldb::Options().max_file_size;
|
||||
FLAGS_block_size = leveldb::Options().block_size;
|
||||
FLAGS_open_files = leveldb::Options().max_open_files;
|
||||
std::string default_db_path;
|
||||
|
||||
@ -954,6 +935,9 @@ int main(int argc, char** argv) {
|
||||
} else if (sscanf(argv[i], "--use_existing_db=%d%c", &n, &junk) == 1 &&
|
||||
(n == 0 || n == 1)) {
|
||||
FLAGS_use_existing_db = n;
|
||||
} else if (sscanf(argv[i], "--reuse_logs=%d%c", &n, &junk) == 1 &&
|
||||
(n == 0 || n == 1)) {
|
||||
FLAGS_reuse_logs = n;
|
||||
} else if (sscanf(argv[i], "--num=%d%c", &n, &junk) == 1) {
|
||||
FLAGS_num = n;
|
||||
} else if (sscanf(argv[i], "--reads=%d%c", &n, &junk) == 1) {
|
||||
@ -964,6 +948,10 @@ int main(int argc, char** argv) {
|
||||
FLAGS_value_size = n;
|
||||
} else if (sscanf(argv[i], "--write_buffer_size=%d%c", &n, &junk) == 1) {
|
||||
FLAGS_write_buffer_size = n;
|
||||
} else if (sscanf(argv[i], "--max_file_size=%d%c", &n, &junk) == 1) {
|
||||
FLAGS_max_file_size = n;
|
||||
} else if (sscanf(argv[i], "--block_size=%d%c", &n, &junk) == 1) {
|
||||
FLAGS_block_size = n;
|
||||
} else if (sscanf(argv[i], "--cache_size=%d%c", &n, &junk) == 1) {
|
||||
FLAGS_cache_size = n;
|
||||
} else if (sscanf(argv[i], "--bloom_bits=%d%c", &n, &junk) == 1) {
|
||||
@ -978,11 +966,13 @@ int main(int argc, char** argv) {
|
||||
}
|
||||
}
|
||||
|
||||
leveldb::g_env = leveldb::Env::Default();
|
||||
|
||||
// Choose a location for the test database if none given with --db=<path>
|
||||
if (FLAGS_db == NULL) {
|
||||
leveldb::Env::Default()->GetTestDirectory(&default_db_path);
|
||||
default_db_path += "/dbbench";
|
||||
FLAGS_db = default_db_path.c_str();
|
||||
if (FLAGS_db == nullptr) {
|
||||
leveldb::g_env->GetTestDirectory(&default_db_path);
|
||||
default_db_path += "/dbbench";
|
||||
FLAGS_db = default_db_path.c_str();
|
||||
}
|
||||
|
||||
leveldb::Benchmark benchmark;
|
@ -2,9 +2,10 @@
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#include <sqlite3.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <sqlite3.h>
|
||||
|
||||
#include "util/histogram.h"
|
||||
#include "util/random.h"
|
||||
#include "util/testutil.h"
|
||||
@ -38,8 +39,7 @@ static const char* FLAGS_benchmarks =
|
||||
"fillrand100K,"
|
||||
"fillseq100K,"
|
||||
"readseq,"
|
||||
"readrand100K,"
|
||||
;
|
||||
"readrand100K,";
|
||||
|
||||
// Number of key/values to place in database
|
||||
static int FLAGS_num = 1000000;
|
||||
@ -76,10 +76,9 @@ static bool FLAGS_transaction = true;
|
||||
static bool FLAGS_WAL_enabled = true;
|
||||
|
||||
// Use the db with the following name.
|
||||
static const char* FLAGS_db = NULL;
|
||||
static const char* FLAGS_db = nullptr;
|
||||
|
||||
inline
|
||||
static void ExecErrorCheck(int status, char *err_msg) {
|
||||
inline static void ExecErrorCheck(int status, char* err_msg) {
|
||||
if (status != SQLITE_OK) {
|
||||
fprintf(stderr, "SQL error: %s\n", err_msg);
|
||||
sqlite3_free(err_msg);
|
||||
@ -87,27 +86,25 @@ static void ExecErrorCheck(int status, char *err_msg) {
|
||||
}
|
||||
}
|
||||
|
||||
inline
|
||||
static void StepErrorCheck(int status) {
|
||||
inline static void StepErrorCheck(int status) {
|
||||
if (status != SQLITE_DONE) {
|
||||
fprintf(stderr, "SQL step error: status = %d\n", status);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
inline
|
||||
static void ErrorCheck(int status) {
|
||||
inline static void ErrorCheck(int status) {
|
||||
if (status != SQLITE_OK) {
|
||||
fprintf(stderr, "sqlite3 error: status = %d\n", status);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
inline
|
||||
static void WalCheckpoint(sqlite3* db_) {
|
||||
inline static void WalCheckpoint(sqlite3* db_) {
|
||||
// Flush all writes to disk
|
||||
if (FLAGS_WAL_enabled) {
|
||||
sqlite3_wal_checkpoint_v2(db_, NULL, SQLITE_CHECKPOINT_FULL, NULL, NULL);
|
||||
sqlite3_wal_checkpoint_v2(db_, nullptr, SQLITE_CHECKPOINT_FULL, nullptr,
|
||||
nullptr);
|
||||
}
|
||||
}
|
||||
|
||||
@ -152,7 +149,7 @@ static Slice TrimSpace(Slice s) {
|
||||
start++;
|
||||
}
|
||||
int limit = s.size();
|
||||
while (limit > start && isspace(s[limit-1])) {
|
||||
while (limit > start && isspace(s[limit - 1])) {
|
||||
limit--;
|
||||
}
|
||||
return Slice(s.data() + start, limit - start);
|
||||
@ -176,7 +173,7 @@ class Benchmark {
|
||||
|
||||
// State kept for progress messages
|
||||
int done_;
|
||||
int next_report_; // When to report next
|
||||
int next_report_; // When to report next
|
||||
|
||||
void PrintHeader() {
|
||||
const int kKeySize = 16;
|
||||
@ -185,17 +182,17 @@ class Benchmark {
|
||||
fprintf(stdout, "Values: %d bytes each\n", FLAGS_value_size);
|
||||
fprintf(stdout, "Entries: %d\n", num_);
|
||||
fprintf(stdout, "RawSize: %.1f MB (estimated)\n",
|
||||
((static_cast<int64_t>(kKeySize + FLAGS_value_size) * num_)
|
||||
/ 1048576.0));
|
||||
((static_cast<int64_t>(kKeySize + FLAGS_value_size) * num_) /
|
||||
1048576.0));
|
||||
PrintWarnings();
|
||||
fprintf(stdout, "------------------------------------------------\n");
|
||||
}
|
||||
|
||||
void PrintWarnings() {
|
||||
#if defined(__GNUC__) && !defined(__OPTIMIZE__)
|
||||
fprintf(stdout,
|
||||
"WARNING: Optimization is disabled: benchmarks unnecessarily slow\n"
|
||||
);
|
||||
fprintf(
|
||||
stdout,
|
||||
"WARNING: Optimization is disabled: benchmarks unnecessarily slow\n");
|
||||
#endif
|
||||
#ifndef NDEBUG
|
||||
fprintf(stdout,
|
||||
@ -207,18 +204,18 @@ class Benchmark {
|
||||
fprintf(stderr, "SQLite: version %s\n", SQLITE_VERSION);
|
||||
|
||||
#if defined(__linux)
|
||||
time_t now = time(NULL);
|
||||
time_t now = time(nullptr);
|
||||
fprintf(stderr, "Date: %s", ctime(&now)); // ctime() adds newline
|
||||
|
||||
FILE* cpuinfo = fopen("/proc/cpuinfo", "r");
|
||||
if (cpuinfo != NULL) {
|
||||
if (cpuinfo != nullptr) {
|
||||
char line[1000];
|
||||
int num_cpus = 0;
|
||||
std::string cpu_type;
|
||||
std::string cache_size;
|
||||
while (fgets(line, sizeof(line), cpuinfo) != NULL) {
|
||||
while (fgets(line, sizeof(line), cpuinfo) != nullptr) {
|
||||
const char* sep = strchr(line, ':');
|
||||
if (sep == NULL) {
|
||||
if (sep == nullptr) {
|
||||
continue;
|
||||
}
|
||||
Slice key = TrimSpace(Slice(line, sep - 1 - line));
|
||||
@ -261,13 +258,20 @@ class Benchmark {
|
||||
|
||||
done_++;
|
||||
if (done_ >= next_report_) {
|
||||
if (next_report_ < 1000) next_report_ += 100;
|
||||
else if (next_report_ < 5000) next_report_ += 500;
|
||||
else if (next_report_ < 10000) next_report_ += 1000;
|
||||
else if (next_report_ < 50000) next_report_ += 5000;
|
||||
else if (next_report_ < 100000) next_report_ += 10000;
|
||||
else if (next_report_ < 500000) next_report_ += 50000;
|
||||
else next_report_ += 100000;
|
||||
if (next_report_ < 1000)
|
||||
next_report_ += 100;
|
||||
else if (next_report_ < 5000)
|
||||
next_report_ += 500;
|
||||
else if (next_report_ < 10000)
|
||||
next_report_ += 1000;
|
||||
else if (next_report_ < 50000)
|
||||
next_report_ += 5000;
|
||||
else if (next_report_ < 100000)
|
||||
next_report_ += 10000;
|
||||
else if (next_report_ < 500000)
|
||||
next_report_ += 50000;
|
||||
else
|
||||
next_report_ += 100000;
|
||||
fprintf(stderr, "... finished %d ops%30s\r", done_, "");
|
||||
fflush(stderr);
|
||||
}
|
||||
@ -285,16 +289,14 @@ class Benchmark {
|
||||
snprintf(rate, sizeof(rate), "%6.1f MB/s",
|
||||
(bytes_ / 1048576.0) / (finish - start_));
|
||||
if (!message_.empty()) {
|
||||
message_ = std::string(rate) + " " + message_;
|
||||
message_ = std::string(rate) + " " + message_;
|
||||
} else {
|
||||
message_ = rate;
|
||||
}
|
||||
}
|
||||
|
||||
fprintf(stdout, "%-12s : %11.3f micros/op;%s%s\n",
|
||||
name.ToString().c_str(),
|
||||
(finish - start_) * 1e6 / done_,
|
||||
(message_.empty() ? "" : " "),
|
||||
fprintf(stdout, "%-12s : %11.3f micros/op;%s%s\n", name.ToString().c_str(),
|
||||
(finish - start_) * 1e6 / done_, (message_.empty() ? "" : " "),
|
||||
message_.c_str());
|
||||
if (FLAGS_histogram) {
|
||||
fprintf(stdout, "Microseconds per op:\n%s\n", hist_.ToString().c_str());
|
||||
@ -303,22 +305,16 @@ class Benchmark {
|
||||
}
|
||||
|
||||
public:
|
||||
// Key order used by Write(): SEQUENTIAL derives the key from the loop index,
// RANDOM draws it from the benchmark's random generator.
enum Order { SEQUENTIAL, RANDOM };

// Database state requested by a write benchmark: FRESH makes Write() close
// the current database and open a new one first; EXISTING skips that step.
enum DBState { FRESH, EXISTING };
|
||||
|
||||
Benchmark()
|
||||
: db_(NULL),
|
||||
db_num_(0),
|
||||
num_(FLAGS_num),
|
||||
reads_(FLAGS_reads < 0 ? FLAGS_num : FLAGS_reads),
|
||||
bytes_(0),
|
||||
rand_(301) {
|
||||
: db_(nullptr),
|
||||
db_num_(0),
|
||||
num_(FLAGS_num),
|
||||
reads_(FLAGS_reads < 0 ? FLAGS_num : FLAGS_reads),
|
||||
bytes_(0),
|
||||
rand_(301) {
|
||||
std::vector<std::string> files;
|
||||
std::string test_dir;
|
||||
Env::Default()->GetTestDirectory(&test_dir);
|
||||
@ -345,12 +341,12 @@ class Benchmark {
|
||||
Open();
|
||||
|
||||
const char* benchmarks = FLAGS_benchmarks;
|
||||
while (benchmarks != NULL) {
|
||||
while (benchmarks != nullptr) {
|
||||
const char* sep = strchr(benchmarks, ',');
|
||||
Slice name;
|
||||
if (sep == NULL) {
|
||||
if (sep == nullptr) {
|
||||
name = benchmarks;
|
||||
benchmarks = NULL;
|
||||
benchmarks = nullptr;
|
||||
} else {
|
||||
name = Slice(benchmarks, sep - benchmarks);
|
||||
benchmarks = sep + 1;
|
||||
@ -415,20 +411,18 @@ class Benchmark {
|
||||
}
|
||||
|
||||
void Open() {
|
||||
assert(db_ == NULL);
|
||||
assert(db_ == nullptr);
|
||||
|
||||
int status;
|
||||
char file_name[100];
|
||||
char* err_msg = NULL;
|
||||
char* err_msg = nullptr;
|
||||
db_num_++;
|
||||
|
||||
// Open database
|
||||
std::string tmp_dir;
|
||||
Env::Default()->GetTestDirectory(&tmp_dir);
|
||||
snprintf(file_name, sizeof(file_name),
|
||||
"%s/dbbench_sqlite3-%d.db",
|
||||
tmp_dir.c_str(),
|
||||
db_num_);
|
||||
snprintf(file_name, sizeof(file_name), "%s/dbbench_sqlite3-%d.db",
|
||||
tmp_dir.c_str(), db_num_);
|
||||
status = sqlite3_open(file_name, &db_);
|
||||
if (status) {
|
||||
fprintf(stderr, "open error: %s\n", sqlite3_errmsg(db_));
|
||||
@ -439,7 +433,7 @@ class Benchmark {
|
||||
char cache_size[100];
|
||||
snprintf(cache_size, sizeof(cache_size), "PRAGMA cache_size = %d",
|
||||
FLAGS_num_pages);
|
||||
status = sqlite3_exec(db_, cache_size, NULL, NULL, &err_msg);
|
||||
status = sqlite3_exec(db_, cache_size, nullptr, nullptr, &err_msg);
|
||||
ExecErrorCheck(status, err_msg);
|
||||
|
||||
// FLAGS_page_size is defaulted to 1024
|
||||
@ -447,7 +441,7 @@ class Benchmark {
|
||||
char page_size[100];
|
||||
snprintf(page_size, sizeof(page_size), "PRAGMA page_size = %d",
|
||||
FLAGS_page_size);
|
||||
status = sqlite3_exec(db_, page_size, NULL, NULL, &err_msg);
|
||||
status = sqlite3_exec(db_, page_size, nullptr, nullptr, &err_msg);
|
||||
ExecErrorCheck(status, err_msg);
|
||||
}
|
||||
|
||||
@ -457,26 +451,28 @@ class Benchmark {
|
||||
|
||||
// LevelDB's default cache size is a combined 4 MB
|
||||
std::string WAL_checkpoint = "PRAGMA wal_autocheckpoint = 4096";
|
||||
status = sqlite3_exec(db_, WAL_stmt.c_str(), NULL, NULL, &err_msg);
|
||||
status = sqlite3_exec(db_, WAL_stmt.c_str(), nullptr, nullptr, &err_msg);
|
||||
ExecErrorCheck(status, err_msg);
|
||||
status = sqlite3_exec(db_, WAL_checkpoint.c_str(), NULL, NULL, &err_msg);
|
||||
status =
|
||||
sqlite3_exec(db_, WAL_checkpoint.c_str(), nullptr, nullptr, &err_msg);
|
||||
ExecErrorCheck(status, err_msg);
|
||||
}
|
||||
|
||||
// Change locking mode to exclusive and create tables/index for database
|
||||
std::string locking_stmt = "PRAGMA locking_mode = EXCLUSIVE";
|
||||
std::string create_stmt =
|
||||
"CREATE TABLE test (key blob, value blob, PRIMARY KEY(key))";
|
||||
std::string stmt_array[] = { locking_stmt, create_stmt };
|
||||
"CREATE TABLE test (key blob, value blob, PRIMARY KEY(key))";
|
||||
std::string stmt_array[] = {locking_stmt, create_stmt};
|
||||
int stmt_array_length = sizeof(stmt_array) / sizeof(std::string);
|
||||
for (int i = 0; i < stmt_array_length; i++) {
|
||||
status = sqlite3_exec(db_, stmt_array[i].c_str(), NULL, NULL, &err_msg);
|
||||
status =
|
||||
sqlite3_exec(db_, stmt_array[i].c_str(), nullptr, nullptr, &err_msg);
|
||||
ExecErrorCheck(status, err_msg);
|
||||
}
|
||||
}
|
||||
|
||||
void Write(bool write_sync, Order order, DBState state,
|
||||
int num_entries, int value_size, int entries_per_batch) {
|
||||
void Write(bool write_sync, Order order, DBState state, int num_entries,
|
||||
int value_size, int entries_per_batch) {
|
||||
// Create new database if state == FRESH
|
||||
if (state == FRESH) {
|
||||
if (FLAGS_use_existing_db) {
|
||||
@ -484,7 +480,7 @@ class Benchmark {
|
||||
return;
|
||||
}
|
||||
sqlite3_close(db_);
|
||||
db_ = NULL;
|
||||
db_ = nullptr;
|
||||
Open();
|
||||
Start();
|
||||
}
|
||||
@ -495,7 +491,7 @@ class Benchmark {
|
||||
message_ = msg;
|
||||
}
|
||||
|
||||
char* err_msg = NULL;
|
||||
char* err_msg = nullptr;
|
||||
int status;
|
||||
|
||||
sqlite3_stmt *replace_stmt, *begin_trans_stmt, *end_trans_stmt;
|
||||
@ -504,20 +500,20 @@ class Benchmark {
|
||||
std::string end_trans_str = "END TRANSACTION;";
|
||||
|
||||
// Check for synchronous flag in options
|
||||
std::string sync_stmt = (write_sync) ? "PRAGMA synchronous = FULL" :
|
||||
"PRAGMA synchronous = OFF";
|
||||
status = sqlite3_exec(db_, sync_stmt.c_str(), NULL, NULL, &err_msg);
|
||||
std::string sync_stmt =
|
||||
(write_sync) ? "PRAGMA synchronous = FULL" : "PRAGMA synchronous = OFF";
|
||||
status = sqlite3_exec(db_, sync_stmt.c_str(), nullptr, nullptr, &err_msg);
|
||||
ExecErrorCheck(status, err_msg);
|
||||
|
||||
// Preparing sqlite3 statements
|
||||
status = sqlite3_prepare_v2(db_, replace_str.c_str(), -1,
|
||||
&replace_stmt, NULL);
|
||||
status = sqlite3_prepare_v2(db_, replace_str.c_str(), -1, &replace_stmt,
|
||||
nullptr);
|
||||
ErrorCheck(status);
|
||||
status = sqlite3_prepare_v2(db_, begin_trans_str.c_str(), -1,
|
||||
&begin_trans_stmt, NULL);
|
||||
&begin_trans_stmt, nullptr);
|
||||
ErrorCheck(status);
|
||||
status = sqlite3_prepare_v2(db_, end_trans_str.c_str(), -1,
|
||||
&end_trans_stmt, NULL);
|
||||
status = sqlite3_prepare_v2(db_, end_trans_str.c_str(), -1, &end_trans_stmt,
|
||||
nullptr);
|
||||
ErrorCheck(status);
|
||||
|
||||
bool transaction = (entries_per_batch > 1);
|
||||
@ -535,16 +531,16 @@ class Benchmark {
|
||||
const char* value = gen_.Generate(value_size).data();
|
||||
|
||||
// Create values for key-value pair
|
||||
const int k = (order == SEQUENTIAL) ? i + j :
|
||||
(rand_.Next() % num_entries);
|
||||
const int k =
|
||||
(order == SEQUENTIAL) ? i + j : (rand_.Next() % num_entries);
|
||||
char key[100];
|
||||
snprintf(key, sizeof(key), "%016d", k);
|
||||
|
||||
// Bind KV values into replace_stmt
|
||||
status = sqlite3_bind_blob(replace_stmt, 1, key, 16, SQLITE_STATIC);
|
||||
ErrorCheck(status);
|
||||
status = sqlite3_bind_blob(replace_stmt, 2, value,
|
||||
value_size, SQLITE_STATIC);
|
||||
status = sqlite3_bind_blob(replace_stmt, 2, value, value_size,
|
||||
SQLITE_STATIC);
|
||||
ErrorCheck(status);
|
||||
|
||||
// Execute replace_stmt
|
||||
@ -588,12 +584,12 @@ class Benchmark {
|
||||
|
||||
// Preparing sqlite3 statements
|
||||
status = sqlite3_prepare_v2(db_, begin_trans_str.c_str(), -1,
|
||||
&begin_trans_stmt, NULL);
|
||||
&begin_trans_stmt, nullptr);
|
||||
ErrorCheck(status);
|
||||
status = sqlite3_prepare_v2(db_, end_trans_str.c_str(), -1,
|
||||
&end_trans_stmt, NULL);
|
||||
status = sqlite3_prepare_v2(db_, end_trans_str.c_str(), -1, &end_trans_stmt,
|
||||
nullptr);
|
||||
ErrorCheck(status);
|
||||
status = sqlite3_prepare_v2(db_, read_str.c_str(), -1, &read_stmt, NULL);
|
||||
status = sqlite3_prepare_v2(db_, read_str.c_str(), -1, &read_stmt, nullptr);
|
||||
ErrorCheck(status);
|
||||
|
||||
bool transaction = (entries_per_batch > 1);
|
||||
@ -618,7 +614,8 @@ class Benchmark {
|
||||
ErrorCheck(status);
|
||||
|
||||
// Execute read statement
|
||||
while ((status = sqlite3_step(read_stmt)) == SQLITE_ROW) {}
|
||||
while ((status = sqlite3_step(read_stmt)) == SQLITE_ROW) {
|
||||
}
|
||||
StepErrorCheck(status);
|
||||
|
||||
// Reset SQLite statement for another use
|
||||
@ -648,10 +645,10 @@ class Benchmark {
|
||||
|
||||
void ReadSequential() {
|
||||
int status;
|
||||
sqlite3_stmt *pStmt;
|
||||
sqlite3_stmt* pStmt;
|
||||
std::string read_str = "SELECT * FROM test ORDER BY key";
|
||||
|
||||
status = sqlite3_prepare_v2(db_, read_str.c_str(), -1, &pStmt, NULL);
|
||||
status = sqlite3_prepare_v2(db_, read_str.c_str(), -1, &pStmt, nullptr);
|
||||
ErrorCheck(status);
|
||||
for (int i = 0; i < reads_ && SQLITE_ROW == sqlite3_step(pStmt); i++) {
|
||||
bytes_ += sqlite3_column_bytes(pStmt, 1) + sqlite3_column_bytes(pStmt, 2);
|
||||
@ -661,7 +658,6 @@ class Benchmark {
|
||||
status = sqlite3_finalize(pStmt);
|
||||
ErrorCheck(status);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
} // namespace leveldb
|
||||
@ -706,10 +702,10 @@ int main(int argc, char** argv) {
|
||||
}
|
||||
|
||||
// Choose a location for the test database if none given with --db=<path>
|
||||
if (FLAGS_db == NULL) {
|
||||
leveldb::Env::Default()->GetTestDirectory(&default_db_path);
|
||||
default_db_path += "/dbbench";
|
||||
FLAGS_db = default_db_path.c_str();
|
||||
if (FLAGS_db == nullptr) {
|
||||
leveldb::Env::Default()->GetTestDirectory(&default_db_path);
|
||||
default_db_path += "/dbbench";
|
||||
FLAGS_db = default_db_path.c_str();
|
||||
}
|
||||
|
||||
leveldb::Benchmark benchmark;
|
@ -2,9 +2,10 @@
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#include <kcpolydb.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <kcpolydb.h>
|
||||
|
||||
#include "util/histogram.h"
|
||||
#include "util/random.h"
|
||||
#include "util/testutil.h"
|
||||
@ -34,8 +35,7 @@ static const char* FLAGS_benchmarks =
|
||||
"fillrand100K,"
|
||||
"fillseq100K,"
|
||||
"readseq100K,"
|
||||
"readrand100K,"
|
||||
;
|
||||
"readrand100K,";
|
||||
|
||||
// Number of key/values to place in database
|
||||
static int FLAGS_num = 1000000;
|
||||
@ -69,11 +69,9 @@ static bool FLAGS_use_existing_db = false;
|
||||
static bool FLAGS_compression = true;
|
||||
|
||||
// Path of the database to benchmark (--db=<path>). Left as nullptr when the
// flag is not given; main() then points it at "<test directory>/dbbench".
static const char* FLAGS_db = nullptr;
|
||||
|
||||
inline
|
||||
static void DBSynchronize(kyotocabinet::TreeDB* db_)
|
||||
{
|
||||
inline static void DBSynchronize(kyotocabinet::TreeDB* db_) {
|
||||
// Synchronize will flush writes to disk
|
||||
if (!db_->synchronize()) {
|
||||
fprintf(stderr, "synchronize error: %s\n", db_->error().name());
|
||||
@ -121,7 +119,7 @@ static Slice TrimSpace(Slice s) {
|
||||
start++;
|
||||
}
|
||||
int limit = s.size();
|
||||
while (limit > start && isspace(s[limit-1])) {
|
||||
while (limit > start && isspace(s[limit - 1])) {
|
||||
limit--;
|
||||
}
|
||||
return Slice(s.data() + start, limit - start);
|
||||
@ -146,7 +144,7 @@ class Benchmark {
|
||||
|
||||
// State kept for progress messages
|
||||
int done_;
|
||||
int next_report_; // When to report next
|
||||
int next_report_; // When to report next
|
||||
|
||||
void PrintHeader() {
|
||||
const int kKeySize = 16;
|
||||
@ -157,20 +155,20 @@ class Benchmark {
|
||||
static_cast<int>(FLAGS_value_size * FLAGS_compression_ratio + 0.5));
|
||||
fprintf(stdout, "Entries: %d\n", num_);
|
||||
fprintf(stdout, "RawSize: %.1f MB (estimated)\n",
|
||||
((static_cast<int64_t>(kKeySize + FLAGS_value_size) * num_)
|
||||
/ 1048576.0));
|
||||
((static_cast<int64_t>(kKeySize + FLAGS_value_size) * num_) /
|
||||
1048576.0));
|
||||
fprintf(stdout, "FileSize: %.1f MB (estimated)\n",
|
||||
(((kKeySize + FLAGS_value_size * FLAGS_compression_ratio) * num_)
|
||||
/ 1048576.0));
|
||||
(((kKeySize + FLAGS_value_size * FLAGS_compression_ratio) * num_) /
|
||||
1048576.0));
|
||||
PrintWarnings();
|
||||
fprintf(stdout, "------------------------------------------------\n");
|
||||
}
|
||||
|
||||
void PrintWarnings() {
|
||||
#if defined(__GNUC__) && !defined(__OPTIMIZE__)
|
||||
fprintf(stdout,
|
||||
"WARNING: Optimization is disabled: benchmarks unnecessarily slow\n"
|
||||
);
|
||||
fprintf(
|
||||
stdout,
|
||||
"WARNING: Optimization is disabled: benchmarks unnecessarily slow\n");
|
||||
#endif
|
||||
#ifndef NDEBUG
|
||||
fprintf(stdout,
|
||||
@ -183,18 +181,18 @@ class Benchmark {
|
||||
kyotocabinet::VERSION, kyotocabinet::LIBVER, kyotocabinet::LIBREV);
|
||||
|
||||
#if defined(__linux)
|
||||
time_t now = time(NULL);
|
||||
time_t now = time(nullptr);
|
||||
fprintf(stderr, "Date: %s", ctime(&now)); // ctime() adds newline
|
||||
|
||||
FILE* cpuinfo = fopen("/proc/cpuinfo", "r");
|
||||
if (cpuinfo != NULL) {
|
||||
if (cpuinfo != nullptr) {
|
||||
char line[1000];
|
||||
int num_cpus = 0;
|
||||
std::string cpu_type;
|
||||
std::string cache_size;
|
||||
while (fgets(line, sizeof(line), cpuinfo) != NULL) {
|
||||
while (fgets(line, sizeof(line), cpuinfo) != nullptr) {
|
||||
const char* sep = strchr(line, ':');
|
||||
if (sep == NULL) {
|
||||
if (sep == nullptr) {
|
||||
continue;
|
||||
}
|
||||
Slice key = TrimSpace(Slice(line, sep - 1 - line));
|
||||
@ -237,13 +235,20 @@ class Benchmark {
|
||||
|
||||
done_++;
|
||||
if (done_ >= next_report_) {
|
||||
if (next_report_ < 1000) next_report_ += 100;
|
||||
else if (next_report_ < 5000) next_report_ += 500;
|
||||
else if (next_report_ < 10000) next_report_ += 1000;
|
||||
else if (next_report_ < 50000) next_report_ += 5000;
|
||||
else if (next_report_ < 100000) next_report_ += 10000;
|
||||
else if (next_report_ < 500000) next_report_ += 50000;
|
||||
else next_report_ += 100000;
|
||||
if (next_report_ < 1000)
|
||||
next_report_ += 100;
|
||||
else if (next_report_ < 5000)
|
||||
next_report_ += 500;
|
||||
else if (next_report_ < 10000)
|
||||
next_report_ += 1000;
|
||||
else if (next_report_ < 50000)
|
||||
next_report_ += 5000;
|
||||
else if (next_report_ < 100000)
|
||||
next_report_ += 10000;
|
||||
else if (next_report_ < 500000)
|
||||
next_report_ += 50000;
|
||||
else
|
||||
next_report_ += 100000;
|
||||
fprintf(stderr, "... finished %d ops%30s\r", done_, "");
|
||||
fflush(stderr);
|
||||
}
|
||||
@ -261,16 +266,14 @@ class Benchmark {
|
||||
snprintf(rate, sizeof(rate), "%6.1f MB/s",
|
||||
(bytes_ / 1048576.0) / (finish - start_));
|
||||
if (!message_.empty()) {
|
||||
message_ = std::string(rate) + " " + message_;
|
||||
message_ = std::string(rate) + " " + message_;
|
||||
} else {
|
||||
message_ = rate;
|
||||
}
|
||||
}
|
||||
|
||||
fprintf(stdout, "%-12s : %11.3f micros/op;%s%s\n",
|
||||
name.ToString().c_str(),
|
||||
(finish - start_) * 1e6 / done_,
|
||||
(message_.empty() ? "" : " "),
|
||||
fprintf(stdout, "%-12s : %11.3f micros/op;%s%s\n", name.ToString().c_str(),
|
||||
(finish - start_) * 1e6 / done_, (message_.empty() ? "" : " "),
|
||||
message_.c_str());
|
||||
if (FLAGS_histogram) {
|
||||
fprintf(stdout, "Microseconds per op:\n%s\n", hist_.ToString().c_str());
|
||||
@ -279,21 +282,15 @@ class Benchmark {
|
||||
}
|
||||
|
||||
public:
|
||||
// Key order used by Write(): SEQUENTIAL uses the loop index as the key,
// RANDOM draws it from the benchmark's random generator.
enum Order { SEQUENTIAL, RANDOM };

// Database state requested by a write benchmark: FRESH makes Write() delete
// the current database object and open a new one first; EXISTING skips that.
enum DBState { FRESH, EXISTING };
|
||||
|
||||
Benchmark()
|
||||
: db_(NULL),
|
||||
num_(FLAGS_num),
|
||||
reads_(FLAGS_reads < 0 ? FLAGS_num : FLAGS_reads),
|
||||
bytes_(0),
|
||||
rand_(301) {
|
||||
: db_(nullptr),
|
||||
num_(FLAGS_num),
|
||||
reads_(FLAGS_reads < 0 ? FLAGS_num : FLAGS_reads),
|
||||
bytes_(0),
|
||||
rand_(301) {
|
||||
std::vector<std::string> files;
|
||||
std::string test_dir;
|
||||
Env::Default()->GetTestDirectory(&test_dir);
|
||||
@ -321,12 +318,12 @@ class Benchmark {
|
||||
Open(false);
|
||||
|
||||
const char* benchmarks = FLAGS_benchmarks;
|
||||
while (benchmarks != NULL) {
|
||||
while (benchmarks != nullptr) {
|
||||
const char* sep = strchr(benchmarks, ',');
|
||||
Slice name;
|
||||
if (sep == NULL) {
|
||||
if (sep == nullptr) {
|
||||
name = benchmarks;
|
||||
benchmarks = NULL;
|
||||
benchmarks = nullptr;
|
||||
} else {
|
||||
name = Slice(benchmarks, sep - benchmarks);
|
||||
benchmarks = sep + 1;
|
||||
@ -386,8 +383,8 @@ class Benchmark {
|
||||
}
|
||||
|
||||
private:
|
||||
void Open(bool sync) {
|
||||
assert(db_ == NULL);
|
||||
void Open(bool sync) {
|
||||
assert(db_ == nullptr);
|
||||
|
||||
// Initialize db_
|
||||
db_ = new kyotocabinet::TreeDB();
|
||||
@ -395,16 +392,14 @@ class Benchmark {
|
||||
db_num_++;
|
||||
std::string test_dir;
|
||||
Env::Default()->GetTestDirectory(&test_dir);
|
||||
snprintf(file_name, sizeof(file_name),
|
||||
"%s/dbbench_polyDB-%d.kct",
|
||||
test_dir.c_str(),
|
||||
db_num_);
|
||||
snprintf(file_name, sizeof(file_name), "%s/dbbench_polyDB-%d.kct",
|
||||
test_dir.c_str(), db_num_);
|
||||
|
||||
// Create tuning options and open the database
|
||||
int open_options = kyotocabinet::PolyDB::OWRITER |
|
||||
kyotocabinet::PolyDB::OCREATE;
|
||||
int tune_options = kyotocabinet::TreeDB::TSMALL |
|
||||
kyotocabinet::TreeDB::TLINEAR;
|
||||
int open_options =
|
||||
kyotocabinet::PolyDB::OWRITER | kyotocabinet::PolyDB::OCREATE;
|
||||
int tune_options =
|
||||
kyotocabinet::TreeDB::TSMALL | kyotocabinet::TreeDB::TLINEAR;
|
||||
if (FLAGS_compression) {
|
||||
tune_options |= kyotocabinet::TreeDB::TCOMPRESS;
|
||||
db_->tune_compressor(&comp_);
|
||||
@ -412,7 +407,7 @@ class Benchmark {
|
||||
db_->tune_options(tune_options);
|
||||
db_->tune_page_cache(FLAGS_cache_size);
|
||||
db_->tune_page(FLAGS_page_size);
|
||||
db_->tune_map(256LL<<20);
|
||||
db_->tune_map(256LL << 20);
|
||||
if (sync) {
|
||||
open_options |= kyotocabinet::PolyDB::OAUTOSYNC;
|
||||
}
|
||||
@ -421,8 +416,8 @@ class Benchmark {
|
||||
}
|
||||
}
|
||||
|
||||
void Write(bool sync, Order order, DBState state,
|
||||
int num_entries, int value_size, int entries_per_batch) {
|
||||
void Write(bool sync, Order order, DBState state, int num_entries,
|
||||
int value_size, int entries_per_batch) {
|
||||
// Create new database if state == FRESH
|
||||
if (state == FRESH) {
|
||||
if (FLAGS_use_existing_db) {
|
||||
@ -430,7 +425,7 @@ class Benchmark {
|
||||
return;
|
||||
}
|
||||
delete db_;
|
||||
db_ = NULL;
|
||||
db_ = nullptr;
|
||||
Open(sync);
|
||||
Start(); // Do not count time taken to destroy/open
|
||||
}
|
||||
@ -442,8 +437,7 @@ class Benchmark {
|
||||
}
|
||||
|
||||
// Write to database
|
||||
for (int i = 0; i < num_entries; i++)
|
||||
{
|
||||
for (int i = 0; i < num_entries; i++) {
|
||||
const int k = (order == SEQUENTIAL) ? i : (rand_.Next() % num_entries);
|
||||
char key[100];
|
||||
snprintf(key, sizeof(key), "%016d", k);
|
||||
@ -516,10 +510,10 @@ int main(int argc, char** argv) {
|
||||
}
|
||||
|
||||
// Choose a location for the test database if none given with --db=<path>
|
||||
if (FLAGS_db == NULL) {
|
||||
leveldb::Env::Default()->GetTestDirectory(&default_db_path);
|
||||
default_db_path += "/dbbench";
|
||||
FLAGS_db = default_db_path.c_str();
|
||||
if (FLAGS_db == nullptr) {
|
||||
leveldb::Env::Default()->GetTestDirectory(&default_db_path);
|
||||
default_db_path += "/dbbench";
|
||||
FLAGS_db = default_db_path.c_str();
|
||||
}
|
||||
|
||||
leveldb::Benchmark benchmark;
|
@ -1,228 +0,0 @@
|
||||
#!/bin/sh
#
# Detects OS we're compiling on and outputs a file specified by the first
# argument, which in turn gets read while processing Makefile.
#
# Usage: <this script> <output-filename> <directory_prefix>
#
# The output will set the following variables:
#   SOURCES                     Portable .cc files plus the platform port file
#   MEMENV_SOURCES              Sources of the in-memory Env helper
#   CC                          C Compiler path
#   CXX                         C++ Compiler path
#   PLATFORM                    Platform identifier (e.g. OS_LINUX)
#   PLATFORM_LDFLAGS            Linker flags
#   PLATFORM_LIBS               Libraries flags
#   PLATFORM_SHARED_EXT         Extension for shared libraries
#   PLATFORM_SHARED_LDFLAGS     Flags for building shared library
#                               This flag is embedded just before the name
#                               of the shared library without intervening spaces
#   PLATFORM_SHARED_CFLAGS      Flags for compiling objects for shared library
#   PLATFORM_CCFLAGS            C compiler flags
#   PLATFORM_CXXFLAGS           C++ compiler flags
#   PLATFORM_SHARED_VERSIONED   Set to 'true' if platform supports versioned
#                               shared libraries, empty otherwise.
#
# The PLATFORM_CCFLAGS and PLATFORM_CXXFLAGS might include the following:
#
#       -DLEVELDB_ATOMIC_PRESENT     if <atomic> is present
#       -DLEVELDB_PLATFORM_POSIX     for Posix-based platforms
#       -DSNAPPY                     if the Snappy library is present
#

OUTPUT=$1
PREFIX=$2
if test -z "$OUTPUT" || test -z "$PREFIX"; then
  echo "usage: $0 <output-filename> <directory_prefix>" >&2
  exit 1
fi

# Delete existing output, if it exists. The path is quoted so that output
# locations containing whitespace work.
rm -f "$OUTPUT"
touch "$OUTPUT"

# Fall back to defaults for anything the caller left unset or empty.
: "${CC:=cc}"
: "${CXX:=g++}"
: "${TMPDIR:=/tmp}"

# Detect OS
if test -z "$TARGET_OS"; then
    TARGET_OS=$(uname -s)
fi

COMMON_FLAGS=
CROSS_COMPILE=
PLATFORM_CCFLAGS=
PLATFORM_CXXFLAGS=
PLATFORM_LDFLAGS=
PLATFORM_LIBS=
PLATFORM_SHARED_EXT="so"
PLATFORM_SHARED_LDFLAGS="-shared -Wl,-soname -Wl,"
PLATFORM_SHARED_CFLAGS="-fPIC"
PLATFORM_SHARED_VERSIONED=true

MEMCMP_FLAG=
if [ "$CXX" = "g++" ]; then
    # Use libc's memcmp instead of GCC's memcmp.  This results in ~40%
    # performance improvement on readrandom under gcc 4.4.3 on Linux/x86.
    MEMCMP_FLAG="-fno-builtin-memcmp"
fi

case "$TARGET_OS" in
    CYGWIN_*)
        PLATFORM=OS_LINUX
        COMMON_FLAGS="$MEMCMP_FLAG -lpthread -DOS_LINUX -DCYGWIN"
        PLATFORM_LDFLAGS="-lpthread"
        PORT_FILE=port/port_posix.cc
        ;;
    Darwin)
        PLATFORM=OS_MACOSX
        COMMON_FLAGS="$MEMCMP_FLAG -DOS_MACOSX"
        PLATFORM_SHARED_EXT=dylib
        [ -z "$INSTALL_PATH" ] && INSTALL_PATH=$(pwd)
        PLATFORM_SHARED_LDFLAGS="-dynamiclib -install_name $INSTALL_PATH/"
        PORT_FILE=port/port_posix.cc
        ;;
    Linux)
        PLATFORM=OS_LINUX
        COMMON_FLAGS="$MEMCMP_FLAG -pthread -DOS_LINUX"
        PLATFORM_LDFLAGS="-pthread"
        PORT_FILE=port/port_posix.cc
        ;;
    SunOS)
        PLATFORM=OS_SOLARIS
        COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_SOLARIS"
        PLATFORM_LIBS="-lpthread -lrt"
        PORT_FILE=port/port_posix.cc
        ;;
    FreeBSD)
        PLATFORM=OS_FREEBSD
        COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_FREEBSD"
        PLATFORM_LIBS="-lpthread"
        PORT_FILE=port/port_posix.cc
        ;;
    NetBSD)
        PLATFORM=OS_NETBSD
        COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_NETBSD"
        PLATFORM_LIBS="-lpthread -lgcc_s"
        PORT_FILE=port/port_posix.cc
        ;;
    OpenBSD)
        PLATFORM=OS_OPENBSD
        COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_OPENBSD"
        PLATFORM_LDFLAGS="-pthread"
        PORT_FILE=port/port_posix.cc
        ;;
    DragonFly)
        PLATFORM=OS_DRAGONFLYBSD
        COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_DRAGONFLYBSD"
        PLATFORM_LIBS="-lpthread"
        PORT_FILE=port/port_posix.cc
        ;;
    OS_ANDROID_CROSSCOMPILE)
        PLATFORM=OS_ANDROID
        COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_ANDROID -DLEVELDB_PLATFORM_POSIX"
        PLATFORM_LDFLAGS=""  # All pthread features are in the Android C library
        PORT_FILE=port/port_posix.cc
        CROSS_COMPILE=true
        ;;
    HP-UX)
        PLATFORM=OS_HPUX
        COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_HPUX"
        PLATFORM_LDFLAGS="-pthread"
        PORT_FILE=port/port_posix.cc
        # man ld: +h internal_name
        PLATFORM_SHARED_LDFLAGS="-shared -Wl,+h -Wl,"
        ;;
    IOS)
        PLATFORM=IOS
        COMMON_FLAGS="$MEMCMP_FLAG -DOS_MACOSX"
        [ -z "$INSTALL_PATH" ] && INSTALL_PATH=$(pwd)
        PORT_FILE=port/port_posix.cc
        PLATFORM_SHARED_EXT=
        PLATFORM_SHARED_LDFLAGS=
        PLATFORM_SHARED_CFLAGS=
        PLATFORM_SHARED_VERSIONED=
        ;;
    *)
        echo "Unknown platform!" >&2
        exit 1
        ;;
esac

# We want to make a list of all cc files within db, util, and table, except
# for the test and benchmark files (the memenv helper is listed separately
# below).  By default, find will output a list of all files matching either
# rule, so we need to append -print to make the prune take effect.
DIRS="$PREFIX/db $PREFIX/util $PREFIX/table"

set -f # temporarily disable globbing so that our patterns aren't expanded
PRUNE_TEST="-name *test*.cc -prune"
PRUNE_BENCH="-name *_bench.cc -prune"
PRUNE_TOOL="-name leveldb_main.cc -prune"
# $DIRS and the $PRUNE_* variables are intentionally unquoted: each holds
# several find arguments that must be split into separate words.
PORTABLE_FILES=$(find $DIRS $PRUNE_TEST -o $PRUNE_BENCH -o $PRUNE_TOOL -o -name '*.cc' -print | sort | sed "s,^$PREFIX/,," | tr "\n" " ")

set +f # re-enable globbing

# The sources consist of the portable files, plus the platform-specific port
# file.
echo "SOURCES=$PORTABLE_FILES $PORT_FILE" >> "$OUTPUT"
echo "MEMENV_SOURCES=helpers/memenv/memenv.cc" >> "$OUTPUT"

# When cross-compiling, do not try any compilation tests.
if [ "$CROSS_COMPILE" != "true" ]; then
    CXXOUTPUT="${TMPDIR}/leveldb_build_detect_platform-cxx.$$"

    # If -std=c++0x works, use <atomic> as fallback for when memory barriers
    # are not available.
    # ($CXX and $CXXFLAGS stay unquoted: they may carry several words.)
    $CXX $CXXFLAGS -std=c++0x -x c++ - -o "$CXXOUTPUT" 2>/dev/null <<EOF
#include <atomic>
int main() {}
EOF
    if [ "$?" = 0 ]; then
        COMMON_FLAGS="$COMMON_FLAGS -DLEVELDB_PLATFORM_POSIX -DLEVELDB_ATOMIC_PRESENT"
        PLATFORM_CXXFLAGS="-std=c++0x"
    else
        COMMON_FLAGS="$COMMON_FLAGS -DLEVELDB_PLATFORM_POSIX"
    fi

    # Test whether Snappy library is installed
    # http://code.google.com/p/snappy/
    $CXX $CXXFLAGS -x c++ - -o "$CXXOUTPUT" 2>/dev/null <<EOF
#include <snappy.h>
int main() {}
EOF
    if [ "$?" = 0 ]; then
        COMMON_FLAGS="$COMMON_FLAGS -DSNAPPY"
        PLATFORM_LIBS="$PLATFORM_LIBS -lsnappy"
    fi

    # Test whether tcmalloc is available
    $CXX $CXXFLAGS -x c++ - -o "$CXXOUTPUT" -ltcmalloc 2>/dev/null <<EOF
int main() {}
EOF
    if [ "$?" = 0 ]; then
        PLATFORM_LIBS="$PLATFORM_LIBS -ltcmalloc"
    fi

    rm -f "$CXXOUTPUT" 2>/dev/null
fi

PLATFORM_CCFLAGS="$PLATFORM_CCFLAGS $COMMON_FLAGS"
PLATFORM_CXXFLAGS="$PLATFORM_CXXFLAGS $COMMON_FLAGS"

# Append the detected settings to the output file in one redirection.
{
    echo "CC=$CC"
    echo "CXX=$CXX"
    echo "PLATFORM=$PLATFORM"
    echo "PLATFORM_LDFLAGS=$PLATFORM_LDFLAGS"
    echo "PLATFORM_LIBS=$PLATFORM_LIBS"
    echo "PLATFORM_CCFLAGS=$PLATFORM_CCFLAGS"
    echo "PLATFORM_CXXFLAGS=$PLATFORM_CXXFLAGS"
    echo "PLATFORM_SHARED_CFLAGS=$PLATFORM_SHARED_CFLAGS"
    echo "PLATFORM_SHARED_EXT=$PLATFORM_SHARED_EXT"
    echo "PLATFORM_SHARED_LDFLAGS=$PLATFORM_SHARED_LDFLAGS"
    echo "PLATFORM_SHARED_VERSIONED=$PLATFORM_SHARED_VERSIONED"
} >> "$OUTPUT"
|
1
cmake/leveldbConfig.cmake
Normal file
1
cmake/leveldbConfig.cmake
Normal file
@ -0,0 +1 @@
|
||||
include("${CMAKE_CURRENT_LIST_DIR}/leveldbTargets.cmake")
|
@ -2,9 +2,9 @@
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#include "leveldb/db.h"
|
||||
#include "db/db_impl.h"
|
||||
#include "leveldb/cache.h"
|
||||
#include "leveldb/db.h"
|
||||
#include "util/testharness.h"
|
||||
#include "util/testutil.h"
|
||||
|
||||
@ -12,11 +12,6 @@ namespace leveldb {
|
||||
|
||||
class AutoCompactTest {
|
||||
public:
|
||||
std::string dbname_;
|
||||
Cache* tiny_cache_;
|
||||
Options options_;
|
||||
DB* db_;
|
||||
|
||||
AutoCompactTest() {
|
||||
dbname_ = test::TmpDir() + "/autocompact_test";
|
||||
tiny_cache_ = NewLRUCache(100);
|
||||
@ -47,6 +42,12 @@ class AutoCompactTest {
|
||||
}
|
||||
|
||||
void DoReads(int n);
|
||||
|
||||
private:
|
||||
std::string dbname_;
|
||||
Cache* tiny_cache_;
|
||||
Options options_;
|
||||
DB* db_;
|
||||
};
|
||||
|
||||
static const int kValueSize = 200 * 1024;
|
||||
@ -81,17 +82,16 @@ void AutoCompactTest::DoReads(int n) {
|
||||
ASSERT_LT(read, 100) << "Taking too long to compact";
|
||||
Iterator* iter = db_->NewIterator(ReadOptions());
|
||||
for (iter->SeekToFirst();
|
||||
iter->Valid() && iter->key().ToString() < limit_key;
|
||||
iter->Next()) {
|
||||
iter->Valid() && iter->key().ToString() < limit_key; iter->Next()) {
|
||||
// Drop data
|
||||
}
|
||||
delete iter;
|
||||
// Wait a little bit to allow any triggered compactions to complete.
|
||||
Env::Default()->SleepForMicroseconds(1000000);
|
||||
uint64_t size = Size(Key(0), Key(n));
|
||||
fprintf(stderr, "iter %3d => %7.3f MB [other %7.3f MB]\n",
|
||||
read+1, size/1048576.0, Size(Key(n), Key(kCount))/1048576.0);
|
||||
if (size <= initial_size/10) {
|
||||
fprintf(stderr, "iter %3d => %7.3f MB [other %7.3f MB]\n", read + 1,
|
||||
size / 1048576.0, Size(Key(n), Key(kCount)) / 1048576.0);
|
||||
if (size <= initial_size / 10) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -100,19 +100,13 @@ void AutoCompactTest::DoReads(int n) {
|
||||
// is pretty much unchanged.
|
||||
const int64_t final_other_size = Size(Key(n), Key(kCount));
|
||||
ASSERT_LE(final_other_size, initial_other_size + 1048576);
|
||||
ASSERT_GE(final_other_size, initial_other_size/5 - 1048576);
|
||||
ASSERT_GE(final_other_size, initial_other_size / 5 - 1048576);
|
||||
}
|
||||
|
||||
// Runs the read-driven compaction check over the whole key range.
TEST(AutoCompactTest, ReadAll) {
  DoReads(kCount);
}
|
||||
|
||||
// Runs the read-driven compaction check over the first half of the keys.
TEST(AutoCompactTest, ReadHalf) {
  DoReads(kCount / 2);
}
|
||||
|
||||
} // namespace leveldb
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
return leveldb::test::RunAllTests();
|
||||
}
|
||||
int main(int argc, char** argv) { return leveldb::test::RunAllTests(); }
|
||||
|
@ -4,8 +4,8 @@
|
||||
|
||||
#include "db/builder.h"
|
||||
|
||||
#include "db/filename.h"
|
||||
#include "db/dbformat.h"
|
||||
#include "db/filename.h"
|
||||
#include "db/table_cache.h"
|
||||
#include "db/version_edit.h"
|
||||
#include "leveldb/db.h"
|
||||
@ -14,12 +14,8 @@
|
||||
|
||||
namespace leveldb {
|
||||
|
||||
Status BuildTable(const std::string& dbname,
|
||||
Env* env,
|
||||
const Options& options,
|
||||
TableCache* table_cache,
|
||||
Iterator* iter,
|
||||
FileMetaData* meta) {
|
||||
Status BuildTable(const std::string& dbname, Env* env, const Options& options,
|
||||
TableCache* table_cache, Iterator* iter, FileMetaData* meta) {
|
||||
Status s;
|
||||
meta->file_size = 0;
|
||||
iter->SeekToFirst();
|
||||
@ -41,14 +37,10 @@ Status BuildTable(const std::string& dbname,
|
||||
}
|
||||
|
||||
// Finish and check for builder errors
|
||||
s = builder->Finish();
|
||||
if (s.ok()) {
|
||||
s = builder->Finish();
|
||||
if (s.ok()) {
|
||||
meta->file_size = builder->FileSize();
|
||||
assert(meta->file_size > 0);
|
||||
}
|
||||
} else {
|
||||
builder->Abandon();
|
||||
meta->file_size = builder->FileSize();
|
||||
assert(meta->file_size > 0);
|
||||
}
|
||||
delete builder;
|
||||
|
||||
@ -60,12 +52,11 @@ Status BuildTable(const std::string& dbname,
|
||||
s = file->Close();
|
||||
}
|
||||
delete file;
|
||||
file = NULL;
|
||||
file = nullptr;
|
||||
|
||||
if (s.ok()) {
|
||||
// Verify that the table is usable
|
||||
Iterator* it = table_cache->NewIterator(ReadOptions(),
|
||||
meta->number,
|
||||
Iterator* it = table_cache->NewIterator(ReadOptions(), meta->number,
|
||||
meta->file_size);
|
||||
s = it->status();
|
||||
delete it;
|
||||
|
@ -22,12 +22,8 @@ class VersionEdit;
|
||||
// *meta will be filled with metadata about the generated table.
|
||||
// If no data is present in *iter, meta->file_size will be set to
|
||||
// zero, and no Table file will be produced.
|
||||
extern Status BuildTable(const std::string& dbname,
|
||||
Env* env,
|
||||
const Options& options,
|
||||
TableCache* table_cache,
|
||||
Iterator* iter,
|
||||
FileMetaData* meta);
|
||||
Status BuildTable(const std::string& dbname, Env* env, const Options& options,
|
||||
TableCache* table_cache, Iterator* iter, FileMetaData* meta);
|
||||
|
||||
} // namespace leveldb
|
||||
|
||||
|
391
db/c.cc
391
db/c.cc
@ -5,7 +5,7 @@
|
||||
#include "leveldb/c.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "leveldb/cache.h"
|
||||
#include "leveldb/comparator.h"
|
||||
#include "leveldb/db.h"
|
||||
@ -43,69 +43,72 @@ using leveldb::WriteOptions;
|
||||
|
||||
extern "C" {
|
||||
|
||||
struct leveldb_t { DB* rep; };
|
||||
struct leveldb_iterator_t { Iterator* rep; };
|
||||
struct leveldb_writebatch_t { WriteBatch rep; };
|
||||
struct leveldb_snapshot_t { const Snapshot* rep; };
|
||||
struct leveldb_readoptions_t { ReadOptions rep; };
|
||||
struct leveldb_writeoptions_t { WriteOptions rep; };
|
||||
struct leveldb_options_t { Options rep; };
|
||||
struct leveldb_cache_t { Cache* rep; };
|
||||
struct leveldb_seqfile_t { SequentialFile* rep; };
|
||||
struct leveldb_randomfile_t { RandomAccessFile* rep; };
|
||||
struct leveldb_writablefile_t { WritableFile* rep; };
|
||||
struct leveldb_logger_t { Logger* rep; };
|
||||
struct leveldb_filelock_t { FileLock* rep; };
|
||||
struct leveldb_t {
|
||||
DB* rep;
|
||||
};
|
||||
struct leveldb_iterator_t {
|
||||
Iterator* rep;
|
||||
};
|
||||
struct leveldb_writebatch_t {
|
||||
WriteBatch rep;
|
||||
};
|
||||
struct leveldb_snapshot_t {
|
||||
const Snapshot* rep;
|
||||
};
|
||||
struct leveldb_readoptions_t {
|
||||
ReadOptions rep;
|
||||
};
|
||||
struct leveldb_writeoptions_t {
|
||||
WriteOptions rep;
|
||||
};
|
||||
struct leveldb_options_t {
|
||||
Options rep;
|
||||
};
|
||||
struct leveldb_cache_t {
|
||||
Cache* rep;
|
||||
};
|
||||
struct leveldb_seqfile_t {
|
||||
SequentialFile* rep;
|
||||
};
|
||||
struct leveldb_randomfile_t {
|
||||
RandomAccessFile* rep;
|
||||
};
|
||||
struct leveldb_writablefile_t {
|
||||
WritableFile* rep;
|
||||
};
|
||||
struct leveldb_logger_t {
|
||||
Logger* rep;
|
||||
};
|
||||
struct leveldb_filelock_t {
|
||||
FileLock* rep;
|
||||
};
|
||||
|
||||
struct leveldb_comparator_t : public Comparator {
|
||||
void* state_;
|
||||
void (*destructor_)(void*);
|
||||
int (*compare_)(
|
||||
void*,
|
||||
const char* a, size_t alen,
|
||||
const char* b, size_t blen);
|
||||
const char* (*name_)(void*);
|
||||
~leveldb_comparator_t() override { (*destructor_)(state_); }
|
||||
|
||||
virtual ~leveldb_comparator_t() {
|
||||
(*destructor_)(state_);
|
||||
}
|
||||
|
||||
virtual int Compare(const Slice& a, const Slice& b) const {
|
||||
int Compare(const Slice& a, const Slice& b) const override {
|
||||
return (*compare_)(state_, a.data(), a.size(), b.data(), b.size());
|
||||
}
|
||||
|
||||
virtual const char* Name() const {
|
||||
return (*name_)(state_);
|
||||
}
|
||||
const char* Name() const override { return (*name_)(state_); }
|
||||
|
||||
// No-ops since the C binding does not support key shortening methods.
|
||||
virtual void FindShortestSeparator(std::string*, const Slice&) const { }
|
||||
virtual void FindShortSuccessor(std::string* key) const { }
|
||||
void FindShortestSeparator(std::string*, const Slice&) const override {}
|
||||
void FindShortSuccessor(std::string* key) const override {}
|
||||
|
||||
void* state_;
|
||||
void (*destructor_)(void*);
|
||||
int (*compare_)(void*, const char* a, size_t alen, const char* b,
|
||||
size_t blen);
|
||||
const char* (*name_)(void*);
|
||||
};
|
||||
|
||||
struct leveldb_filterpolicy_t : public FilterPolicy {
|
||||
void* state_;
|
||||
void (*destructor_)(void*);
|
||||
const char* (*name_)(void*);
|
||||
char* (*create_)(
|
||||
void*,
|
||||
const char* const* key_array, const size_t* key_length_array,
|
||||
int num_keys,
|
||||
size_t* filter_length);
|
||||
unsigned char (*key_match_)(
|
||||
void*,
|
||||
const char* key, size_t length,
|
||||
const char* filter, size_t filter_length);
|
||||
~leveldb_filterpolicy_t() override { (*destructor_)(state_); }
|
||||
|
||||
virtual ~leveldb_filterpolicy_t() {
|
||||
(*destructor_)(state_);
|
||||
}
|
||||
const char* Name() const override { return (*name_)(state_); }
|
||||
|
||||
virtual const char* Name() const {
|
||||
return (*name_)(state_);
|
||||
}
|
||||
|
||||
virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const {
|
||||
void CreateFilter(const Slice* keys, int n, std::string* dst) const override {
|
||||
std::vector<const char*> key_pointers(n);
|
||||
std::vector<size_t> key_sizes(n);
|
||||
for (int i = 0; i < n; i++) {
|
||||
@ -118,10 +121,19 @@ struct leveldb_filterpolicy_t : public FilterPolicy {
|
||||
free(filter);
|
||||
}
|
||||
|
||||
virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const {
|
||||
return (*key_match_)(state_, key.data(), key.size(),
|
||||
filter.data(), filter.size());
|
||||
bool KeyMayMatch(const Slice& key, const Slice& filter) const override {
|
||||
return (*key_match_)(state_, key.data(), key.size(), filter.data(),
|
||||
filter.size());
|
||||
}
|
||||
|
||||
void* state_;
|
||||
void (*destructor_)(void*);
|
||||
const char* (*name_)(void*);
|
||||
char* (*create_)(void*, const char* const* key_array,
|
||||
const size_t* key_length_array, int num_keys,
|
||||
size_t* filter_length);
|
||||
unsigned char (*key_match_)(void*, const char* key, size_t length,
|
||||
const char* filter, size_t filter_length);
|
||||
};
|
||||
|
||||
struct leveldb_env_t {
|
||||
@ -130,10 +142,10 @@ struct leveldb_env_t {
|
||||
};
|
||||
|
||||
static bool SaveError(char** errptr, const Status& s) {
|
||||
assert(errptr != NULL);
|
||||
assert(errptr != nullptr);
|
||||
if (s.ok()) {
|
||||
return false;
|
||||
} else if (*errptr == NULL) {
|
||||
} else if (*errptr == nullptr) {
|
||||
*errptr = strdup(s.ToString().c_str());
|
||||
} else {
|
||||
// TODO(sanjay): Merge with existing error?
|
||||
@ -149,13 +161,11 @@ static char* CopyString(const std::string& str) {
|
||||
return result;
|
||||
}
|
||||
|
||||
leveldb_t* leveldb_open(
|
||||
const leveldb_options_t* options,
|
||||
const char* name,
|
||||
char** errptr) {
|
||||
leveldb_t* leveldb_open(const leveldb_options_t* options, const char* name,
|
||||
char** errptr) {
|
||||
DB* db;
|
||||
if (SaveError(errptr, DB::Open(options->rep, std::string(name), &db))) {
|
||||
return NULL;
|
||||
return nullptr;
|
||||
}
|
||||
leveldb_t* result = new leveldb_t;
|
||||
result->rep = db;
|
||||
@ -167,40 +177,27 @@ void leveldb_close(leveldb_t* db) {
|
||||
delete db;
|
||||
}
|
||||
|
||||
// Writes the value bytes (val, vallen) under the key bytes (key, keylen)
// using the given write options. The resulting Status is handed to
// SaveError() together with errptr.
void leveldb_put(leveldb_t* db, const leveldb_writeoptions_t* options,
                 const char* key, size_t keylen, const char* val, size_t vallen,
                 char** errptr) {
  const Slice key_slice(key, keylen);
  const Slice value_slice(val, vallen);
  const Status status = db->rep->Put(options->rep, key_slice, value_slice);
  SaveError(errptr, status);
}
|
||||
|
||||
// Issues a Delete for the key bytes (key, keylen) using the given write
// options. The resulting Status is handed to SaveError() together with errptr.
void leveldb_delete(leveldb_t* db, const leveldb_writeoptions_t* options,
                    const char* key, size_t keylen, char** errptr) {
  const Slice key_slice(key, keylen);
  const Status status = db->rep->Delete(options->rep, key_slice);
  SaveError(errptr, status);
}
|
||||
|
||||
|
||||
// Applies the wrapped WriteBatch to the database with the given write
// options. The resulting Status is handed to SaveError() together with errptr.
void leveldb_write(leveldb_t* db, const leveldb_writeoptions_t* options,
                   leveldb_writebatch_t* batch, char** errptr) {
  WriteBatch* const updates = &batch->rep;
  const Status status = db->rep->Write(options->rep, updates);
  SaveError(errptr, status);
}
|
||||
|
||||
char* leveldb_get(
|
||||
leveldb_t* db,
|
||||
const leveldb_readoptions_t* options,
|
||||
const char* key, size_t keylen,
|
||||
size_t* vallen,
|
||||
char** errptr) {
|
||||
char* result = NULL;
|
||||
char* leveldb_get(leveldb_t* db, const leveldb_readoptions_t* options,
|
||||
const char* key, size_t keylen, size_t* vallen,
|
||||
char** errptr) {
|
||||
char* result = nullptr;
|
||||
std::string tmp;
|
||||
Status s = db->rep->Get(options->rep, Slice(key, keylen), &tmp);
|
||||
if (s.ok()) {
|
||||
@ -216,45 +213,40 @@ char* leveldb_get(
|
||||
}
|
||||
|
||||
// Allocates a new iterator handle wrapping db->rep->NewIterator(). The handle
// is created with `new` and returned to the caller.
leveldb_iterator_t* leveldb_create_iterator(
    leveldb_t* db, const leveldb_readoptions_t* options) {
  leveldb_iterator_t* handle = new leveldb_iterator_t;
  handle->rep = db->rep->NewIterator(options->rep);
  return handle;
}
|
||||
|
||||
// Allocates a new snapshot handle wrapping db->rep->GetSnapshot().
const leveldb_snapshot_t* leveldb_create_snapshot(leveldb_t* db) {
  leveldb_snapshot_t* handle = new leveldb_snapshot_t;
  handle->rep = db->rep->GetSnapshot();
  return handle;
}
|
||||
|
||||
// Hands the wrapped snapshot back to the database, then frees the handle.
void leveldb_release_snapshot(leveldb_t* db,
                              const leveldb_snapshot_t* snapshot) {
  const Snapshot* const wrapped = snapshot->rep;
  db->rep->ReleaseSnapshot(wrapped);
  delete snapshot;
}
|
||||
|
||||
// Looks up the property named by the NUL-terminated string `propname`.
// Returns a strdup()-allocated copy of the value, or nullptr when
// GetProperty() reports failure.
char* leveldb_property_value(leveldb_t* db, const char* propname) {
  std::string value;
  if (!db->rep->GetProperty(Slice(propname), &value)) {
    return nullptr;
  }
  // We use strdup() since we expect human readable output.
  return strdup(value.c_str());
}
|
||||
|
||||
void leveldb_approximate_sizes(
|
||||
leveldb_t* db,
|
||||
int num_ranges,
|
||||
const char* const* range_start_key, const size_t* range_start_key_len,
|
||||
const char* const* range_limit_key, const size_t* range_limit_key_len,
|
||||
uint64_t* sizes) {
|
||||
void leveldb_approximate_sizes(leveldb_t* db, int num_ranges,
|
||||
const char* const* range_start_key,
|
||||
const size_t* range_start_key_len,
|
||||
const char* const* range_limit_key,
|
||||
const size_t* range_limit_key_len,
|
||||
uint64_t* sizes) {
|
||||
Range* ranges = new Range[num_ranges];
|
||||
for (int i = 0; i < num_ranges; i++) {
|
||||
ranges[i].start = Slice(range_start_key[i], range_start_key_len[i]);
|
||||
@ -264,28 +256,23 @@ void leveldb_approximate_sizes(
|
||||
delete[] ranges;
|
||||
}
|
||||
|
||||
// Calls CompactRange() on the database. A null start_key or limit_key is
// passed through as a null Slice pointer; otherwise the bytes are wrapped in
// a Slice that lives for the duration of the call.
void leveldb_compact_range(leveldb_t* db, const char* start_key,
                           size_t start_key_len, const char* limit_key,
                           size_t limit_key_len) {
  Slice begin_storage, end_storage;
  const Slice* begin = nullptr;
  const Slice* end = nullptr;

  if (start_key) {
    begin_storage = Slice(start_key, start_key_len);
    begin = &begin_storage;
  }
  if (limit_key) {
    end_storage = Slice(limit_key, limit_key_len);
    end = &end_storage;
  }

  db->rep->CompactRange(begin, end);
}
|
||||
|
||||
void leveldb_destroy_db(
|
||||
const leveldb_options_t* options,
|
||||
const char* name,
|
||||
char** errptr) {
|
||||
void leveldb_destroy_db(const leveldb_options_t* options, const char* name,
|
||||
char** errptr) {
|
||||
SaveError(errptr, DestroyDB(name, options->rep));
|
||||
}
|
||||
|
||||
void leveldb_repair_db(
|
||||
const leveldb_options_t* options,
|
||||
const char* name,
|
||||
char** errptr) {
|
||||
void leveldb_repair_db(const leveldb_options_t* options, const char* name,
|
||||
char** errptr) {
|
||||
SaveError(errptr, RepairDB(name, options->rep));
|
||||
}
|
||||
|
||||
@ -310,13 +297,9 @@ void leveldb_iter_seek(leveldb_iterator_t* iter, const char* k, size_t klen) {
|
||||
iter->rep->Seek(Slice(k, klen));
|
||||
}
|
||||
|
||||
// Advances the wrapped iterator by calling Next() on it.
void leveldb_iter_next(leveldb_iterator_t* iter) {
  Iterator* const wrapped = iter->rep;
  wrapped->Next();
}
|
||||
|
||||
// Steps the wrapped iterator backwards by calling Prev() on it.
void leveldb_iter_prev(leveldb_iterator_t* iter) {
  Iterator* const wrapped = iter->rep;
  wrapped->Prev();
}
|
||||
|
||||
const char* leveldb_iter_key(const leveldb_iterator_t* iter, size_t* klen) {
|
||||
Slice s = iter->rep->key();
|
||||
@ -338,41 +321,34 @@ leveldb_writebatch_t* leveldb_writebatch_create() {
|
||||
return new leveldb_writebatch_t;
|
||||
}
|
||||
|
||||
// Frees a write batch handle; the WriteBatch it holds by value goes with it.
void leveldb_writebatch_destroy(leveldb_writebatch_t* b) {
  delete b;
}
|
||||
|
||||
// Calls Clear() on the wrapped WriteBatch.
void leveldb_writebatch_clear(leveldb_writebatch_t* b) {
  WriteBatch& batch = b->rep;
  batch.Clear();
}
|
||||
|
||||
// Adds a Put of (key, klen) -> (val, vlen) to the wrapped WriteBatch.
void leveldb_writebatch_put(leveldb_writebatch_t* b, const char* key,
                            size_t klen, const char* val, size_t vlen) {
  const Slice key_slice(key, klen);
  const Slice value_slice(val, vlen);
  b->rep.Put(key_slice, value_slice);
}
|
||||
|
||||
// Adds a Delete of the key bytes (key, klen) to the wrapped WriteBatch.
void leveldb_writebatch_delete(leveldb_writebatch_t* b, const char* key,
                               size_t klen) {
  const Slice key_slice(key, klen);
  b->rep.Delete(key_slice);
}
|
||||
|
||||
void leveldb_writebatch_iterate(
|
||||
leveldb_writebatch_t* b,
|
||||
void* state,
|
||||
void (*put)(void*, const char* k, size_t klen, const char* v, size_t vlen),
|
||||
void (*deleted)(void*, const char* k, size_t klen)) {
|
||||
void leveldb_writebatch_iterate(const leveldb_writebatch_t* b, void* state,
|
||||
void (*put)(void*, const char* k, size_t klen,
|
||||
const char* v, size_t vlen),
|
||||
void (*deleted)(void*, const char* k,
|
||||
size_t klen)) {
|
||||
class H : public WriteBatch::Handler {
|
||||
public:
|
||||
void* state_;
|
||||
void (*put_)(void*, const char* k, size_t klen, const char* v, size_t vlen);
|
||||
void (*deleted_)(void*, const char* k, size_t klen);
|
||||
virtual void Put(const Slice& key, const Slice& value) {
|
||||
void Put(const Slice& key, const Slice& value) override {
|
||||
(*put_)(state_, key.data(), key.size(), value.data(), value.size());
|
||||
}
|
||||
virtual void Delete(const Slice& key) {
|
||||
void Delete(const Slice& key) override {
|
||||
(*deleted_)(state_, key.data(), key.size());
|
||||
}
|
||||
};
|
||||
@ -383,47 +359,46 @@ void leveldb_writebatch_iterate(
|
||||
b->rep.Iterate(&handler);
|
||||
}
|
||||
|
||||
leveldb_options_t* leveldb_options_create() {
|
||||
return new leveldb_options_t;
|
||||
void leveldb_writebatch_append(leveldb_writebatch_t* destination,
|
||||
const leveldb_writebatch_t* source) {
|
||||
destination->rep.Append(source->rep);
|
||||
}
|
||||
|
||||
void leveldb_options_destroy(leveldb_options_t* options) {
|
||||
delete options;
|
||||
}
|
||||
leveldb_options_t* leveldb_options_create() { return new leveldb_options_t; }
|
||||
|
||||
void leveldb_options_set_comparator(
|
||||
leveldb_options_t* opt,
|
||||
leveldb_comparator_t* cmp) {
|
||||
void leveldb_options_destroy(leveldb_options_t* options) { delete options; }
|
||||
|
||||
void leveldb_options_set_comparator(leveldb_options_t* opt,
|
||||
leveldb_comparator_t* cmp) {
|
||||
opt->rep.comparator = cmp;
|
||||
}
|
||||
|
||||
// Stores `policy` as the filter policy pointer in the wrapped Options.
void leveldb_options_set_filter_policy(leveldb_options_t* opt,
                                       leveldb_filterpolicy_t* policy) {
  Options& target = opt->rep;
  target.filter_policy = policy;
}
|
||||
|
||||
// Copies `v` into the create_if_missing field of the wrapped Options.
void leveldb_options_set_create_if_missing(leveldb_options_t* opt,
                                           unsigned char v) {
  Options& target = opt->rep;
  target.create_if_missing = v;
}
|
||||
|
||||
// Copies `v` into the error_if_exists field of the wrapped Options.
void leveldb_options_set_error_if_exists(leveldb_options_t* opt,
                                         unsigned char v) {
  Options& target = opt->rep;
  target.error_if_exists = v;
}
|
||||
|
||||
// Copies `v` into the paranoid_checks field of the wrapped Options.
void leveldb_options_set_paranoid_checks(leveldb_options_t* opt,
                                         unsigned char v) {
  Options& target = opt->rep;
  target.paranoid_checks = v;
}
|
||||
|
||||
// Points the wrapped Options at the Env held by `env`; a null `env` stores a
// null Env pointer instead.
void leveldb_options_set_env(leveldb_options_t* opt, leveldb_env_t* env) {
  if (env) {
    opt->rep.env = env->rep;
  } else {
    opt->rep.env = nullptr;
  }
}
|
||||
|
||||
// Points the wrapped Options at the Logger held by `l`; a null `l` stores a
// null Logger pointer instead.
void leveldb_options_set_info_log(leveldb_options_t* opt, leveldb_logger_t* l) {
  if (l) {
    opt->rep.info_log = l->rep;
  } else {
    opt->rep.info_log = nullptr;
  }
}
|
||||
|
||||
void leveldb_options_set_write_buffer_size(leveldb_options_t* opt, size_t s) {
|
||||
@ -446,17 +421,18 @@ void leveldb_options_set_block_restart_interval(leveldb_options_t* opt, int n) {
|
||||
opt->rep.block_restart_interval = n;
|
||||
}
|
||||
|
||||
// Copies `s` into the max_file_size field of the wrapped Options.
void leveldb_options_set_max_file_size(leveldb_options_t* opt, size_t s) {
  Options& target = opt->rep;
  target.max_file_size = s;
}
|
||||
|
||||
// Casts the integer `t` to CompressionType and stores it in the wrapped
// Options.
void leveldb_options_set_compression(leveldb_options_t* opt, int t) {
  const CompressionType type = static_cast<CompressionType>(t);
  opt->rep.compression = type;
}
|
||||
|
||||
leveldb_comparator_t* leveldb_comparator_create(
|
||||
void* state,
|
||||
void (*destructor)(void*),
|
||||
int (*compare)(
|
||||
void*,
|
||||
const char* a, size_t alen,
|
||||
const char* b, size_t blen),
|
||||
void* state, void (*destructor)(void*),
|
||||
int (*compare)(void*, const char* a, size_t alen, const char* b,
|
||||
size_t blen),
|
||||
const char* (*name)(void*)) {
|
||||
leveldb_comparator_t* result = new leveldb_comparator_t;
|
||||
result->state_ = state;
|
||||
@ -466,22 +442,15 @@ leveldb_comparator_t* leveldb_comparator_create(
|
||||
return result;
|
||||
}
|
||||
|
||||
// Frees a comparator handle.
void leveldb_comparator_destroy(leveldb_comparator_t* cmp) {
  delete cmp;
}
|
||||
|
||||
leveldb_filterpolicy_t* leveldb_filterpolicy_create(
|
||||
void* state,
|
||||
void (*destructor)(void*),
|
||||
char* (*create_filter)(
|
||||
void*,
|
||||
const char* const* key_array, const size_t* key_length_array,
|
||||
int num_keys,
|
||||
size_t* filter_length),
|
||||
unsigned char (*key_may_match)(
|
||||
void*,
|
||||
const char* key, size_t length,
|
||||
const char* filter, size_t filter_length),
|
||||
void* state, void (*destructor)(void*),
|
||||
char* (*create_filter)(void*, const char* const* key_array,
|
||||
const size_t* key_length_array, int num_keys,
|
||||
size_t* filter_length),
|
||||
unsigned char (*key_may_match)(void*, const char* key, size_t length,
|
||||
const char* filter, size_t filter_length),
|
||||
const char* (*name)(void*)) {
|
||||
leveldb_filterpolicy_t* result = new leveldb_filterpolicy_t;
|
||||
result->state_ = state;
|
||||
@ -501,7 +470,8 @@ leveldb_filterpolicy_t* leveldb_filterpolicy_create_bloom(int bits_per_key) {
|
||||
// they delegate to a NewBloomFilterPolicy() instead of user
|
||||
// supplied C functions.
|
||||
struct Wrapper : public leveldb_filterpolicy_t {
|
||||
const FilterPolicy* rep_;
|
||||
static void DoNothing(void*) {}
|
||||
|
||||
~Wrapper() { delete rep_; }
|
||||
const char* Name() const { return rep_->Name(); }
|
||||
void CreateFilter(const Slice* keys, int n, std::string* dst) const {
|
||||
@ -510,11 +480,12 @@ leveldb_filterpolicy_t* leveldb_filterpolicy_create_bloom(int bits_per_key) {
|
||||
bool KeyMayMatch(const Slice& key, const Slice& filter) const {
|
||||
return rep_->KeyMayMatch(key, filter);
|
||||
}
|
||||
static void DoNothing(void*) { }
|
||||
|
||||
const FilterPolicy* rep_;
|
||||
};
|
||||
Wrapper* wrapper = new Wrapper;
|
||||
wrapper->rep_ = NewBloomFilterPolicy(bits_per_key);
|
||||
wrapper->state_ = NULL;
|
||||
wrapper->state_ = nullptr;
|
||||
wrapper->destructor_ = &Wrapper::DoNothing;
|
||||
return wrapper;
|
||||
}
|
||||
@ -523,37 +494,31 @@ leveldb_readoptions_t* leveldb_readoptions_create() {
|
||||
return new leveldb_readoptions_t;
|
||||
}
|
||||
|
||||
// Frees a read options handle.
void leveldb_readoptions_destroy(leveldb_readoptions_t* opt) {
  delete opt;
}
|
||||
|
||||
// Copies `v` into the verify_checksums field of the wrapped ReadOptions.
void leveldb_readoptions_set_verify_checksums(leveldb_readoptions_t* opt,
                                              unsigned char v) {
  ReadOptions& target = opt->rep;
  target.verify_checksums = v;
}
|
||||
|
||||
// Copies `v` into the fill_cache field of the wrapped ReadOptions.
void leveldb_readoptions_set_fill_cache(leveldb_readoptions_t* opt,
                                        unsigned char v) {
  ReadOptions& target = opt->rep;
  target.fill_cache = v;
}
|
||||
|
||||
// Points the wrapped ReadOptions at the Snapshot held by `snap`; a null `snap`
// stores a null snapshot pointer instead.
void leveldb_readoptions_set_snapshot(leveldb_readoptions_t* opt,
                                      const leveldb_snapshot_t* snap) {
  if (snap) {
    opt->rep.snapshot = snap->rep;
  } else {
    opt->rep.snapshot = nullptr;
  }
}
|
||||
|
||||
// Allocates a new write options handle holding a default-constructed
// WriteOptions.
leveldb_writeoptions_t* leveldb_writeoptions_create() {
  leveldb_writeoptions_t* handle = new leveldb_writeoptions_t;
  return handle;
}
|
||||
|
||||
// Frees a write options handle.
void leveldb_writeoptions_destroy(leveldb_writeoptions_t* opt) {
  delete opt;
}
|
||||
|
||||
// Copies `v` into the sync field of the wrapped WriteOptions.
void leveldb_writeoptions_set_sync(leveldb_writeoptions_t* opt,
                                   unsigned char v) {
  WriteOptions& target = opt->rep;
  target.sync = v;
}
|
||||
|
||||
@ -580,16 +545,22 @@ void leveldb_env_destroy(leveldb_env_t* env) {
|
||||
delete env;
|
||||
}
|
||||
|
||||
void leveldb_free(void* ptr) {
|
||||
free(ptr);
|
||||
char* leveldb_env_get_test_directory(leveldb_env_t* env) {
|
||||
std::string result;
|
||||
if (!env->rep->GetTestDirectory(&result).ok()) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
char* buffer = static_cast<char*>(malloc(result.size() + 1));
|
||||
memcpy(buffer, result.data(), result.size());
|
||||
buffer[result.size()] = '\0';
|
||||
return buffer;
|
||||
}
|
||||
|
||||
int leveldb_major_version() {
|
||||
return kMajorVersion;
|
||||
}
|
||||
void leveldb_free(void* ptr) { free(ptr); }
|
||||
|
||||
int leveldb_minor_version() {
|
||||
return kMinorVersion;
|
||||
}
|
||||
int leveldb_major_version() { return kMajorVersion; }
|
||||
|
||||
int leveldb_minor_version() { return kMinorVersion; }
|
||||
|
||||
} // end extern "C"
|
||||
|
28
db/c_test.c
28
db/c_test.c
@ -8,24 +8,14 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
|
||||
const char* phase = "";
|
||||
static char dbname[200];
|
||||
|
||||
static void StartPhase(const char* name) {
|
||||
fprintf(stderr, "=== Test %s\n", name);
|
||||
phase = name;
|
||||
}
|
||||
|
||||
/* Returns the value of the TEST_TMPDIR environment variable, or "/tmp" when
 * the variable is unset or empty. */
static const char* GetTempDir(void) {
  const char* dir = getenv("TEST_TMPDIR");
  if (dir != NULL && dir[0] != '\0') {
    return dir;
  }
  return "/tmp";
}
|
||||
|
||||
#define CheckNoError(err) \
|
||||
if ((err) != NULL) { \
|
||||
fprintf(stderr, "%s:%d: %s: %s\n", __FILE__, __LINE__, phase, (err)); \
|
||||
@ -162,21 +152,19 @@ int main(int argc, char** argv) {
|
||||
leveldb_options_t* options;
|
||||
leveldb_readoptions_t* roptions;
|
||||
leveldb_writeoptions_t* woptions;
|
||||
char* dbname;
|
||||
char* err = NULL;
|
||||
int run = -1;
|
||||
|
||||
CheckCondition(leveldb_major_version() >= 1);
|
||||
CheckCondition(leveldb_minor_version() >= 1);
|
||||
|
||||
snprintf(dbname, sizeof(dbname),
|
||||
"%s/leveldb_c_test-%d",
|
||||
GetTempDir(),
|
||||
((int) geteuid()));
|
||||
|
||||
StartPhase("create_objects");
|
||||
cmp = leveldb_comparator_create(NULL, CmpDestroy, CmpCompare, CmpName);
|
||||
env = leveldb_create_default_env();
|
||||
cache = leveldb_cache_create_lru(100000);
|
||||
dbname = leveldb_env_get_test_directory(env);
|
||||
CheckCondition(dbname != NULL);
|
||||
|
||||
options = leveldb_options_create();
|
||||
leveldb_options_set_comparator(options, cmp);
|
||||
@ -189,6 +177,7 @@ int main(int argc, char** argv) {
|
||||
leveldb_options_set_max_open_files(options, 10);
|
||||
leveldb_options_set_block_size(options, 1024);
|
||||
leveldb_options_set_block_restart_interval(options, 8);
|
||||
leveldb_options_set_max_file_size(options, 3 << 20);
|
||||
leveldb_options_set_compression(options, leveldb_no_compression);
|
||||
|
||||
roptions = leveldb_readoptions_create();
|
||||
@ -239,12 +228,18 @@ int main(int argc, char** argv) {
|
||||
leveldb_writebatch_clear(wb);
|
||||
leveldb_writebatch_put(wb, "bar", 3, "b", 1);
|
||||
leveldb_writebatch_put(wb, "box", 3, "c", 1);
|
||||
leveldb_writebatch_delete(wb, "bar", 3);
|
||||
|
||||
leveldb_writebatch_t* wb2 = leveldb_writebatch_create();
|
||||
leveldb_writebatch_delete(wb2, "bar", 3);
|
||||
leveldb_writebatch_append(wb, wb2);
|
||||
leveldb_writebatch_destroy(wb2);
|
||||
|
||||
leveldb_write(db, woptions, wb, &err);
|
||||
CheckNoError(err);
|
||||
CheckGet(db, roptions, "foo", "hello");
|
||||
CheckGet(db, roptions, "bar", NULL);
|
||||
CheckGet(db, roptions, "box", "c");
|
||||
|
||||
int pos = 0;
|
||||
leveldb_writebatch_iterate(wb, &pos, CheckPut, CheckDel);
|
||||
CheckCondition(pos == 3);
|
||||
@ -381,6 +376,7 @@ int main(int argc, char** argv) {
|
||||
leveldb_options_destroy(options);
|
||||
leveldb_readoptions_destroy(roptions);
|
||||
leveldb_writeoptions_destroy(woptions);
|
||||
leveldb_free(dbname);
|
||||
leveldb_cache_destroy(cache);
|
||||
leveldb_comparator_destroy(cmp);
|
||||
leveldb_env_destroy(env);
|
||||
|
@ -2,20 +2,16 @@
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#include "leveldb/db.h"
|
||||
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#include "leveldb/cache.h"
|
||||
#include "leveldb/env.h"
|
||||
#include "leveldb/table.h"
|
||||
#include "leveldb/write_batch.h"
|
||||
|
||||
#include "db/db_impl.h"
|
||||
#include "db/filename.h"
|
||||
#include "db/log_format.h"
|
||||
#include "db/version_set.h"
|
||||
#include "leveldb/cache.h"
|
||||
#include "leveldb/db.h"
|
||||
#include "leveldb/table.h"
|
||||
#include "leveldb/write_batch.h"
|
||||
#include "util/logging.h"
|
||||
#include "util/testharness.h"
|
||||
#include "util/testutil.h"
|
||||
@ -26,44 +22,35 @@ static const int kValueSize = 1000;
|
||||
|
||||
class CorruptionTest {
|
||||
public:
|
||||
test::ErrorEnv env_;
|
||||
std::string dbname_;
|
||||
Cache* tiny_cache_;
|
||||
Options options_;
|
||||
DB* db_;
|
||||
|
||||
CorruptionTest() {
|
||||
tiny_cache_ = NewLRUCache(100);
|
||||
CorruptionTest()
|
||||
: db_(nullptr),
|
||||
dbname_("/memenv/corruption_test"),
|
||||
tiny_cache_(NewLRUCache(100)) {
|
||||
options_.env = &env_;
|
||||
options_.block_cache = tiny_cache_;
|
||||
dbname_ = test::TmpDir() + "/db_test";
|
||||
DestroyDB(dbname_, options_);
|
||||
|
||||
db_ = NULL;
|
||||
options_.create_if_missing = true;
|
||||
Reopen();
|
||||
options_.create_if_missing = false;
|
||||
}
|
||||
|
||||
~CorruptionTest() {
|
||||
delete db_;
|
||||
DestroyDB(dbname_, Options());
|
||||
delete tiny_cache_;
|
||||
delete db_;
|
||||
delete tiny_cache_;
|
||||
}
|
||||
|
||||
Status TryReopen() {
|
||||
delete db_;
|
||||
db_ = NULL;
|
||||
db_ = nullptr;
|
||||
return DB::Open(options_, dbname_, &db_);
|
||||
}
|
||||
|
||||
void Reopen() {
|
||||
ASSERT_OK(TryReopen());
|
||||
}
|
||||
void Reopen() { ASSERT_OK(TryReopen()); }
|
||||
|
||||
void RepairDB() {
|
||||
delete db_;
|
||||
db_ = NULL;
|
||||
db_ = nullptr;
|
||||
ASSERT_OK(::leveldb::RepairDB(dbname_, options_));
|
||||
}
|
||||
|
||||
@ -71,7 +58,7 @@ class CorruptionTest {
|
||||
std::string key_space, value_space;
|
||||
WriteBatch batch;
|
||||
for (int i = 0; i < n; i++) {
|
||||
//if ((i % 100) == 0) fprintf(stderr, "@ %d of %d\n", i, n);
|
||||
// if ((i % 100) == 0) fprintf(stderr, "@ %d of %d\n", i, n);
|
||||
Slice key = Key(i, &key_space);
|
||||
batch.Clear();
|
||||
batch.Put(key, Value(i, &value_space));
|
||||
@ -100,8 +87,7 @@ class CorruptionTest {
|
||||
// Ignore boundary keys.
|
||||
continue;
|
||||
}
|
||||
if (!ConsumeDecimalNumber(&in, &key) ||
|
||||
!in.empty() ||
|
||||
if (!ConsumeDecimalNumber(&in, &key) || !in.empty() ||
|
||||
key < next_expected) {
|
||||
bad_keys++;
|
||||
continue;
|
||||
@ -126,14 +112,13 @@ class CorruptionTest {
|
||||
void Corrupt(FileType filetype, int offset, int bytes_to_corrupt) {
|
||||
// Pick file to corrupt
|
||||
std::vector<std::string> filenames;
|
||||
ASSERT_OK(env_.GetChildren(dbname_, &filenames));
|
||||
ASSERT_OK(env_.target()->GetChildren(dbname_, &filenames));
|
||||
uint64_t number;
|
||||
FileType type;
|
||||
std::string fname;
|
||||
int picked_number = -1;
|
||||
for (size_t i = 0; i < filenames.size(); i++) {
|
||||
if (ParseFileName(filenames[i], &number, &type) &&
|
||||
type == filetype &&
|
||||
if (ParseFileName(filenames[i], &number, &type) && type == filetype &&
|
||||
int(number) > picked_number) { // Pick latest file
|
||||
fname = dbname_ + "/" + filenames[i];
|
||||
picked_number = number;
|
||||
@ -141,35 +126,32 @@ class CorruptionTest {
|
||||
}
|
||||
ASSERT_TRUE(!fname.empty()) << filetype;
|
||||
|
||||
struct stat sbuf;
|
||||
if (stat(fname.c_str(), &sbuf) != 0) {
|
||||
const char* msg = strerror(errno);
|
||||
ASSERT_TRUE(false) << fname << ": " << msg;
|
||||
}
|
||||
uint64_t file_size;
|
||||
ASSERT_OK(env_.target()->GetFileSize(fname, &file_size));
|
||||
|
||||
if (offset < 0) {
|
||||
// Relative to end of file; make it absolute
|
||||
if (-offset > sbuf.st_size) {
|
||||
if (-offset > file_size) {
|
||||
offset = 0;
|
||||
} else {
|
||||
offset = sbuf.st_size + offset;
|
||||
offset = file_size + offset;
|
||||
}
|
||||
}
|
||||
if (offset > sbuf.st_size) {
|
||||
offset = sbuf.st_size;
|
||||
if (offset > file_size) {
|
||||
offset = file_size;
|
||||
}
|
||||
if (offset + bytes_to_corrupt > sbuf.st_size) {
|
||||
bytes_to_corrupt = sbuf.st_size - offset;
|
||||
if (offset + bytes_to_corrupt > file_size) {
|
||||
bytes_to_corrupt = file_size - offset;
|
||||
}
|
||||
|
||||
// Do it
|
||||
std::string contents;
|
||||
Status s = ReadFileToString(Env::Default(), fname, &contents);
|
||||
Status s = ReadFileToString(env_.target(), fname, &contents);
|
||||
ASSERT_TRUE(s.ok()) << s.ToString();
|
||||
for (int i = 0; i < bytes_to_corrupt; i++) {
|
||||
contents[i + offset] ^= 0x80;
|
||||
}
|
||||
s = WriteStringToFile(Env::Default(), contents, fname);
|
||||
s = WriteStringToFile(env_.target(), contents, fname);
|
||||
ASSERT_TRUE(s.ok()) << s.ToString();
|
||||
}
|
||||
|
||||
@ -197,12 +179,20 @@ class CorruptionTest {
|
||||
Random r(k);
|
||||
return test::RandomString(&r, kValueSize, storage);
|
||||
}
|
||||
|
||||
test::ErrorEnv env_;
|
||||
Options options_;
|
||||
DB* db_;
|
||||
|
||||
private:
|
||||
std::string dbname_;
|
||||
Cache* tiny_cache_;
|
||||
};
|
||||
|
||||
TEST(CorruptionTest, Recovery) {
|
||||
Build(100);
|
||||
Check(100, 100);
|
||||
Corrupt(kLogFile, 19, 1); // WriteBatch tag for first record
|
||||
Corrupt(kLogFile, 19, 1); // WriteBatch tag for first record
|
||||
Corrupt(kLogFile, log::kBlockSize + 1000, 1); // Somewhere in second block
|
||||
Reopen();
|
||||
|
||||
@ -237,8 +227,8 @@ TEST(CorruptionTest, TableFile) {
|
||||
Build(100);
|
||||
DBImpl* dbi = reinterpret_cast<DBImpl*>(db_);
|
||||
dbi->TEST_CompactMemTable();
|
||||
dbi->TEST_CompactRange(0, NULL, NULL);
|
||||
dbi->TEST_CompactRange(1, NULL, NULL);
|
||||
dbi->TEST_CompactRange(0, nullptr, nullptr);
|
||||
dbi->TEST_CompactRange(1, nullptr, nullptr);
|
||||
|
||||
Corrupt(kTableFile, 100, 1);
|
||||
Check(90, 99);
|
||||
@ -251,8 +241,8 @@ TEST(CorruptionTest, TableFileRepair) {
|
||||
Build(100);
|
||||
DBImpl* dbi = reinterpret_cast<DBImpl*>(db_);
|
||||
dbi->TEST_CompactMemTable();
|
||||
dbi->TEST_CompactRange(0, NULL, NULL);
|
||||
dbi->TEST_CompactRange(1, NULL, NULL);
|
||||
dbi->TEST_CompactRange(0, nullptr, nullptr);
|
||||
dbi->TEST_CompactRange(1, nullptr, nullptr);
|
||||
|
||||
Corrupt(kTableFile, 100, 1);
|
||||
RepairDB();
|
||||
@ -302,7 +292,7 @@ TEST(CorruptionTest, CorruptedDescriptor) {
|
||||
ASSERT_OK(db_->Put(WriteOptions(), "foo", "hello"));
|
||||
DBImpl* dbi = reinterpret_cast<DBImpl*>(db_);
|
||||
dbi->TEST_CompactMemTable();
|
||||
dbi->TEST_CompactRange(0, NULL, NULL);
|
||||
dbi->TEST_CompactRange(0, nullptr, nullptr);
|
||||
|
||||
Corrupt(kDescriptorFile, 0, 1000);
|
||||
Status s = TryReopen();
|
||||
@ -343,7 +333,7 @@ TEST(CorruptionTest, CompactionInputErrorParanoid) {
|
||||
Corrupt(kTableFile, 100, 1);
|
||||
env_.SleepForMicroseconds(100000);
|
||||
}
|
||||
dbi->CompactRange(NULL, NULL);
|
||||
dbi->CompactRange(nullptr, nullptr);
|
||||
|
||||
// Write must fail because of corrupted table
|
||||
std::string tmp1, tmp2;
|
||||
@ -369,6 +359,4 @@ TEST(CorruptionTest, UnrelatedKeys) {
|
||||
|
||||
} // namespace leveldb
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
return leveldb::test::RunAllTests();
|
||||
}
|
||||
int main(int argc, char** argv) { return leveldb::test::RunAllTests(); }
|
||||
|
681
db/db_impl.cc
681
db/db_impl.cc
File diff suppressed because it is too large
Load Diff
154
db/db_impl.h
154
db/db_impl.h
@ -5,8 +5,11 @@
|
||||
#ifndef STORAGE_LEVELDB_DB_DB_IMPL_H_
|
||||
#define STORAGE_LEVELDB_DB_DB_IMPL_H_
|
||||
|
||||
#include <atomic>
|
||||
#include <deque>
|
||||
#include <set>
|
||||
#include <string>
|
||||
|
||||
#include "db/dbformat.h"
|
||||
#include "db/log_writer.h"
|
||||
#include "db/snapshot.h"
|
||||
@ -26,21 +29,25 @@ class VersionSet;
|
||||
class DBImpl : public DB {
|
||||
public:
|
||||
DBImpl(const Options& options, const std::string& dbname);
|
||||
virtual ~DBImpl();
|
||||
|
||||
DBImpl(const DBImpl&) = delete;
|
||||
DBImpl& operator=(const DBImpl&) = delete;
|
||||
|
||||
~DBImpl() override;
|
||||
|
||||
// Implementations of the DB interface
|
||||
virtual Status Put(const WriteOptions&, const Slice& key, const Slice& value);
|
||||
virtual Status Delete(const WriteOptions&, const Slice& key);
|
||||
virtual Status Write(const WriteOptions& options, WriteBatch* updates);
|
||||
virtual Status Get(const ReadOptions& options,
|
||||
const Slice& key,
|
||||
std::string* value);
|
||||
virtual Iterator* NewIterator(const ReadOptions&);
|
||||
virtual const Snapshot* GetSnapshot();
|
||||
virtual void ReleaseSnapshot(const Snapshot* snapshot);
|
||||
virtual bool GetProperty(const Slice& property, std::string* value);
|
||||
virtual void GetApproximateSizes(const Range* range, int n, uint64_t* sizes);
|
||||
virtual void CompactRange(const Slice* begin, const Slice* end);
|
||||
Status Put(const WriteOptions&, const Slice& key,
|
||||
const Slice& value) override;
|
||||
Status Delete(const WriteOptions&, const Slice& key) override;
|
||||
Status Write(const WriteOptions& options, WriteBatch* updates) override;
|
||||
Status Get(const ReadOptions& options, const Slice& key,
|
||||
std::string* value) override;
|
||||
Iterator* NewIterator(const ReadOptions&) override;
|
||||
const Snapshot* GetSnapshot() override;
|
||||
void ReleaseSnapshot(const Snapshot* snapshot) override;
|
||||
bool GetProperty(const Slice& property, std::string* value) override;
|
||||
void GetApproximateSizes(const Range* range, int n, uint64_t* sizes) override;
|
||||
void CompactRange(const Slice* begin, const Slice* end) override;
|
||||
|
||||
// Extra methods (for testing) that are not in the public DB interface
|
||||
|
||||
@ -69,6 +76,31 @@ class DBImpl : public DB {
|
||||
struct CompactionState;
|
||||
struct Writer;
|
||||
|
||||
// Information for a manual compaction
|
||||
struct ManualCompaction {
|
||||
int level;
|
||||
bool done;
|
||||
const InternalKey* begin; // null means beginning of key range
|
||||
const InternalKey* end; // null means end of key range
|
||||
InternalKey tmp_storage; // Used to keep track of compaction progress
|
||||
};
|
||||
|
||||
// Per level compaction stats. stats_[level] stores the stats for
|
||||
// compactions that produced data for the specified "level".
|
||||
struct CompactionStats {
|
||||
CompactionStats() : micros(0), bytes_read(0), bytes_written(0) {}
|
||||
|
||||
void Add(const CompactionStats& c) {
|
||||
this->micros += c.micros;
|
||||
this->bytes_read += c.bytes_read;
|
||||
this->bytes_written += c.bytes_written;
|
||||
}
|
||||
|
||||
int64_t micros;
|
||||
int64_t bytes_read;
|
||||
int64_t bytes_written;
|
||||
};
|
||||
|
||||
Iterator* NewInternalIterator(const ReadOptions&,
|
||||
SequenceNumber* latest_snapshot,
|
||||
uint32_t* seed);
|
||||
@ -78,21 +110,21 @@ class DBImpl : public DB {
|
||||
// Recover the descriptor from persistent storage. May do a significant
|
||||
// amount of work to recover recently logged updates. Any changes to
|
||||
// be made to the descriptor are added to *edit.
|
||||
Status Recover(VersionEdit* edit) EXCLUSIVE_LOCKS_REQUIRED(mutex_);
|
||||
Status Recover(VersionEdit* edit, bool* save_manifest)
|
||||
EXCLUSIVE_LOCKS_REQUIRED(mutex_);
|
||||
|
||||
void MaybeIgnoreError(Status* s) const;
|
||||
|
||||
// Delete any unneeded files and stale in-memory entries.
|
||||
void DeleteObsoleteFiles();
|
||||
void DeleteObsoleteFiles() EXCLUSIVE_LOCKS_REQUIRED(mutex_);
|
||||
|
||||
// Compact the in-memory write buffer to disk. Switches to a new
|
||||
// log-file/memtable and writes a new descriptor iff successful.
|
||||
// Errors are recorded in bg_error_.
|
||||
void CompactMemTable() EXCLUSIVE_LOCKS_REQUIRED(mutex_);
|
||||
|
||||
Status RecoverLogFile(uint64_t log_number,
|
||||
VersionEdit* edit,
|
||||
SequenceNumber* max_sequence)
|
||||
Status RecoverLogFile(uint64_t log_number, bool last_log, bool* save_manifest,
|
||||
VersionEdit* edit, SequenceNumber* max_sequence)
|
||||
EXCLUSIVE_LOCKS_REQUIRED(mutex_);
|
||||
|
||||
Status WriteLevel0Table(MemTable* mem, VersionEdit* edit, Version* base)
|
||||
@ -100,14 +132,15 @@ class DBImpl : public DB {
|
||||
|
||||
Status MakeRoomForWrite(bool force /* compact even if there is room? */)
|
||||
EXCLUSIVE_LOCKS_REQUIRED(mutex_);
|
||||
WriteBatch* BuildBatchGroup(Writer** last_writer);
|
||||
WriteBatch* BuildBatchGroup(Writer** last_writer)
|
||||
EXCLUSIVE_LOCKS_REQUIRED(mutex_);
|
||||
|
||||
void RecordBackgroundError(const Status& s);
|
||||
|
||||
void MaybeScheduleCompaction() EXCLUSIVE_LOCKS_REQUIRED(mutex_);
|
||||
static void BGWork(void* db);
|
||||
void BackgroundCall();
|
||||
void BackgroundCompaction() EXCLUSIVE_LOCKS_REQUIRED(mutex_);
|
||||
void BackgroundCompaction() EXCLUSIVE_LOCKS_REQUIRED(mutex_);
|
||||
void CleanupCompaction(CompactionState* compact)
|
||||
EXCLUSIVE_LOCKS_REQUIRED(mutex_);
|
||||
Status DoCompactionWork(CompactionState* compact)
|
||||
@ -118,93 +151,66 @@ class DBImpl : public DB {
|
||||
Status InstallCompactionResults(CompactionState* compact)
|
||||
EXCLUSIVE_LOCKS_REQUIRED(mutex_);
|
||||
|
||||
const Comparator* user_comparator() const {
|
||||
return internal_comparator_.user_comparator();
|
||||
}
|
||||
|
||||
// Constant after construction
|
||||
Env* const env_;
|
||||
const InternalKeyComparator internal_comparator_;
|
||||
const InternalFilterPolicy internal_filter_policy_;
|
||||
const Options options_; // options_.comparator == &internal_comparator_
|
||||
bool owns_info_log_;
|
||||
bool owns_cache_;
|
||||
const bool owns_info_log_;
|
||||
const bool owns_cache_;
|
||||
const std::string dbname_;
|
||||
|
||||
// table_cache_ provides its own synchronization
|
||||
TableCache* table_cache_;
|
||||
TableCache* const table_cache_;
|
||||
|
||||
// Lock over the persistent DB state. Non-NULL iff successfully acquired.
|
||||
// Lock over the persistent DB state. Non-null iff successfully acquired.
|
||||
FileLock* db_lock_;
|
||||
|
||||
// State below is protected by mutex_
|
||||
port::Mutex mutex_;
|
||||
port::AtomicPointer shutting_down_;
|
||||
port::CondVar bg_cv_; // Signalled when background work finishes
|
||||
std::atomic<bool> shutting_down_;
|
||||
port::CondVar background_work_finished_signal_ GUARDED_BY(mutex_);
|
||||
MemTable* mem_;
|
||||
MemTable* imm_; // Memtable being compacted
|
||||
port::AtomicPointer has_imm_; // So bg thread can detect non-NULL imm_
|
||||
MemTable* imm_ GUARDED_BY(mutex_); // Memtable being compacted
|
||||
std::atomic<bool> has_imm_; // So bg thread can detect non-null imm_
|
||||
WritableFile* logfile_;
|
||||
uint64_t logfile_number_;
|
||||
uint64_t logfile_number_ GUARDED_BY(mutex_);
|
||||
log::Writer* log_;
|
||||
uint32_t seed_; // For sampling.
|
||||
uint32_t seed_ GUARDED_BY(mutex_); // For sampling.
|
||||
|
||||
// Queue of writers.
|
||||
std::deque<Writer*> writers_;
|
||||
WriteBatch* tmp_batch_;
|
||||
std::deque<Writer*> writers_ GUARDED_BY(mutex_);
|
||||
WriteBatch* tmp_batch_ GUARDED_BY(mutex_);
|
||||
|
||||
SnapshotList snapshots_;
|
||||
SnapshotList snapshots_ GUARDED_BY(mutex_);
|
||||
|
||||
// Set of table files to protect from deletion because they are
|
||||
// part of ongoing compactions.
|
||||
std::set<uint64_t> pending_outputs_;
|
||||
std::set<uint64_t> pending_outputs_ GUARDED_BY(mutex_);
|
||||
|
||||
// Has a background compaction been scheduled or is running?
|
||||
bool bg_compaction_scheduled_;
|
||||
bool background_compaction_scheduled_ GUARDED_BY(mutex_);
|
||||
|
||||
// Information for a manual compaction
|
||||
struct ManualCompaction {
|
||||
int level;
|
||||
bool done;
|
||||
const InternalKey* begin; // NULL means beginning of key range
|
||||
const InternalKey* end; // NULL means end of key range
|
||||
InternalKey tmp_storage; // Used to keep track of compaction progress
|
||||
};
|
||||
ManualCompaction* manual_compaction_;
|
||||
ManualCompaction* manual_compaction_ GUARDED_BY(mutex_);
|
||||
|
||||
VersionSet* versions_;
|
||||
VersionSet* const versions_ GUARDED_BY(mutex_);
|
||||
|
||||
// Have we encountered a background error in paranoid mode?
|
||||
Status bg_error_;
|
||||
Status bg_error_ GUARDED_BY(mutex_);
|
||||
|
||||
// Per level compaction stats. stats_[level] stores the stats for
|
||||
// compactions that produced data for the specified "level".
|
||||
struct CompactionStats {
|
||||
int64_t micros;
|
||||
int64_t bytes_read;
|
||||
int64_t bytes_written;
|
||||
|
||||
CompactionStats() : micros(0), bytes_read(0), bytes_written(0) { }
|
||||
|
||||
void Add(const CompactionStats& c) {
|
||||
this->micros += c.micros;
|
||||
this->bytes_read += c.bytes_read;
|
||||
this->bytes_written += c.bytes_written;
|
||||
}
|
||||
};
|
||||
CompactionStats stats_[config::kNumLevels];
|
||||
|
||||
// No copying allowed
|
||||
DBImpl(const DBImpl&);
|
||||
void operator=(const DBImpl&);
|
||||
|
||||
const Comparator* user_comparator() const {
|
||||
return internal_comparator_.user_comparator();
|
||||
}
|
||||
CompactionStats stats_[config::kNumLevels] GUARDED_BY(mutex_);
|
||||
};
|
||||
|
||||
// Sanitize db options. The caller should delete result.info_log if
|
||||
// it is not equal to src.info_log.
|
||||
extern Options SanitizeOptions(const std::string& db,
|
||||
const InternalKeyComparator* icmp,
|
||||
const InternalFilterPolicy* ipolicy,
|
||||
const Options& src);
|
||||
Options SanitizeOptions(const std::string& db,
|
||||
const InternalKeyComparator* icmp,
|
||||
const InternalFilterPolicy* ipolicy,
|
||||
const Options& src);
|
||||
|
||||
} // namespace leveldb
|
||||
|
||||
|
@ -4,9 +4,9 @@
|
||||
|
||||
#include "db/db_iter.h"
|
||||
|
||||
#include "db/filename.h"
|
||||
#include "db/db_impl.h"
|
||||
#include "db/dbformat.h"
|
||||
#include "db/filename.h"
|
||||
#include "leveldb/env.h"
|
||||
#include "leveldb/iterator.h"
|
||||
#include "port/port.h"
|
||||
@ -36,17 +36,14 @@ namespace {
|
||||
// combines multiple entries for the same userkey found in the DB
|
||||
// representation into a single entry while accounting for sequence
|
||||
// numbers, deletion markers, overwrites, etc.
|
||||
class DBIter: public Iterator {
|
||||
class DBIter : public Iterator {
|
||||
public:
|
||||
// Which direction is the iterator currently moving?
|
||||
// (1) When moving forward, the internal iterator is positioned at
|
||||
// the exact entry that yields this->key(), this->value()
|
||||
// (2) When moving backwards, the internal iterator is positioned
|
||||
// just before all entries whose user key == this->key().
|
||||
enum Direction {
|
||||
kForward,
|
||||
kReverse
|
||||
};
|
||||
enum Direction { kForward, kReverse };
|
||||
|
||||
DBIter(DBImpl* db, const Comparator* cmp, Iterator* iter, SequenceNumber s,
|
||||
uint32_t seed)
|
||||
@ -57,21 +54,22 @@ class DBIter: public Iterator {
|
||||
direction_(kForward),
|
||||
valid_(false),
|
||||
rnd_(seed),
|
||||
bytes_counter_(RandomPeriod()) {
|
||||
}
|
||||
virtual ~DBIter() {
|
||||
delete iter_;
|
||||
}
|
||||
virtual bool Valid() const { return valid_; }
|
||||
virtual Slice key() const {
|
||||
bytes_until_read_sampling_(RandomCompactionPeriod()) {}
|
||||
|
||||
DBIter(const DBIter&) = delete;
|
||||
DBIter& operator=(const DBIter&) = delete;
|
||||
|
||||
~DBIter() override { delete iter_; }
|
||||
bool Valid() const override { return valid_; }
|
||||
Slice key() const override {
|
||||
assert(valid_);
|
||||
return (direction_ == kForward) ? ExtractUserKey(iter_->key()) : saved_key_;
|
||||
}
|
||||
virtual Slice value() const {
|
||||
Slice value() const override {
|
||||
assert(valid_);
|
||||
return (direction_ == kForward) ? iter_->value() : saved_value_;
|
||||
}
|
||||
virtual Status status() const {
|
||||
Status status() const override {
|
||||
if (status_.ok()) {
|
||||
return iter_->status();
|
||||
} else {
|
||||
@ -79,11 +77,11 @@ class DBIter: public Iterator {
|
||||
}
|
||||
}
|
||||
|
||||
virtual void Next();
|
||||
virtual void Prev();
|
||||
virtual void Seek(const Slice& target);
|
||||
virtual void SeekToFirst();
|
||||
virtual void SeekToLast();
|
||||
void Next() override;
|
||||
void Prev() override;
|
||||
void Seek(const Slice& target) override;
|
||||
void SeekToFirst() override;
|
||||
void SeekToLast() override;
|
||||
|
||||
private:
|
||||
void FindNextUserEntry(bool skipping, std::string* skip);
|
||||
@ -103,38 +101,35 @@ class DBIter: public Iterator {
|
||||
}
|
||||
}
|
||||
|
||||
// Pick next gap with average value of config::kReadBytesPeriod.
|
||||
ssize_t RandomPeriod() {
|
||||
return rnd_.Uniform(2*config::kReadBytesPeriod);
|
||||
// Picks the number of bytes that can be read until a compaction is scheduled.
|
||||
size_t RandomCompactionPeriod() {
|
||||
return rnd_.Uniform(2 * config::kReadBytesPeriod);
|
||||
}
|
||||
|
||||
DBImpl* db_;
|
||||
const Comparator* const user_comparator_;
|
||||
Iterator* const iter_;
|
||||
SequenceNumber const sequence_;
|
||||
|
||||
Status status_;
|
||||
std::string saved_key_; // == current key when direction_==kReverse
|
||||
std::string saved_value_; // == current raw value when direction_==kReverse
|
||||
std::string saved_key_; // == current key when direction_==kReverse
|
||||
std::string saved_value_; // == current raw value when direction_==kReverse
|
||||
Direction direction_;
|
||||
bool valid_;
|
||||
|
||||
Random rnd_;
|
||||
ssize_t bytes_counter_;
|
||||
|
||||
// No copying allowed
|
||||
DBIter(const DBIter&);
|
||||
void operator=(const DBIter&);
|
||||
size_t bytes_until_read_sampling_;
|
||||
};
|
||||
|
||||
inline bool DBIter::ParseKey(ParsedInternalKey* ikey) {
|
||||
Slice k = iter_->key();
|
||||
ssize_t n = k.size() + iter_->value().size();
|
||||
bytes_counter_ -= n;
|
||||
while (bytes_counter_ < 0) {
|
||||
bytes_counter_ += RandomPeriod();
|
||||
|
||||
size_t bytes_read = k.size() + iter_->value().size();
|
||||
while (bytes_until_read_sampling_ < bytes_read) {
|
||||
bytes_until_read_sampling_ += RandomCompactionPeriod();
|
||||
db_->RecordReadSample(k);
|
||||
}
|
||||
assert(bytes_until_read_sampling_ >= bytes_read);
|
||||
bytes_until_read_sampling_ -= bytes_read;
|
||||
|
||||
if (!ParseInternalKey(k, ikey)) {
|
||||
status_ = Status::Corruption("corrupted internal key in DBIter");
|
||||
return false;
|
||||
@ -165,6 +160,15 @@ void DBIter::Next() {
|
||||
} else {
|
||||
// Store in saved_key_ the current key so we skip it below.
|
||||
SaveKey(ExtractUserKey(iter_->key()), &saved_key_);
|
||||
|
||||
// iter_ is pointing to current key. We can now safely move to the next to
|
||||
// avoid checking current key.
|
||||
iter_->Next();
|
||||
if (!iter_->Valid()) {
|
||||
valid_ = false;
|
||||
saved_key_.clear();
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
FindNextUserEntry(true, &saved_key_);
|
||||
@ -218,8 +222,8 @@ void DBIter::Prev() {
|
||||
ClearSavedValue();
|
||||
return;
|
||||
}
|
||||
if (user_comparator_->Compare(ExtractUserKey(iter_->key()),
|
||||
saved_key_) < 0) {
|
||||
if (user_comparator_->Compare(ExtractUserKey(iter_->key()), saved_key_) <
|
||||
0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -275,8 +279,8 @@ void DBIter::Seek(const Slice& target) {
|
||||
direction_ = kForward;
|
||||
ClearSavedValue();
|
||||
saved_key_.clear();
|
||||
AppendInternalKey(
|
||||
&saved_key_, ParsedInternalKey(target, sequence_, kValueTypeForSeek));
|
||||
AppendInternalKey(&saved_key_,
|
||||
ParsedInternalKey(target, sequence_, kValueTypeForSeek));
|
||||
iter_->Seek(saved_key_);
|
||||
if (iter_->Valid()) {
|
||||
FindNextUserEntry(false, &saved_key_ /* temporary storage */);
|
||||
@ -305,12 +309,9 @@ void DBIter::SeekToLast() {
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
Iterator* NewDBIterator(
|
||||
DBImpl* db,
|
||||
const Comparator* user_key_comparator,
|
||||
Iterator* internal_iter,
|
||||
SequenceNumber sequence,
|
||||
uint32_t seed) {
|
||||
Iterator* NewDBIterator(DBImpl* db, const Comparator* user_key_comparator,
|
||||
Iterator* internal_iter, SequenceNumber sequence,
|
||||
uint32_t seed) {
|
||||
return new DBIter(db, user_key_comparator, internal_iter, sequence, seed);
|
||||
}
|
||||
|
||||
|
12
db/db_iter.h
12
db/db_iter.h
@ -6,8 +6,9 @@
|
||||
#define STORAGE_LEVELDB_DB_DB_ITER_H_
|
||||
|
||||
#include <stdint.h>
|
||||
#include "leveldb/db.h"
|
||||
|
||||
#include "db/dbformat.h"
|
||||
#include "leveldb/db.h"
|
||||
|
||||
namespace leveldb {
|
||||
|
||||
@ -16,12 +17,9 @@ class DBImpl;
|
||||
// Return a new iterator that converts internal keys (yielded by
|
||||
// "*internal_iter") that were live at the specified "sequence" number
|
||||
// into appropriate user keys.
|
||||
extern Iterator* NewDBIterator(
|
||||
DBImpl* db,
|
||||
const Comparator* user_key_comparator,
|
||||
Iterator* internal_iter,
|
||||
SequenceNumber sequence,
|
||||
uint32_t seed);
|
||||
Iterator* NewDBIterator(DBImpl* db, const Comparator* user_key_comparator,
|
||||
Iterator* internal_iter, SequenceNumber sequence,
|
||||
uint32_t seed);
|
||||
|
||||
} // namespace leveldb
|
||||
|
||||
|
745
db/db_test.cc
745
db/db_test.cc
File diff suppressed because it is too large
Load Diff
@ -2,8 +2,12 @@
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#include <stdio.h>
|
||||
#include "db/dbformat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include <sstream>
|
||||
|
||||
#include "port/port.h"
|
||||
#include "util/coding.h"
|
||||
|
||||
@ -21,26 +25,20 @@ void AppendInternalKey(std::string* result, const ParsedInternalKey& key) {
|
||||
}
|
||||
|
||||
std::string ParsedInternalKey::DebugString() const {
|
||||
char buf[50];
|
||||
snprintf(buf, sizeof(buf), "' @ %llu : %d",
|
||||
(unsigned long long) sequence,
|
||||
int(type));
|
||||
std::string result = "'";
|
||||
result += EscapeString(user_key.ToString());
|
||||
result += buf;
|
||||
return result;
|
||||
std::ostringstream ss;
|
||||
ss << '\'' << EscapeString(user_key.ToString()) << "' @ " << sequence << " : "
|
||||
<< static_cast<int>(type);
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
std::string InternalKey::DebugString() const {
|
||||
std::string result;
|
||||
ParsedInternalKey parsed;
|
||||
if (ParseInternalKey(rep_, &parsed)) {
|
||||
result = parsed.DebugString();
|
||||
} else {
|
||||
result = "(bad)";
|
||||
result.append(EscapeString(rep_));
|
||||
return parsed.DebugString();
|
||||
}
|
||||
return result;
|
||||
std::ostringstream ss;
|
||||
ss << "(bad)" << EscapeString(rep_);
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
const char* InternalKeyComparator::Name() const {
|
||||
@ -65,9 +63,8 @@ int InternalKeyComparator::Compare(const Slice& akey, const Slice& bkey) const {
|
||||
return r;
|
||||
}
|
||||
|
||||
void InternalKeyComparator::FindShortestSeparator(
|
||||
std::string* start,
|
||||
const Slice& limit) const {
|
||||
void InternalKeyComparator::FindShortestSeparator(std::string* start,
|
||||
const Slice& limit) const {
|
||||
// Attempt to shorten the user portion of the key
|
||||
Slice user_start = ExtractUserKey(*start);
|
||||
Slice user_limit = ExtractUserKey(limit);
|
||||
@ -77,7 +74,8 @@ void InternalKeyComparator::FindShortestSeparator(
|
||||
user_comparator_->Compare(user_start, tmp) < 0) {
|
||||
// User key has become shorter physically, but larger logically.
|
||||
// Tack on the earliest possible number to the shortened user key.
|
||||
PutFixed64(&tmp, PackSequenceAndType(kMaxSequenceNumber,kValueTypeForSeek));
|
||||
PutFixed64(&tmp,
|
||||
PackSequenceAndType(kMaxSequenceNumber, kValueTypeForSeek));
|
||||
assert(this->Compare(*start, tmp) < 0);
|
||||
assert(this->Compare(tmp, limit) < 0);
|
||||
start->swap(tmp);
|
||||
@ -92,15 +90,14 @@ void InternalKeyComparator::FindShortSuccessor(std::string* key) const {
|
||||
user_comparator_->Compare(user_key, tmp) < 0) {
|
||||
// User key has become shorter physically, but larger logically.
|
||||
// Tack on the earliest possible number to the shortened user key.
|
||||
PutFixed64(&tmp, PackSequenceAndType(kMaxSequenceNumber,kValueTypeForSeek));
|
||||
PutFixed64(&tmp,
|
||||
PackSequenceAndType(kMaxSequenceNumber, kValueTypeForSeek));
|
||||
assert(this->Compare(*key, tmp) < 0);
|
||||
key->swap(tmp);
|
||||
}
|
||||
}
|
||||
|
||||
const char* InternalFilterPolicy::Name() const {
|
||||
return user_policy_->Name();
|
||||
}
|
||||
const char* InternalFilterPolicy::Name() const { return user_policy_->Name(); }
|
||||
|
||||
void InternalFilterPolicy::CreateFilter(const Slice* keys, int n,
|
||||
std::string* dst) const {
|
||||
|
@ -6,6 +6,7 @@
|
||||
#define STORAGE_LEVELDB_DB_DBFORMAT_H_
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include "leveldb/comparator.h"
|
||||
#include "leveldb/db.h"
|
||||
#include "leveldb/filter_policy.h"
|
||||
@ -48,10 +49,7 @@ class InternalKey;
|
||||
// Value types encoded as the last component of internal keys.
|
||||
// DO NOT CHANGE THESE ENUM VALUES: they are embedded in the on-disk
|
||||
// data structures.
|
||||
enum ValueType {
|
||||
kTypeDeletion = 0x0,
|
||||
kTypeValue = 0x1
|
||||
};
|
||||
enum ValueType { kTypeDeletion = 0x0, kTypeValue = 0x1 };
|
||||
// kValueTypeForSeek defines the ValueType that should be passed when
|
||||
// constructing a ParsedInternalKey object for seeking to a particular
|
||||
// sequence number (since we sort sequence numbers in decreasing order
|
||||
@ -64,17 +62,16 @@ typedef uint64_t SequenceNumber;
|
||||
|
||||
// We leave eight bits empty at the bottom so a type and sequence#
|
||||
// can be packed together into 64-bits.
|
||||
static const SequenceNumber kMaxSequenceNumber =
|
||||
((0x1ull << 56) - 1);
|
||||
static const SequenceNumber kMaxSequenceNumber = ((0x1ull << 56) - 1);
|
||||
|
||||
struct ParsedInternalKey {
|
||||
Slice user_key;
|
||||
SequenceNumber sequence;
|
||||
ValueType type;
|
||||
|
||||
ParsedInternalKey() { } // Intentionally left uninitialized (for speed)
|
||||
ParsedInternalKey() {} // Intentionally left uninitialized (for speed)
|
||||
ParsedInternalKey(const Slice& u, const SequenceNumber& seq, ValueType t)
|
||||
: user_key(u), sequence(seq), type(t) { }
|
||||
: user_key(u), sequence(seq), type(t) {}
|
||||
std::string DebugString() const;
|
||||
};
|
||||
|
||||
@ -84,15 +81,13 @@ inline size_t InternalKeyEncodingLength(const ParsedInternalKey& key) {
|
||||
}
|
||||
|
||||
// Append the serialization of "key" to *result.
|
||||
extern void AppendInternalKey(std::string* result,
|
||||
const ParsedInternalKey& key);
|
||||
void AppendInternalKey(std::string* result, const ParsedInternalKey& key);
|
||||
|
||||
// Attempt to parse an internal key from "internal_key". On success,
|
||||
// stores the parsed data in "*result", and returns true.
|
||||
//
|
||||
// On error, returns false, leaves "*result" in an undefined state.
|
||||
extern bool ParseInternalKey(const Slice& internal_key,
|
||||
ParsedInternalKey* result);
|
||||
bool ParseInternalKey(const Slice& internal_key, ParsedInternalKey* result);
|
||||
|
||||
// Returns the user key portion of an internal key.
|
||||
inline Slice ExtractUserKey(const Slice& internal_key) {
|
||||
@ -100,27 +95,19 @@ inline Slice ExtractUserKey(const Slice& internal_key) {
|
||||
return Slice(internal_key.data(), internal_key.size() - 8);
|
||||
}
|
||||
|
||||
inline ValueType ExtractValueType(const Slice& internal_key) {
|
||||
assert(internal_key.size() >= 8);
|
||||
const size_t n = internal_key.size();
|
||||
uint64_t num = DecodeFixed64(internal_key.data() + n - 8);
|
||||
unsigned char c = num & 0xff;
|
||||
return static_cast<ValueType>(c);
|
||||
}
|
||||
|
||||
// A comparator for internal keys that uses a specified comparator for
|
||||
// the user key portion and breaks ties by decreasing sequence number.
|
||||
class InternalKeyComparator : public Comparator {
|
||||
private:
|
||||
const Comparator* user_comparator_;
|
||||
|
||||
public:
|
||||
explicit InternalKeyComparator(const Comparator* c) : user_comparator_(c) { }
|
||||
virtual const char* Name() const;
|
||||
virtual int Compare(const Slice& a, const Slice& b) const;
|
||||
virtual void FindShortestSeparator(
|
||||
std::string* start,
|
||||
const Slice& limit) const;
|
||||
virtual void FindShortSuccessor(std::string* key) const;
|
||||
explicit InternalKeyComparator(const Comparator* c) : user_comparator_(c) {}
|
||||
const char* Name() const override;
|
||||
int Compare(const Slice& a, const Slice& b) const override;
|
||||
void FindShortestSeparator(std::string* start,
|
||||
const Slice& limit) const override;
|
||||
void FindShortSuccessor(std::string* key) const override;
|
||||
|
||||
const Comparator* user_comparator() const { return user_comparator_; }
|
||||
|
||||
@ -131,11 +118,12 @@ class InternalKeyComparator : public Comparator {
|
||||
class InternalFilterPolicy : public FilterPolicy {
|
||||
private:
|
||||
const FilterPolicy* const user_policy_;
|
||||
|
||||
public:
|
||||
explicit InternalFilterPolicy(const FilterPolicy* p) : user_policy_(p) { }
|
||||
virtual const char* Name() const;
|
||||
virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const;
|
||||
virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const;
|
||||
explicit InternalFilterPolicy(const FilterPolicy* p) : user_policy_(p) {}
|
||||
const char* Name() const override;
|
||||
void CreateFilter(const Slice* keys, int n, std::string* dst) const override;
|
||||
bool KeyMayMatch(const Slice& key, const Slice& filter) const override;
|
||||
};
|
||||
|
||||
// Modules in this directory should keep internal keys wrapped inside
|
||||
@ -144,13 +132,18 @@ class InternalFilterPolicy : public FilterPolicy {
|
||||
class InternalKey {
|
||||
private:
|
||||
std::string rep_;
|
||||
|
||||
public:
|
||||
InternalKey() { } // Leave rep_ as empty to indicate it is invalid
|
||||
InternalKey() {} // Leave rep_ as empty to indicate it is invalid
|
||||
InternalKey(const Slice& user_key, SequenceNumber s, ValueType t) {
|
||||
AppendInternalKey(&rep_, ParsedInternalKey(user_key, s, t));
|
||||
}
|
||||
|
||||
void DecodeFrom(const Slice& s) { rep_.assign(s.data(), s.size()); }
|
||||
bool DecodeFrom(const Slice& s) {
|
||||
rep_.assign(s.data(), s.size());
|
||||
return !rep_.empty();
|
||||
}
|
||||
|
||||
Slice Encode() const {
|
||||
assert(!rep_.empty());
|
||||
return rep_;
|
||||
@ -168,8 +161,8 @@ class InternalKey {
|
||||
std::string DebugString() const;
|
||||
};
|
||||
|
||||
inline int InternalKeyComparator::Compare(
|
||||
const InternalKey& a, const InternalKey& b) const {
|
||||
inline int InternalKeyComparator::Compare(const InternalKey& a,
|
||||
const InternalKey& b) const {
|
||||
return Compare(a.Encode(), b.Encode());
|
||||
}
|
||||
|
||||
@ -192,6 +185,9 @@ class LookupKey {
|
||||
// the specified sequence number.
|
||||
LookupKey(const Slice& user_key, SequenceNumber sequence);
|
||||
|
||||
LookupKey(const LookupKey&) = delete;
|
||||
LookupKey& operator=(const LookupKey&) = delete;
|
||||
|
||||
~LookupKey();
|
||||
|
||||
// Return a key suitable for lookup in a MemTable.
|
||||
@ -214,11 +210,7 @@ class LookupKey {
|
||||
const char* start_;
|
||||
const char* kstart_;
|
||||
const char* end_;
|
||||
char space_[200]; // Avoid allocation for short keys
|
||||
|
||||
// No copying allowed
|
||||
LookupKey(const LookupKey&);
|
||||
void operator=(const LookupKey&);
|
||||
char space_[200]; // Avoid allocation for short keys
|
||||
};
|
||||
|
||||
inline LookupKey::~LookupKey() {
|
||||
|
@ -8,8 +8,7 @@
|
||||
|
||||
namespace leveldb {
|
||||
|
||||
static std::string IKey(const std::string& user_key,
|
||||
uint64_t seq,
|
||||
static std::string IKey(const std::string& user_key, uint64_t seq,
|
||||
ValueType vt) {
|
||||
std::string encoded;
|
||||
AppendInternalKey(&encoded, ParsedInternalKey(user_key, seq, vt));
|
||||
@ -28,9 +27,7 @@ static std::string ShortSuccessor(const std::string& s) {
|
||||
return result;
|
||||
}
|
||||
|
||||
static void TestKey(const std::string& key,
|
||||
uint64_t seq,
|
||||
ValueType vt) {
|
||||
static void TestKey(const std::string& key, uint64_t seq, ValueType vt) {
|
||||
std::string encoded = IKey(key, seq, vt);
|
||||
|
||||
Slice in(encoded);
|
||||
@ -44,16 +41,22 @@ static void TestKey(const std::string& key,
|
||||
ASSERT_TRUE(!ParseInternalKey(Slice("bar"), &decoded));
|
||||
}
|
||||
|
||||
class FormatTest { };
|
||||
class FormatTest {};
|
||||
|
||||
TEST(FormatTest, InternalKey_EncodeDecode) {
|
||||
const char* keys[] = { "", "k", "hello", "longggggggggggggggggggggg" };
|
||||
const uint64_t seq[] = {
|
||||
1, 2, 3,
|
||||
(1ull << 8) - 1, 1ull << 8, (1ull << 8) + 1,
|
||||
(1ull << 16) - 1, 1ull << 16, (1ull << 16) + 1,
|
||||
(1ull << 32) - 1, 1ull << 32, (1ull << 32) + 1
|
||||
};
|
||||
const char* keys[] = {"", "k", "hello", "longggggggggggggggggggggg"};
|
||||
const uint64_t seq[] = {1,
|
||||
2,
|
||||
3,
|
||||
(1ull << 8) - 1,
|
||||
1ull << 8,
|
||||
(1ull << 8) + 1,
|
||||
(1ull << 16) - 1,
|
||||
1ull << 16,
|
||||
(1ull << 16) + 1,
|
||||
(1ull << 32) - 1,
|
||||
1ull << 32,
|
||||
(1ull << 32) + 1};
|
||||
for (int k = 0; k < sizeof(keys) / sizeof(keys[0]); k++) {
|
||||
for (int s = 0; s < sizeof(seq) / sizeof(seq[0]); s++) {
|
||||
TestKey(keys[k], seq[s], kTypeValue);
|
||||
@ -62,40 +65,44 @@ TEST(FormatTest, InternalKey_EncodeDecode) {
|
||||
}
|
||||
}
|
||||
|
||||
// Decoding an empty input into a default-constructed InternalKey is expected
// to report failure, i.e. DecodeFrom("") must return false.
TEST(FormatTest, InternalKey_DecodeFromEmpty) {
|
||||
InternalKey internal_key;
|
||||
|
||||
ASSERT_TRUE(!internal_key.DecodeFrom(""));
|
||||
}
|
||||
|
||||
TEST(FormatTest, InternalKeyShortSeparator) {
|
||||
// When user keys are same
|
||||
ASSERT_EQ(IKey("foo", 100, kTypeValue),
|
||||
Shorten(IKey("foo", 100, kTypeValue),
|
||||
IKey("foo", 99, kTypeValue)));
|
||||
ASSERT_EQ(IKey("foo", 100, kTypeValue),
|
||||
Shorten(IKey("foo", 100, kTypeValue),
|
||||
IKey("foo", 101, kTypeValue)));
|
||||
ASSERT_EQ(IKey("foo", 100, kTypeValue),
|
||||
Shorten(IKey("foo", 100, kTypeValue),
|
||||
IKey("foo", 100, kTypeValue)));
|
||||
ASSERT_EQ(IKey("foo", 100, kTypeValue),
|
||||
Shorten(IKey("foo", 100, kTypeValue),
|
||||
IKey("foo", 100, kTypeDeletion)));
|
||||
Shorten(IKey("foo", 100, kTypeValue), IKey("foo", 99, kTypeValue)));
|
||||
ASSERT_EQ(
|
||||
IKey("foo", 100, kTypeValue),
|
||||
Shorten(IKey("foo", 100, kTypeValue), IKey("foo", 101, kTypeValue)));
|
||||
ASSERT_EQ(
|
||||
IKey("foo", 100, kTypeValue),
|
||||
Shorten(IKey("foo", 100, kTypeValue), IKey("foo", 100, kTypeValue)));
|
||||
ASSERT_EQ(
|
||||
IKey("foo", 100, kTypeValue),
|
||||
Shorten(IKey("foo", 100, kTypeValue), IKey("foo", 100, kTypeDeletion)));
|
||||
|
||||
// When user keys are misordered
|
||||
ASSERT_EQ(IKey("foo", 100, kTypeValue),
|
||||
Shorten(IKey("foo", 100, kTypeValue),
|
||||
IKey("bar", 99, kTypeValue)));
|
||||
Shorten(IKey("foo", 100, kTypeValue), IKey("bar", 99, kTypeValue)));
|
||||
|
||||
// When user keys are different, but correctly ordered
|
||||
ASSERT_EQ(IKey("g", kMaxSequenceNumber, kValueTypeForSeek),
|
||||
Shorten(IKey("foo", 100, kTypeValue),
|
||||
IKey("hello", 200, kTypeValue)));
|
||||
ASSERT_EQ(
|
||||
IKey("g", kMaxSequenceNumber, kValueTypeForSeek),
|
||||
Shorten(IKey("foo", 100, kTypeValue), IKey("hello", 200, kTypeValue)));
|
||||
|
||||
// When start user key is prefix of limit user key
|
||||
ASSERT_EQ(IKey("foo", 100, kTypeValue),
|
||||
Shorten(IKey("foo", 100, kTypeValue),
|
||||
IKey("foobar", 200, kTypeValue)));
|
||||
ASSERT_EQ(
|
||||
IKey("foo", 100, kTypeValue),
|
||||
Shorten(IKey("foo", 100, kTypeValue), IKey("foobar", 200, kTypeValue)));
|
||||
|
||||
// When limit user key is prefix of start user key
|
||||
ASSERT_EQ(IKey("foobar", 100, kTypeValue),
|
||||
Shorten(IKey("foobar", 100, kTypeValue),
|
||||
IKey("foo", 200, kTypeValue)));
|
||||
ASSERT_EQ(
|
||||
IKey("foobar", 100, kTypeValue),
|
||||
Shorten(IKey("foobar", 100, kTypeValue), IKey("foo", 200, kTypeValue)));
|
||||
}
|
||||
|
||||
TEST(FormatTest, InternalKeyShortestSuccessor) {
|
||||
@ -105,8 +112,20 @@ TEST(FormatTest, InternalKeyShortestSuccessor) {
|
||||
ShortSuccessor(IKey("\xff\xff", 100, kTypeValue)));
|
||||
}
|
||||
|
||||
// Pins the exact text returned by ParsedInternalKey::DebugString() for a key
// built from a user key containing both double and single quotes, sequence
// number 42 and kTypeValue.
TEST(FormatTest, ParsedInternalKeyDebugString) {
|
||||
ParsedInternalKey key("The \"key\" in 'single quotes'", 42, kTypeValue);
|
||||
|
||||
ASSERT_EQ("'The \"key\" in 'single quotes'' @ 42 : 1", key.DebugString());
|
||||
}
|
||||
|
||||
// Pins the exact text returned by InternalKey::DebugString() in two cases:
// a key built from a user key, sequence number 42 and kTypeValue, and a
// default-constructed key.
TEST(FormatTest, InternalKeyDebugString) {
|
||||
InternalKey key("The \"key\" in 'single quotes'", 42, kTypeValue);
|
||||
ASSERT_EQ("'The \"key\" in 'single quotes'' @ 42 : 1", key.DebugString());
|
||||
|
||||
// A default-constructed InternalKey is expected to render as "(bad)".
InternalKey invalid_key;
|
||||
ASSERT_EQ("(bad)", invalid_key.DebugString());
|
||||
}
|
||||
|
||||
} // namespace leveldb
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
return leveldb::test::RunAllTests();
|
||||
}
|
||||
int main(int argc, char** argv) { return leveldb::test::RunAllTests(); }
|
||||
|
@ -2,7 +2,10 @@
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#include "leveldb/dumpfile.h"
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include "db/dbformat.h"
|
||||
#include "db/filename.h"
|
||||
#include "db/log_reader.h"
|
||||
@ -35,8 +38,7 @@ bool GuessType(const std::string& fname, FileType* type) {
|
||||
// Notified when log reader encounters corruption.
|
||||
class CorruptionReporter : public log::Reader::Reporter {
|
||||
public:
|
||||
WritableFile* dst_;
|
||||
virtual void Corruption(size_t bytes, const Status& status) {
|
||||
void Corruption(size_t bytes, const Status& status) override {
|
||||
std::string r = "corruption: ";
|
||||
AppendNumberTo(&r, bytes);
|
||||
r += " bytes; ";
|
||||
@ -44,6 +46,8 @@ class CorruptionReporter : public log::Reader::Reporter {
|
||||
r.push_back('\n');
|
||||
dst_->Append(r);
|
||||
}
|
||||
|
||||
WritableFile* dst_;
|
||||
};
|
||||
|
||||
// Print contents of a log file. (*func)() is called on every record.
|
||||
@ -70,8 +74,7 @@ Status PrintLogContents(Env* env, const std::string& fname,
|
||||
// Called on every item found in a WriteBatch.
|
||||
class WriteBatchItemPrinter : public WriteBatch::Handler {
|
||||
public:
|
||||
WritableFile* dst_;
|
||||
virtual void Put(const Slice& key, const Slice& value) {
|
||||
void Put(const Slice& key, const Slice& value) override {
|
||||
std::string r = " put '";
|
||||
AppendEscapedStringTo(&r, key);
|
||||
r += "' '";
|
||||
@ -79,14 +82,15 @@ class WriteBatchItemPrinter : public WriteBatch::Handler {
|
||||
r += "'\n";
|
||||
dst_->Append(r);
|
||||
}
|
||||
virtual void Delete(const Slice& key) {
|
||||
void Delete(const Slice& key) override {
|
||||
std::string r = " del '";
|
||||
AppendEscapedStringTo(&r, key);
|
||||
r += "'\n";
|
||||
dst_->Append(r);
|
||||
}
|
||||
};
|
||||
|
||||
WritableFile* dst_;
|
||||
};
|
||||
|
||||
// Called on every log record (each one of which is a WriteBatch)
|
||||
// found in a kLogFile.
|
||||
@ -142,8 +146,8 @@ Status DumpDescriptor(Env* env, const std::string& fname, WritableFile* dst) {
|
||||
|
||||
Status DumpTable(Env* env, const std::string& fname, WritableFile* dst) {
|
||||
uint64_t file_size;
|
||||
RandomAccessFile* file = NULL;
|
||||
Table* table = NULL;
|
||||
RandomAccessFile* file = nullptr;
|
||||
Table* table = nullptr;
|
||||
Status s = env->GetFileSize(fname, &file_size);
|
||||
if (s.ok()) {
|
||||
s = env->NewRandomAccessFile(fname, &file);
|
||||
@ -213,9 +217,12 @@ Status DumpFile(Env* env, const std::string& fname, WritableFile* dst) {
|
||||
return Status::InvalidArgument(fname + ": unknown file type");
|
||||
}
|
||||
switch (ftype) {
|
||||
case kLogFile: return DumpLog(env, fname, dst);
|
||||
case kDescriptorFile: return DumpDescriptor(env, fname, dst);
|
||||
case kTableFile: return DumpTable(env, fname, dst);
|
||||
case kLogFile:
|
||||
return DumpLog(env, fname, dst);
|
||||
case kDescriptorFile:
|
||||
return DumpDescriptor(env, fname, dst);
|
||||
case kTableFile:
|
||||
return DumpTable(env, fname, dst);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -6,18 +6,20 @@
|
||||
// the last "sync". It then checks for data loss errors by purposely dropping
|
||||
// file data (or entire files) not protected by a "sync".
|
||||
|
||||
#include "leveldb/db.h"
|
||||
|
||||
#include <map>
|
||||
#include <set>
|
||||
|
||||
#include "db/db_impl.h"
|
||||
#include "db/filename.h"
|
||||
#include "db/log_format.h"
|
||||
#include "db/version_set.h"
|
||||
#include "leveldb/cache.h"
|
||||
#include "leveldb/db.h"
|
||||
#include "leveldb/env.h"
|
||||
#include "leveldb/table.h"
|
||||
#include "leveldb/write_batch.h"
|
||||
#include "port/port.h"
|
||||
#include "port/thread_annotations.h"
|
||||
#include "util/logging.h"
|
||||
#include "util/mutexlock.h"
|
||||
#include "util/testharness.h"
|
||||
@ -34,7 +36,7 @@ class FaultInjectionTestEnv;
|
||||
namespace {
|
||||
|
||||
// Assume a filename, and not a directory name like "/foo/bar/"
|
||||
static std::string GetDirName(const std::string filename) {
|
||||
static std::string GetDirName(const std::string& filename) {
|
||||
size_t found = filename.find_last_of("/\\");
|
||||
if (found == std::string::npos) {
|
||||
return "";
|
||||
@ -54,8 +56,7 @@ Status Truncate(const std::string& filename, uint64_t length) {
|
||||
|
||||
SequentialFile* orig_file;
|
||||
Status s = env->NewSequentialFile(filename, &orig_file);
|
||||
if (!s.ok())
|
||||
return s;
|
||||
if (!s.ok()) return s;
|
||||
|
||||
char* scratch = new char[length];
|
||||
leveldb::Slice result;
|
||||
@ -83,15 +84,15 @@ Status Truncate(const std::string& filename, uint64_t length) {
|
||||
|
||||
struct FileState {
|
||||
std::string filename_;
|
||||
ssize_t pos_;
|
||||
ssize_t pos_at_last_sync_;
|
||||
ssize_t pos_at_last_flush_;
|
||||
int64_t pos_;
|
||||
int64_t pos_at_last_sync_;
|
||||
int64_t pos_at_last_flush_;
|
||||
|
||||
FileState(const std::string& filename)
|
||||
: filename_(filename),
|
||||
pos_(-1),
|
||||
pos_at_last_sync_(-1),
|
||||
pos_at_last_flush_(-1) { }
|
||||
pos_at_last_flush_(-1) {}
|
||||
|
||||
FileState() : pos_(-1), pos_at_last_sync_(-1), pos_at_last_flush_(-1) {}
|
||||
|
||||
@ -106,14 +107,13 @@ struct FileState {
|
||||
// is written to or sync'ed.
|
||||
class TestWritableFile : public WritableFile {
|
||||
public:
|
||||
TestWritableFile(const std::string& fname,
|
||||
WritableFile* f,
|
||||
TestWritableFile(const FileState& state, WritableFile* f,
|
||||
FaultInjectionTestEnv* env);
|
||||
virtual ~TestWritableFile();
|
||||
virtual Status Append(const Slice& data);
|
||||
virtual Status Close();
|
||||
virtual Status Flush();
|
||||
virtual Status Sync();
|
||||
~TestWritableFile() override;
|
||||
Status Append(const Slice& data) override;
|
||||
Status Close() override;
|
||||
Status Flush() override;
|
||||
Status Sync() override;
|
||||
|
||||
private:
|
||||
FileState state_;
|
||||
@ -126,12 +126,15 @@ class TestWritableFile : public WritableFile {
|
||||
|
||||
class FaultInjectionTestEnv : public EnvWrapper {
|
||||
public:
|
||||
FaultInjectionTestEnv() : EnvWrapper(Env::Default()), filesystem_active_(true) {}
|
||||
virtual ~FaultInjectionTestEnv() { }
|
||||
virtual Status NewWritableFile(const std::string& fname,
|
||||
WritableFile** result);
|
||||
virtual Status DeleteFile(const std::string& f);
|
||||
virtual Status RenameFile(const std::string& s, const std::string& t);
|
||||
FaultInjectionTestEnv()
|
||||
: EnvWrapper(Env::Default()), filesystem_active_(true) {}
|
||||
~FaultInjectionTestEnv() override = default;
|
||||
Status NewWritableFile(const std::string& fname,
|
||||
WritableFile** result) override;
|
||||
Status NewAppendableFile(const std::string& fname,
|
||||
WritableFile** result) override;
|
||||
Status DeleteFile(const std::string& f) override;
|
||||
Status RenameFile(const std::string& s, const std::string& t) override;
|
||||
|
||||
void WritableFileClosed(const FileState& state);
|
||||
Status DropUnsyncedFileData();
|
||||
@ -144,25 +147,26 @@ class FaultInjectionTestEnv : public EnvWrapper {
|
||||
// system reset. Setting to inactive will freeze our saved filesystem state so
|
||||
// that it will stop being recorded. It can then be reset back to the state at
|
||||
// the time of the reset.
|
||||
bool IsFilesystemActive() const { return filesystem_active_; }
|
||||
void SetFilesystemActive(bool active) { filesystem_active_ = active; }
|
||||
bool IsFilesystemActive() LOCKS_EXCLUDED(mutex_) {
|
||||
MutexLock l(&mutex_);
|
||||
return filesystem_active_;
|
||||
}
|
||||
void SetFilesystemActive(bool active) LOCKS_EXCLUDED(mutex_) {
|
||||
MutexLock l(&mutex_);
|
||||
filesystem_active_ = active;
|
||||
}
|
||||
|
||||
private:
|
||||
port::Mutex mutex_;
|
||||
std::map<std::string, FileState> db_file_state_;
|
||||
std::set<std::string> new_files_since_last_dir_sync_;
|
||||
bool filesystem_active_; // Record flushes, syncs, writes
|
||||
std::map<std::string, FileState> db_file_state_ GUARDED_BY(mutex_);
|
||||
std::set<std::string> new_files_since_last_dir_sync_ GUARDED_BY(mutex_);
|
||||
bool filesystem_active_ GUARDED_BY(mutex_); // Record flushes, syncs, writes
|
||||
};
|
||||
|
||||
TestWritableFile::TestWritableFile(const std::string& fname,
|
||||
WritableFile* f,
|
||||
TestWritableFile::TestWritableFile(const FileState& state, WritableFile* f,
|
||||
FaultInjectionTestEnv* env)
|
||||
: state_(fname),
|
||||
target_(f),
|
||||
writable_file_opened_(true),
|
||||
env_(env) {
|
||||
assert(f != NULL);
|
||||
state_.pos_ = 0;
|
||||
: state_(state), target_(f), writable_file_opened_(true), env_(env) {
|
||||
assert(f != nullptr);
|
||||
}
|
||||
|
||||
TestWritableFile::~TestWritableFile() {
|
||||
@ -228,9 +232,12 @@ Status FaultInjectionTestEnv::NewWritableFile(const std::string& fname,
|
||||
WritableFile* actual_writable_file;
|
||||
Status s = target()->NewWritableFile(fname, &actual_writable_file);
|
||||
if (s.ok()) {
|
||||
*result = new TestWritableFile(fname, actual_writable_file, this);
|
||||
// WritableFile doesn't append to files, so if the same file is opened again
|
||||
// then it will be truncated - so forget our saved state.
|
||||
FileState state(fname);
|
||||
state.pos_ = 0;
|
||||
*result = new TestWritableFile(state, actual_writable_file, this);
|
||||
// NewWritableFile doesn't append to files, so if the same file is
|
||||
// opened again then it will be truncated - so forget our saved
|
||||
// state.
|
||||
UntrackFile(fname);
|
||||
MutexLock l(&mutex_);
|
||||
new_files_since_last_dir_sync_.insert(fname);
|
||||
@ -238,13 +245,34 @@ Status FaultInjectionTestEnv::NewWritableFile(const std::string& fname,
|
||||
return s;
|
||||
}
|
||||
|
||||
// Opens "fname" for appending through the wrapped target Env and, on
// success, stores a TestWritableFile wrapping the real file in *result.
//
// If "fname" already has an entry in db_file_state_, the wrapper starts from
// a copy of that recorded state.  Otherwise it starts from a fresh FileState
// with pos_ == 0 and the name is added to new_files_since_last_dir_sync_.
//
// When the target Env fails, its status is returned and *result is not
// assigned by this function.
Status FaultInjectionTestEnv::NewAppendableFile(const std::string& fname,
                                                WritableFile** result) {
  WritableFile* underlying_file;
  Status status = target()->NewAppendableFile(fname, &underlying_file);
  if (!status.ok()) {
    return status;
  }

  FileState file_state(fname);
  file_state.pos_ = 0;
  {
    // mutex_ is held only while the shared bookkeeping is consulted.
    MutexLock guard(&mutex_);
    auto tracked = db_file_state_.find(fname);
    if (tracked != db_file_state_.end()) {
      file_state = tracked->second;
    } else {
      new_files_since_last_dir_sync_.insert(fname);
    }
  }
  *result = new TestWritableFile(file_state, underlying_file, this);
  return status;
}
|
||||
|
||||
Status FaultInjectionTestEnv::DropUnsyncedFileData() {
|
||||
Status s;
|
||||
MutexLock l(&mutex_);
|
||||
for (std::map<std::string, FileState>::const_iterator it =
|
||||
db_file_state_.begin();
|
||||
s.ok() && it != db_file_state_.end(); ++it) {
|
||||
const FileState& state = it->second;
|
||||
for (const auto& kvp : db_file_state_) {
|
||||
if (!s.ok()) {
|
||||
break;
|
||||
}
|
||||
const FileState& state = kvp.second;
|
||||
if (!state.IsFullySynced()) {
|
||||
s = state.DropUnsyncedData();
|
||||
}
|
||||
@ -301,9 +329,9 @@ Status FaultInjectionTestEnv::RenameFile(const std::string& s,
|
||||
}
|
||||
|
||||
void FaultInjectionTestEnv::ResetState() {
|
||||
MutexLock l(&mutex_);
|
||||
db_file_state_.clear();
|
||||
new_files_since_last_dir_sync_.clear();
|
||||
// Since we are not destroying the database, the existing files
|
||||
// should keep their recorded synced/flushed state. Therefore
|
||||
// we do not reset db_file_state_ and new_files_since_last_dir_sync_.
|
||||
SetFilesystemActive(true);
|
||||
}
|
||||
|
||||
@ -313,12 +341,14 @@ Status FaultInjectionTestEnv::DeleteFilesCreatedAfterLastDirSync() {
|
||||
std::set<std::string> new_files(new_files_since_last_dir_sync_.begin(),
|
||||
new_files_since_last_dir_sync_.end());
|
||||
mutex_.Unlock();
|
||||
Status s;
|
||||
std::set<std::string>::const_iterator it;
|
||||
for (it = new_files.begin(); s.ok() && it != new_files.end(); ++it) {
|
||||
s = DeleteFile(*it);
|
||||
Status status;
|
||||
for (const auto& new_file : new_files) {
|
||||
Status delete_status = DeleteFile(new_file);
|
||||
if (!delete_status.ok() && status.ok()) {
|
||||
status = std::move(delete_status);
|
||||
}
|
||||
}
|
||||
return s;
|
||||
return status;
|
||||
}
|
||||
|
||||
void FaultInjectionTestEnv::WritableFileClosed(const FileState& state) {
|
||||
@ -327,7 +357,7 @@ void FaultInjectionTestEnv::WritableFileClosed(const FileState& state) {
|
||||
}
|
||||
|
||||
Status FileState::DropUnsyncedData() const {
|
||||
ssize_t sync_pos = pos_at_last_sync_ == -1 ? 0 : pos_at_last_sync_;
|
||||
int64_t sync_pos = pos_at_last_sync_ == -1 ? 0 : pos_at_last_sync_;
|
||||
return Truncate(filename_, sync_pos);
|
||||
}
|
||||
|
||||
@ -342,53 +372,28 @@ class FaultInjectionTest {
|
||||
Options options_;
|
||||
DB* db_;
|
||||
|
||||
FaultInjectionTest() : env_(NULL), tiny_cache_(NULL), db_(NULL) { NewDB(); }
|
||||
|
||||
~FaultInjectionTest() { ASSERT_OK(TearDown()); }
|
||||
|
||||
Status NewDB() {
|
||||
assert(db_ == NULL);
|
||||
assert(tiny_cache_ == NULL);
|
||||
assert(env_ == NULL);
|
||||
|
||||
env_ = new FaultInjectionTestEnv();
|
||||
|
||||
options_ = Options();
|
||||
FaultInjectionTest()
|
||||
: env_(new FaultInjectionTestEnv),
|
||||
tiny_cache_(NewLRUCache(100)),
|
||||
db_(nullptr) {
|
||||
dbname_ = test::TmpDir() + "/fault_test";
|
||||
DestroyDB(dbname_, Options()); // Destroy any db from earlier run
|
||||
options_.reuse_logs = true;
|
||||
options_.env = env_;
|
||||
options_.paranoid_checks = true;
|
||||
|
||||
tiny_cache_ = NewLRUCache(100);
|
||||
options_.block_cache = tiny_cache_;
|
||||
dbname_ = test::TmpDir() + "/fault_test";
|
||||
|
||||
options_.create_if_missing = true;
|
||||
Status s = OpenDB();
|
||||
options_.create_if_missing = false;
|
||||
return s;
|
||||
}
|
||||
|
||||
Status SetUp() {
|
||||
Status s = TearDown();
|
||||
if (s.ok()) {
|
||||
s = NewDB();
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
Status TearDown() {
|
||||
~FaultInjectionTest() {
|
||||
CloseDB();
|
||||
|
||||
Status s = DestroyDB(dbname_, Options());
|
||||
|
||||
DestroyDB(dbname_, Options());
|
||||
delete tiny_cache_;
|
||||
tiny_cache_ = NULL;
|
||||
|
||||
delete env_;
|
||||
env_ = NULL;
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
// Sets options_.reuse_logs; options_ is what OpenDB() hands to DB::Open().
void ReuseLogs(bool reuse) { options_.reuse_logs = reuse; }
|
||||
|
||||
void Build(int start_idx, int num_vals) {
|
||||
std::string key_space, value_space;
|
||||
WriteBatch batch;
|
||||
@ -447,19 +452,18 @@ class FaultInjectionTest {
|
||||
|
||||
Status OpenDB() {
|
||||
delete db_;
|
||||
db_ = NULL;
|
||||
db_ = nullptr;
|
||||
env_->ResetState();
|
||||
return DB::Open(options_, dbname_, &db_);
|
||||
}
|
||||
|
||||
void CloseDB() {
|
||||
delete db_;
|
||||
db_ = NULL;
|
||||
db_ = nullptr;
|
||||
}
|
||||
|
||||
void DeleteAllData() {
|
||||
Iterator* iter = db_->NewIterator(ReadOptions());
|
||||
WriteOptions options;
|
||||
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
|
||||
ASSERT_OK(db_->Delete(WriteOptions(), iter->key()));
|
||||
}
|
||||
@ -483,60 +487,65 @@ class FaultInjectionTest {
|
||||
void PartialCompactTestPreFault(int num_pre_sync, int num_post_sync) {
|
||||
DeleteAllData();
|
||||
Build(0, num_pre_sync);
|
||||
db_->CompactRange(NULL, NULL);
|
||||
db_->CompactRange(nullptr, nullptr);
|
||||
Build(num_pre_sync, num_post_sync);
|
||||
}
|
||||
|
||||
void PartialCompactTestReopenWithFault(ResetMethod reset_method,
|
||||
int num_pre_sync,
|
||||
int num_post_sync) {
|
||||
int num_pre_sync, int num_post_sync) {
|
||||
env_->SetFilesystemActive(false);
|
||||
CloseDB();
|
||||
ResetDBState(reset_method);
|
||||
ASSERT_OK(OpenDB());
|
||||
ASSERT_OK(Verify(0, num_pre_sync, FaultInjectionTest::VAL_EXPECT_NO_ERROR));
|
||||
ASSERT_OK(Verify(num_pre_sync, num_post_sync, FaultInjectionTest::VAL_EXPECT_ERROR));
|
||||
ASSERT_OK(Verify(num_pre_sync, num_post_sync,
|
||||
FaultInjectionTest::VAL_EXPECT_ERROR));
|
||||
}
|
||||
|
||||
void NoWriteTestPreFault() {
|
||||
}
|
||||
void NoWriteTestPreFault() {}
|
||||
|
||||
// Closes the database, calls ResetDBState() with the given reset method, and
// asserts that the database can be opened again.  This helper issues no
// writes itself.
void NoWriteTestReopenWithFault(ResetMethod reset_method) {
|
||||
CloseDB();
|
||||
ResetDBState(reset_method);
|
||||
ASSERT_OK(OpenDB());
|
||||
}
|
||||
|
||||
void DoTest() {
|
||||
Random rnd(0);
|
||||
ASSERT_OK(OpenDB());
|
||||
for (size_t idx = 0; idx < kNumIterations; idx++) {
|
||||
int num_pre_sync = rnd.Uniform(kMaxNumValues);
|
||||
int num_post_sync = rnd.Uniform(kMaxNumValues);
|
||||
|
||||
PartialCompactTestPreFault(num_pre_sync, num_post_sync);
|
||||
PartialCompactTestReopenWithFault(RESET_DROP_UNSYNCED_DATA, num_pre_sync,
|
||||
num_post_sync);
|
||||
|
||||
NoWriteTestPreFault();
|
||||
NoWriteTestReopenWithFault(RESET_DROP_UNSYNCED_DATA);
|
||||
|
||||
PartialCompactTestPreFault(num_pre_sync, num_post_sync);
|
||||
// No new files created so we expect all values since no files will be
|
||||
// dropped.
|
||||
PartialCompactTestReopenWithFault(RESET_DELETE_UNSYNCED_FILES,
|
||||
num_pre_sync + num_post_sync, 0);
|
||||
|
||||
NoWriteTestPreFault();
|
||||
NoWriteTestReopenWithFault(RESET_DELETE_UNSYNCED_FILES);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
TEST(FaultInjectionTest, FaultTest) {
|
||||
Random rnd(0);
|
||||
ASSERT_OK(SetUp());
|
||||
for (size_t idx = 0; idx < kNumIterations; idx++) {
|
||||
int num_pre_sync = rnd.Uniform(kMaxNumValues);
|
||||
int num_post_sync = rnd.Uniform(kMaxNumValues);
|
||||
TEST(FaultInjectionTest, FaultTestNoLogReuse) {
|
||||
ReuseLogs(false);
|
||||
DoTest();
|
||||
}
|
||||
|
||||
PartialCompactTestPreFault(num_pre_sync, num_post_sync);
|
||||
PartialCompactTestReopenWithFault(RESET_DROP_UNSYNCED_DATA,
|
||||
num_pre_sync,
|
||||
num_post_sync);
|
||||
|
||||
NoWriteTestPreFault();
|
||||
NoWriteTestReopenWithFault(RESET_DROP_UNSYNCED_DATA);
|
||||
|
||||
PartialCompactTestPreFault(num_pre_sync, num_post_sync);
|
||||
// No new files created so we expect all values since no files will be
|
||||
// dropped.
|
||||
PartialCompactTestReopenWithFault(RESET_DELETE_UNSYNCED_FILES,
|
||||
num_pre_sync + num_post_sync,
|
||||
0);
|
||||
|
||||
NoWriteTestPreFault();
|
||||
NoWriteTestReopenWithFault(RESET_DELETE_UNSYNCED_FILES);
|
||||
}
|
||||
TEST(FaultInjectionTest, FaultTestWithLogReuse) {
|
||||
ReuseLogs(true);
|
||||
DoTest();
|
||||
}
|
||||
|
||||
} // namespace leveldb
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
return leveldb::test::RunAllTests();
|
||||
}
|
||||
int main(int argc, char** argv) { return leveldb::test::RunAllTests(); }
|
||||
|
@ -2,9 +2,11 @@
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#include "db/filename.h"
|
||||
|
||||
#include <ctype.h>
|
||||
#include <stdio.h>
|
||||
#include "db/filename.h"
|
||||
|
||||
#include "db/dbformat.h"
|
||||
#include "leveldb/env.h"
|
||||
#include "util/logging.h"
|
||||
@ -12,31 +14,30 @@
|
||||
namespace leveldb {
|
||||
|
||||
// A utility routine: write "data" to the named file and Sync() it.
|
||||
extern Status WriteStringToFileSync(Env* env, const Slice& data,
|
||||
const std::string& fname);
|
||||
Status WriteStringToFileSync(Env* env, const Slice& data,
|
||||
const std::string& fname);
|
||||
|
||||
static std::string MakeFileName(const std::string& name, uint64_t number,
|
||||
static std::string MakeFileName(const std::string& dbname, uint64_t number,
|
||||
const char* suffix) {
|
||||
char buf[100];
|
||||
snprintf(buf, sizeof(buf), "/%06llu.%s",
|
||||
static_cast<unsigned long long>(number),
|
||||
suffix);
|
||||
return name + buf;
|
||||
static_cast<unsigned long long>(number), suffix);
|
||||
return dbname + buf;
|
||||
}
|
||||
|
||||
std::string LogFileName(const std::string& name, uint64_t number) {
|
||||
std::string LogFileName(const std::string& dbname, uint64_t number) {
|
||||
assert(number > 0);
|
||||
return MakeFileName(name, number, "log");
|
||||
return MakeFileName(dbname, number, "log");
|
||||
}
|
||||
|
||||
std::string TableFileName(const std::string& name, uint64_t number) {
|
||||
std::string TableFileName(const std::string& dbname, uint64_t number) {
|
||||
assert(number > 0);
|
||||
return MakeFileName(name, number, "ldb");
|
||||
return MakeFileName(dbname, number, "ldb");
|
||||
}
|
||||
|
||||
std::string SSTTableFileName(const std::string& name, uint64_t number) {
|
||||
std::string SSTTableFileName(const std::string& dbname, uint64_t number) {
|
||||
assert(number > 0);
|
||||
return MakeFileName(name, number, "sst");
|
||||
return MakeFileName(dbname, number, "sst");
|
||||
}
|
||||
|
||||
std::string DescriptorFileName(const std::string& dbname, uint64_t number) {
|
||||
@ -51,9 +52,7 @@ std::string CurrentFileName(const std::string& dbname) {
|
||||
return dbname + "/CURRENT";
|
||||
}
|
||||
|
||||
std::string LockFileName(const std::string& dbname) {
|
||||
return dbname + "/LOCK";
|
||||
}
|
||||
std::string LockFileName(const std::string& dbname) { return dbname + "/LOCK"; }
|
||||
|
||||
std::string TempFileName(const std::string& dbname, uint64_t number) {
|
||||
assert(number > 0);
|
||||
@ -69,7 +68,6 @@ std::string OldInfoLogFileName(const std::string& dbname) {
|
||||
return dbname + "/LOG.old";
|
||||
}
|
||||
|
||||
|
||||
// Owned filenames have the form:
|
||||
// dbname/CURRENT
|
||||
// dbname/LOCK
|
||||
@ -77,10 +75,9 @@ std::string OldInfoLogFileName(const std::string& dbname) {
|
||||
// dbname/LOG.old
|
||||
// dbname/MANIFEST-[0-9]+
|
||||
// dbname/[0-9]+.(log|sst|ldb)
|
||||
bool ParseFileName(const std::string& fname,
|
||||
uint64_t* number,
|
||||
bool ParseFileName(const std::string& filename, uint64_t* number,
|
||||
FileType* type) {
|
||||
Slice rest(fname);
|
||||
Slice rest(filename);
|
||||
if (rest == "CURRENT") {
|
||||
*number = 0;
|
||||
*type = kCurrentFile;
|
||||
|
@ -8,7 +8,9 @@
|
||||
#define STORAGE_LEVELDB_DB_FILENAME_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "leveldb/slice.h"
|
||||
#include "leveldb/status.h"
|
||||
#include "port/port.h"
|
||||
@ -30,55 +32,52 @@ enum FileType {
|
||||
// Return the name of the log file with the specified number
|
||||
// in the db named by "dbname". The result will be prefixed with
|
||||
// "dbname".
|
||||
extern std::string LogFileName(const std::string& dbname, uint64_t number);
|
||||
std::string LogFileName(const std::string& dbname, uint64_t number);
|
||||
|
||||
// Return the name of the sstable with the specified number
|
||||
// in the db named by "dbname". The result will be prefixed with
|
||||
// "dbname".
|
||||
extern std::string TableFileName(const std::string& dbname, uint64_t number);
|
||||
std::string TableFileName(const std::string& dbname, uint64_t number);
|
||||
|
||||
// Return the legacy file name for an sstable with the specified number
|
||||
// in the db named by "dbname". The result will be prefixed with
|
||||
// "dbname".
|
||||
extern std::string SSTTableFileName(const std::string& dbname, uint64_t number);
|
||||
std::string SSTTableFileName(const std::string& dbname, uint64_t number);
|
||||
|
||||
// Return the name of the descriptor file for the db named by
|
||||
// "dbname" and the specified incarnation number. The result will be
|
||||
// prefixed with "dbname".
|
||||
extern std::string DescriptorFileName(const std::string& dbname,
|
||||
uint64_t number);
|
||||
std::string DescriptorFileName(const std::string& dbname, uint64_t number);
|
||||
|
||||
// Return the name of the current file. This file contains the name
|
||||
// of the current manifest file. The result will be prefixed with
|
||||
// "dbname".
|
||||
extern std::string CurrentFileName(const std::string& dbname);
|
||||
std::string CurrentFileName(const std::string& dbname);
|
||||
|
||||
// Return the name of the lock file for the db named by
|
||||
// "dbname". The result will be prefixed with "dbname".
|
||||
extern std::string LockFileName(const std::string& dbname);
|
||||
std::string LockFileName(const std::string& dbname);
|
||||
|
||||
// Return the name of a temporary file owned by the db named "dbname".
|
||||
// The result will be prefixed with "dbname".
|
||||
extern std::string TempFileName(const std::string& dbname, uint64_t number);
|
||||
std::string TempFileName(const std::string& dbname, uint64_t number);
|
||||
|
||||
// Return the name of the info log file for "dbname".
|
||||
extern std::string InfoLogFileName(const std::string& dbname);
|
||||
std::string InfoLogFileName(const std::string& dbname);
|
||||
|
||||
// Return the name of the old info log file for "dbname".
|
||||
extern std::string OldInfoLogFileName(const std::string& dbname);
|
||||
std::string OldInfoLogFileName(const std::string& dbname);
|
||||
|
||||
// If filename is a leveldb file, store the type of the file in *type.
|
||||
// The number encoded in the filename is stored in *number. If the
|
||||
// filename was successfully parsed, returns true. Else return false.
|
||||
extern bool ParseFileName(const std::string& filename,
|
||||
uint64_t* number,
|
||||
FileType* type);
|
||||
bool ParseFileName(const std::string& filename, uint64_t* number,
|
||||
FileType* type);
|
||||
|
||||
// Make the CURRENT file point to the descriptor file with the
|
||||
// specified number.
|
||||
extern Status SetCurrentFile(Env* env, const std::string& dbname,
|
||||
uint64_t descriptor_number);
|
||||
|
||||
Status SetCurrentFile(Env* env, const std::string& dbname,
|
||||
uint64_t descriptor_number);
|
||||
|
||||
} // namespace leveldb
|
||||
|
||||
|
@ -11,7 +11,7 @@
|
||||
|
||||
namespace leveldb {
|
||||
|
||||
class FileNameTest { };
|
||||
class FileNameTest {};
|
||||
|
||||
TEST(FileNameTest, Parse) {
|
||||
Slice db;
|
||||
@ -24,17 +24,17 @@ TEST(FileNameTest, Parse) {
|
||||
uint64_t number;
|
||||
FileType type;
|
||||
} cases[] = {
|
||||
{ "100.log", 100, kLogFile },
|
||||
{ "0.log", 0, kLogFile },
|
||||
{ "0.sst", 0, kTableFile },
|
||||
{ "0.ldb", 0, kTableFile },
|
||||
{ "CURRENT", 0, kCurrentFile },
|
||||
{ "LOCK", 0, kDBLockFile },
|
||||
{ "MANIFEST-2", 2, kDescriptorFile },
|
||||
{ "MANIFEST-7", 7, kDescriptorFile },
|
||||
{ "LOG", 0, kInfoLogFile },
|
||||
{ "LOG.old", 0, kInfoLogFile },
|
||||
{ "18446744073709551615.log", 18446744073709551615ull, kLogFile },
|
||||
{"100.log", 100, kLogFile},
|
||||
{"0.log", 0, kLogFile},
|
||||
{"0.sst", 0, kTableFile},
|
||||
{"0.ldb", 0, kTableFile},
|
||||
{"CURRENT", 0, kCurrentFile},
|
||||
{"LOCK", 0, kDBLockFile},
|
||||
{"MANIFEST-2", 2, kDescriptorFile},
|
||||
{"MANIFEST-7", 7, kDescriptorFile},
|
||||
{"LOG", 0, kInfoLogFile},
|
||||
{"LOG.old", 0, kInfoLogFile},
|
||||
{"18446744073709551615.log", 18446744073709551615ull, kLogFile},
|
||||
};
|
||||
for (int i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
|
||||
std::string f = cases[i].fname;
|
||||
@ -44,30 +44,28 @@ TEST(FileNameTest, Parse) {
|
||||
}
|
||||
|
||||
// Errors
|
||||
static const char* errors[] = {
|
||||
"",
|
||||
"foo",
|
||||
"foo-dx-100.log",
|
||||
".log",
|
||||
"",
|
||||
"manifest",
|
||||
"CURREN",
|
||||
"CURRENTX",
|
||||
"MANIFES",
|
||||
"MANIFEST",
|
||||
"MANIFEST-",
|
||||
"XMANIFEST-3",
|
||||
"MANIFEST-3x",
|
||||
"LOC",
|
||||
"LOCKx",
|
||||
"LO",
|
||||
"LOGx",
|
||||
"18446744073709551616.log",
|
||||
"184467440737095516150.log",
|
||||
"100",
|
||||
"100.",
|
||||
"100.lop"
|
||||
};
|
||||
static const char* errors[] = {"",
|
||||
"foo",
|
||||
"foo-dx-100.log",
|
||||
".log",
|
||||
"",
|
||||
"manifest",
|
||||
"CURREN",
|
||||
"CURRENTX",
|
||||
"MANIFES",
|
||||
"MANIFEST",
|
||||
"MANIFEST-",
|
||||
"XMANIFEST-3",
|
||||
"MANIFEST-3x",
|
||||
"LOC",
|
||||
"LOCKx",
|
||||
"LO",
|
||||
"LOGx",
|
||||
"18446744073709551616.log",
|
||||
"184467440737095516150.log",
|
||||
"100",
|
||||
"100.",
|
||||
"100.lop"};
|
||||
for (int i = 0; i < sizeof(errors) / sizeof(errors[0]); i++) {
|
||||
std::string f = errors[i];
|
||||
ASSERT_TRUE(!ParseFileName(f, &number, &type)) << f;
|
||||
@ -114,10 +112,20 @@ TEST(FileNameTest, Construction) {
|
||||
ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type));
|
||||
ASSERT_EQ(999, number);
|
||||
ASSERT_EQ(kTempFile, type);
|
||||
|
||||
fname = InfoLogFileName("foo");
|
||||
ASSERT_EQ("foo/", std::string(fname.data(), 4));
|
||||
ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type));
|
||||
ASSERT_EQ(0, number);
|
||||
ASSERT_EQ(kInfoLogFile, type);
|
||||
|
||||
fname = OldInfoLogFileName("foo");
|
||||
ASSERT_EQ("foo/", std::string(fname.data(), 4));
|
||||
ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type));
|
||||
ASSERT_EQ(0, number);
|
||||
ASSERT_EQ(kInfoLogFile, type);
|
||||
}
|
||||
|
||||
} // namespace leveldb
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
return leveldb::test::RunAllTests();
|
||||
}
|
||||
int main(int argc, char** argv) { return leveldb::test::RunAllTests(); }
|
||||
|
@ -3,6 +3,7 @@
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include "leveldb/dumpfile.h"
|
||||
#include "leveldb/env.h"
|
||||
#include "leveldb/status.h"
|
||||
@ -12,13 +13,13 @@ namespace {
|
||||
|
||||
class StdoutPrinter : public WritableFile {
|
||||
public:
|
||||
virtual Status Append(const Slice& data) {
|
||||
Status Append(const Slice& data) override {
|
||||
fwrite(data.data(), 1, data.size(), stdout);
|
||||
return Status::OK();
|
||||
}
|
||||
virtual Status Close() { return Status::OK(); }
|
||||
virtual Status Flush() { return Status::OK(); }
|
||||
virtual Status Sync() { return Status::OK(); }
|
||||
Status Close() override { return Status::OK(); }
|
||||
Status Flush() override { return Status::OK(); }
|
||||
Status Sync() override { return Status::OK(); }
|
||||
};
|
||||
|
||||
bool HandleDumpCommand(Env* env, char** files, int num) {
|
||||
@ -38,11 +39,9 @@ bool HandleDumpCommand(Env* env, char** files, int num) {
|
||||
} // namespace leveldb
|
||||
|
||||
static void Usage() {
|
||||
fprintf(
|
||||
stderr,
|
||||
"Usage: leveldbutil command...\n"
|
||||
" dump files... -- dump contents of specified files\n"
|
||||
);
|
||||
fprintf(stderr,
|
||||
"Usage: leveldbutil command...\n"
|
||||
" dump files... -- dump contents of specified files\n");
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
@ -54,7 +53,7 @@ int main(int argc, char** argv) {
|
||||
} else {
|
||||
std::string command = argv[1];
|
||||
if (command == "dump") {
|
||||
ok = leveldb::HandleDumpCommand(env, argv+2, argc-2);
|
||||
ok = leveldb::HandleDumpCommand(env, argv + 2, argc - 2);
|
||||
} else {
|
||||
Usage();
|
||||
ok = false;
|
@ -3,7 +3,7 @@
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
//
|
||||
// Log format information shared by reader and writer.
|
||||
// See ../doc/log_format.txt for more detail.
|
||||
// See ../doc/log_format.md for more detail.
|
||||
|
||||
#ifndef STORAGE_LEVELDB_DB_LOG_FORMAT_H_
|
||||
#define STORAGE_LEVELDB_DB_LOG_FORMAT_H_
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include "db/log_reader.h"
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include "leveldb/env.h"
|
||||
#include "util/coding.h"
|
||||
#include "util/crc32c.h"
|
||||
@ -12,8 +13,7 @@
|
||||
namespace leveldb {
|
||||
namespace log {
|
||||
|
||||
Reader::Reporter::~Reporter() {
|
||||
}
|
||||
Reader::Reporter::~Reporter() = default;
|
||||
|
||||
Reader::Reader(SequentialFile* file, Reporter* reporter, bool checksum,
|
||||
uint64_t initial_offset)
|
||||
@ -25,20 +25,17 @@ Reader::Reader(SequentialFile* file, Reporter* reporter, bool checksum,
|
||||
eof_(false),
|
||||
last_record_offset_(0),
|
||||
end_of_buffer_offset_(0),
|
||||
initial_offset_(initial_offset) {
|
||||
}
|
||||
initial_offset_(initial_offset),
|
||||
resyncing_(initial_offset > 0) {}
|
||||
|
||||
Reader::~Reader() {
|
||||
delete[] backing_store_;
|
||||
}
|
||||
Reader::~Reader() { delete[] backing_store_; }
|
||||
|
||||
bool Reader::SkipToInitialBlock() {
|
||||
size_t offset_in_block = initial_offset_ % kBlockSize;
|
||||
const size_t offset_in_block = initial_offset_ % kBlockSize;
|
||||
uint64_t block_start_location = initial_offset_ - offset_in_block;
|
||||
|
||||
// Don't search a block if we'd be in the trailer
|
||||
if (offset_in_block > kBlockSize - 6) {
|
||||
offset_in_block = 0;
|
||||
block_start_location += kBlockSize;
|
||||
}
|
||||
|
||||
@ -72,8 +69,25 @@ bool Reader::ReadRecord(Slice* record, std::string* scratch) {
|
||||
|
||||
Slice fragment;
|
||||
while (true) {
|
||||
uint64_t physical_record_offset = end_of_buffer_offset_ - buffer_.size();
|
||||
const unsigned int record_type = ReadPhysicalRecord(&fragment);
|
||||
|
||||
// ReadPhysicalRecord may have only had an empty trailer remaining in its
|
||||
// internal buffer. Calculate the offset of the next physical record now
|
||||
// that it has returned, properly accounting for its header size.
|
||||
uint64_t physical_record_offset =
|
||||
end_of_buffer_offset_ - buffer_.size() - kHeaderSize - fragment.size();
|
||||
|
||||
if (resyncing_) {
|
||||
if (record_type == kMiddleType) {
|
||||
continue;
|
||||
} else if (record_type == kLastType) {
|
||||
resyncing_ = false;
|
||||
continue;
|
||||
} else {
|
||||
resyncing_ = false;
|
||||
}
|
||||
}
|
||||
|
||||
switch (record_type) {
|
||||
case kFullType:
|
||||
if (in_fragmented_record) {
|
||||
@ -81,9 +95,7 @@ bool Reader::ReadRecord(Slice* record, std::string* scratch) {
|
||||
// it could emit an empty kFirstType record at the tail end
|
||||
// of a block followed by a kFullType or kFirstType record
|
||||
// at the beginning of the next block.
|
||||
if (scratch->empty()) {
|
||||
in_fragmented_record = false;
|
||||
} else {
|
||||
if (!scratch->empty()) {
|
||||
ReportCorruption(scratch->size(), "partial record without end(1)");
|
||||
}
|
||||
}
|
||||
@ -99,9 +111,7 @@ bool Reader::ReadRecord(Slice* record, std::string* scratch) {
|
||||
// it could emit an empty kFirstType record at the tail end
|
||||
// of a block followed by a kFullType or kFirstType record
|
||||
// at the beginning of the next block.
|
||||
if (scratch->empty()) {
|
||||
in_fragmented_record = false;
|
||||
} else {
|
||||
if (!scratch->empty()) {
|
||||
ReportCorruption(scratch->size(), "partial record without end(2)");
|
||||
}
|
||||
}
|
||||
@ -163,16 +173,14 @@ bool Reader::ReadRecord(Slice* record, std::string* scratch) {
|
||||
return false;
|
||||
}
|
||||
|
||||
uint64_t Reader::LastRecordOffset() {
|
||||
return last_record_offset_;
|
||||
}
|
||||
uint64_t Reader::LastRecordOffset() { return last_record_offset_; }
|
||||
|
||||
void Reader::ReportCorruption(uint64_t bytes, const char* reason) {
|
||||
ReportDrop(bytes, Status::Corruption(reason));
|
||||
}
|
||||
|
||||
void Reader::ReportDrop(uint64_t bytes, const Status& reason) {
|
||||
if (reporter_ != NULL &&
|
||||
if (reporter_ != nullptr &&
|
||||
end_of_buffer_offset_ - buffer_.size() - bytes >= initial_offset_) {
|
||||
reporter_->Corruption(static_cast<size_t>(bytes), reason);
|
||||
}
|
||||
|
@ -32,7 +32,7 @@ class Reader {
|
||||
// Create a reader that will return log records from "*file".
|
||||
// "*file" must remain live while this Reader is in use.
|
||||
//
|
||||
// If "reporter" is non-NULL, it is notified whenever some data is
|
||||
// If "reporter" is non-null, it is notified whenever some data is
|
||||
// dropped due to a detected corruption. "*reporter" must remain
|
||||
// live while this Reader is in use.
|
||||
//
|
||||
@ -43,6 +43,9 @@ class Reader {
|
||||
Reader(SequentialFile* file, Reporter* reporter, bool checksum,
|
||||
uint64_t initial_offset);
|
||||
|
||||
Reader(const Reader&) = delete;
|
||||
Reader& operator=(const Reader&) = delete;
|
||||
|
||||
~Reader();
|
||||
|
||||
// Read the next record into *record. Returns true if read
|
||||
@ -58,21 +61,6 @@ class Reader {
|
||||
uint64_t LastRecordOffset();
|
||||
|
||||
private:
|
||||
SequentialFile* const file_;
|
||||
Reporter* const reporter_;
|
||||
bool const checksum_;
|
||||
char* const backing_store_;
|
||||
Slice buffer_;
|
||||
bool eof_; // Last Read() indicated EOF by returning < kBlockSize
|
||||
|
||||
// Offset of the last record returned by ReadRecord.
|
||||
uint64_t last_record_offset_;
|
||||
// Offset of the first location past the end of buffer_.
|
||||
uint64_t end_of_buffer_offset_;
|
||||
|
||||
// Offset at which to start looking for the first record to return
|
||||
uint64_t const initial_offset_;
|
||||
|
||||
// Extend record types with the following special values
|
||||
enum {
|
||||
kEof = kMaxRecordType + 1,
|
||||
@ -97,9 +85,25 @@ class Reader {
|
||||
void ReportCorruption(uint64_t bytes, const char* reason);
|
||||
void ReportDrop(uint64_t bytes, const Status& reason);
|
||||
|
||||
// No copying allowed
|
||||
Reader(const Reader&);
|
||||
void operator=(const Reader&);
|
||||
SequentialFile* const file_;
|
||||
Reporter* const reporter_;
|
||||
bool const checksum_;
|
||||
char* const backing_store_;
|
||||
Slice buffer_;
|
||||
bool eof_; // Last Read() indicated EOF by returning < kBlockSize
|
||||
|
||||
// Offset of the last record returned by ReadRecord.
|
||||
uint64_t last_record_offset_;
|
||||
// Offset of the first location past the end of buffer_.
|
||||
uint64_t end_of_buffer_offset_;
|
||||
|
||||
// Offset at which to start looking for the first record to return
|
||||
uint64_t const initial_offset_;
|
||||
|
||||
// True if we are resynchronizing after a seek (initial_offset_ > 0). In
|
||||
// particular, a run of kMiddleType and kLastType records can be silently
|
||||
// skipped in this mode
|
||||
bool resyncing_;
|
||||
};
|
||||
|
||||
} // namespace log
|
||||
|
342
db/log_test.cc
342
db/log_test.cc
@ -37,95 +37,29 @@ static std::string RandomSkewedString(int i, Random* rnd) {
|
||||
}
|
||||
|
||||
class LogTest {
|
||||
private:
|
||||
class StringDest : public WritableFile {
|
||||
public:
|
||||
std::string contents_;
|
||||
|
||||
virtual Status Close() { return Status::OK(); }
|
||||
virtual Status Flush() { return Status::OK(); }
|
||||
virtual Status Sync() { return Status::OK(); }
|
||||
virtual Status Append(const Slice& slice) {
|
||||
contents_.append(slice.data(), slice.size());
|
||||
return Status::OK();
|
||||
}
|
||||
};
|
||||
|
||||
class StringSource : public SequentialFile {
|
||||
public:
|
||||
Slice contents_;
|
||||
bool force_error_;
|
||||
bool returned_partial_;
|
||||
StringSource() : force_error_(false), returned_partial_(false) { }
|
||||
|
||||
virtual Status Read(size_t n, Slice* result, char* scratch) {
|
||||
ASSERT_TRUE(!returned_partial_) << "must not Read() after eof/error";
|
||||
|
||||
if (force_error_) {
|
||||
force_error_ = false;
|
||||
returned_partial_ = true;
|
||||
return Status::Corruption("read error");
|
||||
}
|
||||
|
||||
if (contents_.size() < n) {
|
||||
n = contents_.size();
|
||||
returned_partial_ = true;
|
||||
}
|
||||
*result = Slice(contents_.data(), n);
|
||||
contents_.remove_prefix(n);
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
virtual Status Skip(uint64_t n) {
|
||||
if (n > contents_.size()) {
|
||||
contents_.clear();
|
||||
return Status::NotFound("in-memory file skipepd past end");
|
||||
}
|
||||
|
||||
contents_.remove_prefix(n);
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
};
|
||||
|
||||
class ReportCollector : public Reader::Reporter {
|
||||
public:
|
||||
size_t dropped_bytes_;
|
||||
std::string message_;
|
||||
|
||||
ReportCollector() : dropped_bytes_(0) { }
|
||||
virtual void Corruption(size_t bytes, const Status& status) {
|
||||
dropped_bytes_ += bytes;
|
||||
message_.append(status.ToString());
|
||||
}
|
||||
};
|
||||
|
||||
StringDest dest_;
|
||||
StringSource source_;
|
||||
ReportCollector report_;
|
||||
bool reading_;
|
||||
Writer writer_;
|
||||
Reader reader_;
|
||||
|
||||
// Record metadata for testing initial offset functionality
|
||||
static size_t initial_offset_record_sizes_[];
|
||||
static uint64_t initial_offset_last_record_offsets_[];
|
||||
|
||||
public:
|
||||
LogTest() : reading_(false),
|
||||
writer_(&dest_),
|
||||
reader_(&source_, &report_, true/*checksum*/,
|
||||
0/*initial_offset*/) {
|
||||
LogTest()
|
||||
: reading_(false),
|
||||
writer_(new Writer(&dest_)),
|
||||
reader_(new Reader(&source_, &report_, true /*checksum*/,
|
||||
0 /*initial_offset*/)) {}
|
||||
|
||||
~LogTest() {
|
||||
delete writer_;
|
||||
delete reader_;
|
||||
}
|
||||
|
||||
void ReopenForAppend() {
|
||||
delete writer_;
|
||||
writer_ = new Writer(&dest_, dest_.contents_.size());
|
||||
}
|
||||
|
||||
void Write(const std::string& msg) {
|
||||
ASSERT_TRUE(!reading_) << "Write() after starting to read";
|
||||
writer_.AddRecord(Slice(msg));
|
||||
writer_->AddRecord(Slice(msg));
|
||||
}
|
||||
|
||||
size_t WrittenBytes() const {
|
||||
return dest_.contents_.size();
|
||||
}
|
||||
size_t WrittenBytes() const { return dest_.contents_.size(); }
|
||||
|
||||
std::string Read() {
|
||||
if (!reading_) {
|
||||
@ -134,7 +68,7 @@ class LogTest {
|
||||
}
|
||||
std::string scratch;
|
||||
Slice record;
|
||||
if (reader_.ReadRecord(&record, &scratch)) {
|
||||
if (reader_->ReadRecord(&record, &scratch)) {
|
||||
return record.ToString();
|
||||
} else {
|
||||
return "EOF";
|
||||
@ -155,22 +89,16 @@ class LogTest {
|
||||
|
||||
void FixChecksum(int header_offset, int len) {
|
||||
// Compute crc of type/len/data
|
||||
uint32_t crc = crc32c::Value(&dest_.contents_[header_offset+6], 1 + len);
|
||||
uint32_t crc = crc32c::Value(&dest_.contents_[header_offset + 6], 1 + len);
|
||||
crc = crc32c::Mask(crc);
|
||||
EncodeFixed32(&dest_.contents_[header_offset], crc);
|
||||
}
|
||||
|
||||
void ForceError() {
|
||||
source_.force_error_ = true;
|
||||
}
|
||||
void ForceError() { source_.force_error_ = true; }
|
||||
|
||||
size_t DroppedBytes() const {
|
||||
return report_.dropped_bytes_;
|
||||
}
|
||||
size_t DroppedBytes() const { return report_.dropped_bytes_; }
|
||||
|
||||
std::string ReportMessage() const {
|
||||
return report_.message_;
|
||||
}
|
||||
std::string ReportMessage() const { return report_.message_; }
|
||||
|
||||
// Returns OK iff recorded error message contains "msg"
|
||||
std::string MatchError(const std::string& msg) const {
|
||||
@ -182,18 +110,23 @@ class LogTest {
|
||||
}
|
||||
|
||||
void WriteInitialOffsetLog() {
|
||||
for (int i = 0; i < 4; i++) {
|
||||
for (int i = 0; i < num_initial_offset_records_; i++) {
|
||||
std::string record(initial_offset_record_sizes_[i],
|
||||
static_cast<char>('a' + i));
|
||||
Write(record);
|
||||
}
|
||||
}
|
||||
|
||||
void StartReadingAt(uint64_t initial_offset) {
|
||||
delete reader_;
|
||||
reader_ = new Reader(&source_, &report_, true /*checksum*/, initial_offset);
|
||||
}
|
||||
|
||||
void CheckOffsetPastEndReturnsNoRecords(uint64_t offset_past_end) {
|
||||
WriteInitialOffsetLog();
|
||||
reading_ = true;
|
||||
source_.contents_ = Slice(dest_.contents_);
|
||||
Reader* offset_reader = new Reader(&source_, &report_, true/*checksum*/,
|
||||
Reader* offset_reader = new Reader(&source_, &report_, true /*checksum*/,
|
||||
WrittenBytes() + offset_past_end);
|
||||
Slice record;
|
||||
std::string scratch;
|
||||
@ -206,38 +139,126 @@ class LogTest {
|
||||
WriteInitialOffsetLog();
|
||||
reading_ = true;
|
||||
source_.contents_ = Slice(dest_.contents_);
|
||||
Reader* offset_reader = new Reader(&source_, &report_, true/*checksum*/,
|
||||
initial_offset);
|
||||
Slice record;
|
||||
std::string scratch;
|
||||
ASSERT_TRUE(offset_reader->ReadRecord(&record, &scratch));
|
||||
ASSERT_EQ(initial_offset_record_sizes_[expected_record_offset],
|
||||
record.size());
|
||||
ASSERT_EQ(initial_offset_last_record_offsets_[expected_record_offset],
|
||||
offset_reader->LastRecordOffset());
|
||||
ASSERT_EQ((char)('a' + expected_record_offset), record.data()[0]);
|
||||
Reader* offset_reader =
|
||||
new Reader(&source_, &report_, true /*checksum*/, initial_offset);
|
||||
|
||||
// Read all records from expected_record_offset through the last one.
|
||||
ASSERT_LT(expected_record_offset, num_initial_offset_records_);
|
||||
for (; expected_record_offset < num_initial_offset_records_;
|
||||
++expected_record_offset) {
|
||||
Slice record;
|
||||
std::string scratch;
|
||||
ASSERT_TRUE(offset_reader->ReadRecord(&record, &scratch));
|
||||
ASSERT_EQ(initial_offset_record_sizes_[expected_record_offset],
|
||||
record.size());
|
||||
ASSERT_EQ(initial_offset_last_record_offsets_[expected_record_offset],
|
||||
offset_reader->LastRecordOffset());
|
||||
ASSERT_EQ((char)('a' + expected_record_offset), record.data()[0]);
|
||||
}
|
||||
delete offset_reader;
|
||||
}
|
||||
|
||||
private:
|
||||
class StringDest : public WritableFile {
|
||||
public:
|
||||
Status Close() override { return Status::OK(); }
|
||||
Status Flush() override { return Status::OK(); }
|
||||
Status Sync() override { return Status::OK(); }
|
||||
Status Append(const Slice& slice) override {
|
||||
contents_.append(slice.data(), slice.size());
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
std::string contents_;
|
||||
};
|
||||
|
||||
class StringSource : public SequentialFile {
|
||||
public:
|
||||
StringSource() : force_error_(false), returned_partial_(false) {}
|
||||
|
||||
Status Read(size_t n, Slice* result, char* scratch) override {
|
||||
ASSERT_TRUE(!returned_partial_) << "must not Read() after eof/error";
|
||||
|
||||
if (force_error_) {
|
||||
force_error_ = false;
|
||||
returned_partial_ = true;
|
||||
return Status::Corruption("read error");
|
||||
}
|
||||
|
||||
if (contents_.size() < n) {
|
||||
n = contents_.size();
|
||||
returned_partial_ = true;
|
||||
}
|
||||
*result = Slice(contents_.data(), n);
|
||||
contents_.remove_prefix(n);
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status Skip(uint64_t n) override {
|
||||
if (n > contents_.size()) {
|
||||
contents_.clear();
|
||||
return Status::NotFound("in-memory file skipped past end");
|
||||
}
|
||||
|
||||
contents_.remove_prefix(n);
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Slice contents_;
|
||||
bool force_error_;
|
||||
bool returned_partial_;
|
||||
};
|
||||
|
||||
class ReportCollector : public Reader::Reporter {
|
||||
public:
|
||||
ReportCollector() : dropped_bytes_(0) {}
|
||||
void Corruption(size_t bytes, const Status& status) override {
|
||||
dropped_bytes_ += bytes;
|
||||
message_.append(status.ToString());
|
||||
}
|
||||
|
||||
size_t dropped_bytes_;
|
||||
std::string message_;
|
||||
};
|
||||
|
||||
// Record metadata for testing initial offset functionality
|
||||
static size_t initial_offset_record_sizes_[];
|
||||
static uint64_t initial_offset_last_record_offsets_[];
|
||||
static int num_initial_offset_records_;
|
||||
|
||||
StringDest dest_;
|
||||
StringSource source_;
|
||||
ReportCollector report_;
|
||||
bool reading_;
|
||||
Writer* writer_;
|
||||
Reader* reader_;
|
||||
};
|
||||
|
||||
size_t LogTest::initial_offset_record_sizes_[] =
|
||||
{10000, // Two sizable records in first block
|
||||
10000,
|
||||
2 * log::kBlockSize - 1000, // Span three blocks
|
||||
1};
|
||||
size_t LogTest::initial_offset_record_sizes_[] = {
|
||||
10000, // Two sizable records in first block
|
||||
10000,
|
||||
2 * log::kBlockSize - 1000, // Span three blocks
|
||||
1,
|
||||
13716, // Consume all but two bytes of block 3.
|
||||
log::kBlockSize - kHeaderSize, // Consume the entirety of block 4.
|
||||
};
|
||||
|
||||
uint64_t LogTest::initial_offset_last_record_offsets_[] =
|
||||
{0,
|
||||
kHeaderSize + 10000,
|
||||
2 * (kHeaderSize + 10000),
|
||||
2 * (kHeaderSize + 10000) +
|
||||
(2 * log::kBlockSize - 1000) + 3 * kHeaderSize};
|
||||
uint64_t LogTest::initial_offset_last_record_offsets_[] = {
|
||||
0,
|
||||
kHeaderSize + 10000,
|
||||
2 * (kHeaderSize + 10000),
|
||||
2 * (kHeaderSize + 10000) + (2 * log::kBlockSize - 1000) + 3 * kHeaderSize,
|
||||
2 * (kHeaderSize + 10000) + (2 * log::kBlockSize - 1000) + 3 * kHeaderSize +
|
||||
kHeaderSize + 1,
|
||||
3 * log::kBlockSize,
|
||||
};
|
||||
|
||||
// LogTest::initial_offset_last_record_offsets_ must be defined before this.
|
||||
int LogTest::num_initial_offset_records_ =
|
||||
sizeof(LogTest::initial_offset_last_record_offsets_) / sizeof(uint64_t);
|
||||
|
||||
TEST(LogTest, Empty) {
|
||||
ASSERT_EQ("EOF", Read());
|
||||
}
|
||||
TEST(LogTest, Empty) { ASSERT_EQ("EOF", Read()); }
|
||||
|
||||
TEST(LogTest, ReadWrite) {
|
||||
Write("foo");
|
||||
@ -274,7 +295,7 @@ TEST(LogTest, Fragmentation) {
|
||||
|
||||
TEST(LogTest, MarginalTrailer) {
|
||||
// Make a trailer that is exactly the same length as an empty record.
|
||||
const int n = kBlockSize - 2*kHeaderSize;
|
||||
const int n = kBlockSize - 2 * kHeaderSize;
|
||||
Write(BigString("foo", n));
|
||||
ASSERT_EQ(kBlockSize - kHeaderSize, WrittenBytes());
|
||||
Write("");
|
||||
@ -287,7 +308,7 @@ TEST(LogTest, MarginalTrailer) {
|
||||
|
||||
TEST(LogTest, MarginalTrailer2) {
|
||||
// Make a trailer that is exactly the same length as an empty record.
|
||||
const int n = kBlockSize - 2*kHeaderSize;
|
||||
const int n = kBlockSize - 2 * kHeaderSize;
|
||||
Write(BigString("foo", n));
|
||||
ASSERT_EQ(kBlockSize - kHeaderSize, WrittenBytes());
|
||||
Write("bar");
|
||||
@ -299,7 +320,7 @@ TEST(LogTest, MarginalTrailer2) {
|
||||
}
|
||||
|
||||
TEST(LogTest, ShortTrailer) {
|
||||
const int n = kBlockSize - 2*kHeaderSize + 4;
|
||||
const int n = kBlockSize - 2 * kHeaderSize + 4;
|
||||
Write(BigString("foo", n));
|
||||
ASSERT_EQ(kBlockSize - kHeaderSize + 4, WrittenBytes());
|
||||
Write("");
|
||||
@ -311,13 +332,22 @@ TEST(LogTest, ShortTrailer) {
|
||||
}
|
||||
|
||||
TEST(LogTest, AlignedEof) {
|
||||
const int n = kBlockSize - 2*kHeaderSize + 4;
|
||||
const int n = kBlockSize - 2 * kHeaderSize + 4;
|
||||
Write(BigString("foo", n));
|
||||
ASSERT_EQ(kBlockSize - kHeaderSize + 4, WrittenBytes());
|
||||
ASSERT_EQ(BigString("foo", n), Read());
|
||||
ASSERT_EQ("EOF", Read());
|
||||
}
|
||||
|
||||
TEST(LogTest, OpenForAppend) {
|
||||
Write("hello");
|
||||
ReopenForAppend();
|
||||
Write("world");
|
||||
ASSERT_EQ("hello", Read());
|
||||
ASSERT_EQ("world", Read());
|
||||
ASSERT_EQ("EOF", Read());
|
||||
}
|
||||
|
||||
TEST(LogTest, RandomRead) {
|
||||
const int N = 500;
|
||||
Random write_rnd(301);
|
||||
@ -353,7 +383,7 @@ TEST(LogTest, BadRecordType) {
|
||||
|
||||
TEST(LogTest, TruncatedTrailingRecordIsIgnored) {
|
||||
Write("foo");
|
||||
ShrinkSize(4); // Drop all payload as well as a header byte
|
||||
ShrinkSize(4); // Drop all payload as well as a header byte
|
||||
ASSERT_EQ("EOF", Read());
|
||||
// Truncated last record is ignored, not treated as an error.
|
||||
ASSERT_EQ(0, DroppedBytes());
|
||||
@ -445,6 +475,22 @@ TEST(LogTest, PartialLastIsIgnored) {
|
||||
ASSERT_EQ(0, DroppedBytes());
|
||||
}
|
||||
|
||||
TEST(LogTest, SkipIntoMultiRecord) {
|
||||
// Consider a fragmented record:
|
||||
// first(R1), middle(R1), last(R1), first(R2)
|
||||
// If initial_offset points to a record after first(R1) but before first(R2)
|
||||
// incomplete fragment errors are not actual errors, and must be suppressed
|
||||
// until a new first or full record is encountered.
|
||||
Write(BigString("foo", 3 * kBlockSize));
|
||||
Write("correct");
|
||||
StartReadingAt(kBlockSize);
|
||||
|
||||
ASSERT_EQ("correct", Read());
|
||||
ASSERT_EQ("", ReportMessage());
|
||||
ASSERT_EQ(0, DroppedBytes());
|
||||
ASSERT_EQ("EOF", Read());
|
||||
}
|
||||
|
||||
TEST(LogTest, ErrorJoinsRecords) {
|
||||
// Consider two fragmented records:
|
||||
// first(R1) last(R1) first(R2) last(R2)
|
||||
@ -457,44 +503,30 @@ TEST(LogTest, ErrorJoinsRecords) {
|
||||
Write("correct");
|
||||
|
||||
// Wipe the middle block
|
||||
for (int offset = kBlockSize; offset < 2*kBlockSize; offset++) {
|
||||
for (int offset = kBlockSize; offset < 2 * kBlockSize; offset++) {
|
||||
SetByte(offset, 'x');
|
||||
}
|
||||
|
||||
ASSERT_EQ("correct", Read());
|
||||
ASSERT_EQ("EOF", Read());
|
||||
const size_t dropped = DroppedBytes();
|
||||
ASSERT_LE(dropped, 2*kBlockSize + 100);
|
||||
ASSERT_GE(dropped, 2*kBlockSize);
|
||||
ASSERT_LE(dropped, 2 * kBlockSize + 100);
|
||||
ASSERT_GE(dropped, 2 * kBlockSize);
|
||||
}
|
||||
|
||||
TEST(LogTest, ReadStart) {
|
||||
CheckInitialOffsetRecord(0, 0);
|
||||
}
|
||||
TEST(LogTest, ReadStart) { CheckInitialOffsetRecord(0, 0); }
|
||||
|
||||
TEST(LogTest, ReadSecondOneOff) {
|
||||
CheckInitialOffsetRecord(1, 1);
|
||||
}
|
||||
TEST(LogTest, ReadSecondOneOff) { CheckInitialOffsetRecord(1, 1); }
|
||||
|
||||
TEST(LogTest, ReadSecondTenThousand) {
|
||||
CheckInitialOffsetRecord(10000, 1);
|
||||
}
|
||||
TEST(LogTest, ReadSecondTenThousand) { CheckInitialOffsetRecord(10000, 1); }
|
||||
|
||||
TEST(LogTest, ReadSecondStart) {
|
||||
CheckInitialOffsetRecord(10007, 1);
|
||||
}
|
||||
TEST(LogTest, ReadSecondStart) { CheckInitialOffsetRecord(10007, 1); }
|
||||
|
||||
TEST(LogTest, ReadThirdOneOff) {
|
||||
CheckInitialOffsetRecord(10008, 2);
|
||||
}
|
||||
TEST(LogTest, ReadThirdOneOff) { CheckInitialOffsetRecord(10008, 2); }
|
||||
|
||||
TEST(LogTest, ReadThirdStart) {
|
||||
CheckInitialOffsetRecord(20014, 2);
|
||||
}
|
||||
TEST(LogTest, ReadThirdStart) { CheckInitialOffsetRecord(20014, 2); }
|
||||
|
||||
TEST(LogTest, ReadFourthOneOff) {
|
||||
CheckInitialOffsetRecord(20015, 3);
|
||||
}
|
||||
TEST(LogTest, ReadFourthOneOff) { CheckInitialOffsetRecord(20015, 3); }
|
||||
|
||||
TEST(LogTest, ReadFourthFirstBlockTrailer) {
|
||||
CheckInitialOffsetRecord(log::kBlockSize - 4, 3);
|
||||
@ -514,17 +546,15 @@ TEST(LogTest, ReadFourthStart) {
|
||||
3);
|
||||
}
|
||||
|
||||
TEST(LogTest, ReadEnd) {
|
||||
CheckOffsetPastEndReturnsNoRecords(0);
|
||||
TEST(LogTest, ReadInitialOffsetIntoBlockPadding) {
|
||||
CheckInitialOffsetRecord(3 * log::kBlockSize - 3, 5);
|
||||
}
|
||||
|
||||
TEST(LogTest, ReadPastEnd) {
|
||||
CheckOffsetPastEndReturnsNoRecords(5);
|
||||
}
|
||||
TEST(LogTest, ReadEnd) { CheckOffsetPastEndReturnsNoRecords(0); }
|
||||
|
||||
TEST(LogTest, ReadPastEnd) { CheckOffsetPastEndReturnsNoRecords(5); }
|
||||
|
||||
} // namespace log
|
||||
} // namespace leveldb
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
return leveldb::test::RunAllTests();
|
||||
}
|
||||
int main(int argc, char** argv) { return leveldb::test::RunAllTests(); }
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include "db/log_writer.h"
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "leveldb/env.h"
|
||||
#include "util/coding.h"
|
||||
#include "util/crc32c.h"
|
||||
@ -12,18 +13,24 @@
|
||||
namespace leveldb {
|
||||
namespace log {
|
||||
|
||||
Writer::Writer(WritableFile* dest)
|
||||
: dest_(dest),
|
||||
block_offset_(0) {
|
||||
static void InitTypeCrc(uint32_t* type_crc) {
|
||||
for (int i = 0; i <= kMaxRecordType; i++) {
|
||||
char t = static_cast<char>(i);
|
||||
type_crc_[i] = crc32c::Value(&t, 1);
|
||||
type_crc[i] = crc32c::Value(&t, 1);
|
||||
}
|
||||
}
|
||||
|
||||
Writer::~Writer() {
|
||||
Writer::Writer(WritableFile* dest) : dest_(dest), block_offset_(0) {
|
||||
InitTypeCrc(type_crc_);
|
||||
}
|
||||
|
||||
Writer::Writer(WritableFile* dest, uint64_t dest_length)
|
||||
: dest_(dest), block_offset_(dest_length % kBlockSize) {
|
||||
InitTypeCrc(type_crc_);
|
||||
}
|
||||
|
||||
Writer::~Writer() = default;
|
||||
|
||||
Status Writer::AddRecord(const Slice& slice) {
|
||||
const char* ptr = slice.data();
|
||||
size_t left = slice.size();
|
||||
@ -40,7 +47,7 @@ Status Writer::AddRecord(const Slice& slice) {
|
||||
// Switch to a new block
|
||||
if (leftover > 0) {
|
||||
// Fill the trailer (literal below relies on kHeaderSize being 7)
|
||||
assert(kHeaderSize == 7);
|
||||
static_assert(kHeaderSize == 7, "");
|
||||
dest_->Append(Slice("\x00\x00\x00\x00\x00\x00", leftover));
|
||||
}
|
||||
block_offset_ = 0;
|
||||
@ -72,30 +79,31 @@ Status Writer::AddRecord(const Slice& slice) {
|
||||
return s;
|
||||
}
|
||||
|
||||
Status Writer::EmitPhysicalRecord(RecordType t, const char* ptr, size_t n) {
|
||||
assert(n <= 0xffff); // Must fit in two bytes
|
||||
assert(block_offset_ + kHeaderSize + n <= kBlockSize);
|
||||
Status Writer::EmitPhysicalRecord(RecordType t, const char* ptr,
|
||||
size_t length) {
|
||||
assert(length <= 0xffff); // Must fit in two bytes
|
||||
assert(block_offset_ + kHeaderSize + length <= kBlockSize);
|
||||
|
||||
// Format the header
|
||||
char buf[kHeaderSize];
|
||||
buf[4] = static_cast<char>(n & 0xff);
|
||||
buf[5] = static_cast<char>(n >> 8);
|
||||
buf[4] = static_cast<char>(length & 0xff);
|
||||
buf[5] = static_cast<char>(length >> 8);
|
||||
buf[6] = static_cast<char>(t);
|
||||
|
||||
// Compute the crc of the record type and the payload.
|
||||
uint32_t crc = crc32c::Extend(type_crc_[t], ptr, n);
|
||||
crc = crc32c::Mask(crc); // Adjust for storage
|
||||
uint32_t crc = crc32c::Extend(type_crc_[t], ptr, length);
|
||||
crc = crc32c::Mask(crc); // Adjust for storage
|
||||
EncodeFixed32(buf, crc);
|
||||
|
||||
// Write the header and the payload
|
||||
Status s = dest_->Append(Slice(buf, kHeaderSize));
|
||||
if (s.ok()) {
|
||||
s = dest_->Append(Slice(ptr, n));
|
||||
s = dest_->Append(Slice(ptr, length));
|
||||
if (s.ok()) {
|
||||
s = dest_->Flush();
|
||||
}
|
||||
}
|
||||
block_offset_ += kHeaderSize + n;
|
||||
block_offset_ += kHeaderSize + length;
|
||||
return s;
|
||||
}
|
||||
|
||||
|
@ -6,6 +6,7 @@
|
||||
#define STORAGE_LEVELDB_DB_LOG_WRITER_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "db/log_format.h"
|
||||
#include "leveldb/slice.h"
|
||||
#include "leveldb/status.h"
|
||||
@ -22,24 +23,29 @@ class Writer {
|
||||
// "*dest" must be initially empty.
|
||||
// "*dest" must remain live while this Writer is in use.
|
||||
explicit Writer(WritableFile* dest);
|
||||
|
||||
// Create a writer that will append data to "*dest".
|
||||
// "*dest" must have initial length "dest_length".
|
||||
// "*dest" must remain live while this Writer is in use.
|
||||
Writer(WritableFile* dest, uint64_t dest_length);
|
||||
|
||||
Writer(const Writer&) = delete;
|
||||
Writer& operator=(const Writer&) = delete;
|
||||
|
||||
~Writer();
|
||||
|
||||
Status AddRecord(const Slice& slice);
|
||||
|
||||
private:
|
||||
Status EmitPhysicalRecord(RecordType type, const char* ptr, size_t length);
|
||||
|
||||
WritableFile* dest_;
|
||||
int block_offset_; // Current offset in block
|
||||
int block_offset_; // Current offset in block
|
||||
|
||||
// crc32c values for all supported record types. These are
|
||||
// pre-computed to reduce the overhead of computing the crc of the
|
||||
// record type stored in the header.
|
||||
uint32_t type_crc_[kMaxRecordType + 1];
|
||||
|
||||
Status EmitPhysicalRecord(RecordType type, const char* ptr, size_t length);
|
||||
|
||||
// No copying allowed
|
||||
Writer(const Writer&);
|
||||
void operator=(const Writer&);
|
||||
};
|
||||
|
||||
} // namespace log
|
||||
|
@ -18,20 +18,15 @@ static Slice GetLengthPrefixedSlice(const char* data) {
|
||||
return Slice(p, len);
|
||||
}
|
||||
|
||||
MemTable::MemTable(const InternalKeyComparator& cmp)
|
||||
: comparator_(cmp),
|
||||
refs_(0),
|
||||
table_(comparator_, &arena_) {
|
||||
}
|
||||
MemTable::MemTable(const InternalKeyComparator& comparator)
|
||||
: comparator_(comparator), refs_(0), table_(comparator_, &arena_) {}
|
||||
|
||||
MemTable::~MemTable() {
|
||||
assert(refs_ == 0);
|
||||
}
|
||||
MemTable::~MemTable() { assert(refs_ == 0); }
|
||||
|
||||
size_t MemTable::ApproximateMemoryUsage() { return arena_.MemoryUsage(); }
|
||||
|
||||
int MemTable::KeyComparator::operator()(const char* aptr, const char* bptr)
|
||||
const {
|
||||
int MemTable::KeyComparator::operator()(const char* aptr,
|
||||
const char* bptr) const {
|
||||
// Internal keys are encoded as length-prefixed strings.
|
||||
Slice a = GetLengthPrefixedSlice(aptr);
|
||||
Slice b = GetLengthPrefixedSlice(bptr);
|
||||
@ -48,39 +43,37 @@ static const char* EncodeKey(std::string* scratch, const Slice& target) {
|
||||
return scratch->data();
|
||||
}
|
||||
|
||||
class MemTableIterator: public Iterator {
|
||||
class MemTableIterator : public Iterator {
|
||||
public:
|
||||
explicit MemTableIterator(MemTable::Table* table) : iter_(table) { }
|
||||
explicit MemTableIterator(MemTable::Table* table) : iter_(table) {}
|
||||
|
||||
virtual bool Valid() const { return iter_.Valid(); }
|
||||
virtual void Seek(const Slice& k) { iter_.Seek(EncodeKey(&tmp_, k)); }
|
||||
virtual void SeekToFirst() { iter_.SeekToFirst(); }
|
||||
virtual void SeekToLast() { iter_.SeekToLast(); }
|
||||
virtual void Next() { iter_.Next(); }
|
||||
virtual void Prev() { iter_.Prev(); }
|
||||
virtual Slice key() const { return GetLengthPrefixedSlice(iter_.key()); }
|
||||
virtual Slice value() const {
|
||||
MemTableIterator(const MemTableIterator&) = delete;
|
||||
MemTableIterator& operator=(const MemTableIterator&) = delete;
|
||||
|
||||
~MemTableIterator() override = default;
|
||||
|
||||
bool Valid() const override { return iter_.Valid(); }
|
||||
void Seek(const Slice& k) override { iter_.Seek(EncodeKey(&tmp_, k)); }
|
||||
void SeekToFirst() override { iter_.SeekToFirst(); }
|
||||
void SeekToLast() override { iter_.SeekToLast(); }
|
||||
void Next() override { iter_.Next(); }
|
||||
void Prev() override { iter_.Prev(); }
|
||||
Slice key() const override { return GetLengthPrefixedSlice(iter_.key()); }
|
||||
Slice value() const override {
|
||||
Slice key_slice = GetLengthPrefixedSlice(iter_.key());
|
||||
return GetLengthPrefixedSlice(key_slice.data() + key_slice.size());
|
||||
}
|
||||
|
||||
virtual Status status() const { return Status::OK(); }
|
||||
Status status() const override { return Status::OK(); }
|
||||
|
||||
private:
|
||||
MemTable::Table::Iterator iter_;
|
||||
std::string tmp_; // For passing to EncodeKey
|
||||
|
||||
// No copying allowed
|
||||
MemTableIterator(const MemTableIterator&);
|
||||
void operator=(const MemTableIterator&);
|
||||
std::string tmp_; // For passing to EncodeKey
|
||||
};
|
||||
|
||||
Iterator* MemTable::NewIterator() {
|
||||
return new MemTableIterator(&table_);
|
||||
}
|
||||
Iterator* MemTable::NewIterator() { return new MemTableIterator(&table_); }
|
||||
|
||||
void MemTable::Add(SequenceNumber s, ValueType type,
|
||||
const Slice& key,
|
||||
void MemTable::Add(SequenceNumber s, ValueType type, const Slice& key,
|
||||
const Slice& value) {
|
||||
// Format of an entry is concatenation of:
|
||||
// key_size : varint32 of internal_key.size()
|
||||
@ -90,9 +83,9 @@ void MemTable::Add(SequenceNumber s, ValueType type,
|
||||
size_t key_size = key.size();
|
||||
size_t val_size = value.size();
|
||||
size_t internal_key_size = key_size + 8;
|
||||
const size_t encoded_len =
|
||||
VarintLength(internal_key_size) + internal_key_size +
|
||||
VarintLength(val_size) + val_size;
|
||||
const size_t encoded_len = VarintLength(internal_key_size) +
|
||||
internal_key_size + VarintLength(val_size) +
|
||||
val_size;
|
||||
char* buf = arena_.Allocate(encoded_len);
|
||||
char* p = EncodeVarint32(buf, internal_key_size);
|
||||
memcpy(p, key.data(), key_size);
|
||||
@ -101,7 +94,7 @@ void MemTable::Add(SequenceNumber s, ValueType type,
|
||||
p += 8;
|
||||
p = EncodeVarint32(p, val_size);
|
||||
memcpy(p, value.data(), val_size);
|
||||
assert((p + val_size) - buf == encoded_len);
|
||||
assert(p + val_size == buf + encoded_len);
|
||||
table_.Insert(buf);
|
||||
}
|
||||
|
||||
@ -121,10 +114,9 @@ bool MemTable::Get(const LookupKey& key, std::string* value, Status* s) {
|
||||
// all entries with overly large sequence numbers.
|
||||
const char* entry = iter.key();
|
||||
uint32_t key_length;
|
||||
const char* key_ptr = GetVarint32Ptr(entry, entry+5, &key_length);
|
||||
const char* key_ptr = GetVarint32Ptr(entry, entry + 5, &key_length);
|
||||
if (comparator_.comparator.user_comparator()->Compare(
|
||||
Slice(key_ptr, key_length - 8),
|
||||
key.user_key()) == 0) {
|
||||
Slice(key_ptr, key_length - 8), key.user_key()) == 0) {
|
||||
// Correct user key
|
||||
const uint64_t tag = DecodeFixed64(key_ptr + key_length - 8);
|
||||
switch (static_cast<ValueType>(tag & 0xff)) {
|
||||
|
@ -6,15 +6,15 @@
|
||||
#define STORAGE_LEVELDB_DB_MEMTABLE_H_
|
||||
|
||||
#include <string>
|
||||
#include "leveldb/db.h"
|
||||
|
||||
#include "db/dbformat.h"
|
||||
#include "db/skiplist.h"
|
||||
#include "leveldb/db.h"
|
||||
#include "util/arena.h"
|
||||
|
||||
namespace leveldb {
|
||||
|
||||
class InternalKeyComparator;
|
||||
class Mutex;
|
||||
class MemTableIterator;
|
||||
|
||||
class MemTable {
|
||||
@ -23,6 +23,9 @@ class MemTable {
|
||||
// is zero and the caller must call Ref() at least once.
|
||||
explicit MemTable(const InternalKeyComparator& comparator);
|
||||
|
||||
MemTable(const MemTable&) = delete;
|
||||
MemTable& operator=(const MemTable&) = delete;
|
||||
|
||||
// Increase reference count.
|
||||
void Ref() { ++refs_; }
|
||||
|
||||
@ -36,10 +39,7 @@ class MemTable {
|
||||
}
|
||||
|
||||
// Returns an estimate of the number of bytes of data in use by this
|
||||
// data structure.
|
||||
//
|
||||
// REQUIRES: external synchronization to prevent simultaneous
|
||||
// operations on the same MemTable.
|
||||
// data structure. It is safe to call when MemTable is being modified.
|
||||
size_t ApproximateMemoryUsage();
|
||||
|
||||
// Return an iterator that yields the contents of the memtable.
|
||||
@ -53,8 +53,7 @@ class MemTable {
|
||||
// Add an entry into memtable that maps key to value at the
|
||||
// specified sequence number and with the specified type.
|
||||
// Typically value will be empty if type==kTypeDeletion.
|
||||
void Add(SequenceNumber seq, ValueType type,
|
||||
const Slice& key,
|
||||
void Add(SequenceNumber seq, ValueType type, const Slice& key,
|
||||
const Slice& value);
|
||||
|
||||
// If memtable contains a value for key, store it in *value and return true.
|
||||
@ -64,26 +63,23 @@ class MemTable {
|
||||
bool Get(const LookupKey& key, std::string* value, Status* s);
|
||||
|
||||
private:
|
||||
~MemTable(); // Private since only Unref() should be used to delete it
|
||||
|
||||
struct KeyComparator {
|
||||
const InternalKeyComparator comparator;
|
||||
explicit KeyComparator(const InternalKeyComparator& c) : comparator(c) { }
|
||||
int operator()(const char* a, const char* b) const;
|
||||
};
|
||||
friend class MemTableIterator;
|
||||
friend class MemTableBackwardIterator;
|
||||
|
||||
struct KeyComparator {
|
||||
const InternalKeyComparator comparator;
|
||||
explicit KeyComparator(const InternalKeyComparator& c) : comparator(c) {}
|
||||
int operator()(const char* a, const char* b) const;
|
||||
};
|
||||
|
||||
typedef SkipList<const char*, KeyComparator> Table;
|
||||
|
||||
~MemTable(); // Private since only Unref() should be used to delete it
|
||||
|
||||
KeyComparator comparator_;
|
||||
int refs_;
|
||||
Arena arena_;
|
||||
Table table_;
|
||||
|
||||
// No copying allowed
|
||||
MemTable(const MemTable&);
|
||||
void operator=(const MemTable&);
|
||||
};
|
||||
|
||||
} // namespace leveldb
|
||||
|
330
db/recovery_test.cc
Normal file
330
db/recovery_test.cc
Normal file
@ -0,0 +1,330 @@
|
||||
// Copyright (c) 2014 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#include "db/db_impl.h"
|
||||
#include "db/filename.h"
|
||||
#include "db/version_set.h"
|
||||
#include "db/write_batch_internal.h"
|
||||
#include "leveldb/db.h"
|
||||
#include "leveldb/env.h"
|
||||
#include "leveldb/write_batch.h"
|
||||
#include "util/logging.h"
|
||||
#include "util/testharness.h"
|
||||
#include "util/testutil.h"
|
||||
|
||||
namespace leveldb {
|
||||
|
||||
class RecoveryTest {
|
||||
public:
|
||||
RecoveryTest() : env_(Env::Default()), db_(nullptr) {
|
||||
dbname_ = test::TmpDir() + "/recovery_test";
|
||||
DestroyDB(dbname_, Options());
|
||||
Open();
|
||||
}
|
||||
|
||||
~RecoveryTest() {
|
||||
Close();
|
||||
DestroyDB(dbname_, Options());
|
||||
}
|
||||
|
||||
DBImpl* dbfull() const { return reinterpret_cast<DBImpl*>(db_); }
|
||||
Env* env() const { return env_; }
|
||||
|
||||
bool CanAppend() {
|
||||
WritableFile* tmp;
|
||||
Status s = env_->NewAppendableFile(CurrentFileName(dbname_), &tmp);
|
||||
delete tmp;
|
||||
if (s.IsNotSupportedError()) {
|
||||
return false;
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
void Close() {
|
||||
delete db_;
|
||||
db_ = nullptr;
|
||||
}
|
||||
|
||||
Status OpenWithStatus(Options* options = nullptr) {
|
||||
Close();
|
||||
Options opts;
|
||||
if (options != nullptr) {
|
||||
opts = *options;
|
||||
} else {
|
||||
opts.reuse_logs = true; // TODO(sanjay): test both ways
|
||||
opts.create_if_missing = true;
|
||||
}
|
||||
if (opts.env == nullptr) {
|
||||
opts.env = env_;
|
||||
}
|
||||
return DB::Open(opts, dbname_, &db_);
|
||||
}
|
||||
|
||||
void Open(Options* options = nullptr) {
|
||||
ASSERT_OK(OpenWithStatus(options));
|
||||
ASSERT_EQ(1, NumLogs());
|
||||
}
|
||||
|
||||
Status Put(const std::string& k, const std::string& v) {
|
||||
return db_->Put(WriteOptions(), k, v);
|
||||
}
|
||||
|
||||
std::string Get(const std::string& k, const Snapshot* snapshot = nullptr) {
|
||||
std::string result;
|
||||
Status s = db_->Get(ReadOptions(), k, &result);
|
||||
if (s.IsNotFound()) {
|
||||
result = "NOT_FOUND";
|
||||
} else if (!s.ok()) {
|
||||
result = s.ToString();
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
std::string ManifestFileName() {
|
||||
std::string current;
|
||||
ASSERT_OK(ReadFileToString(env_, CurrentFileName(dbname_), ¤t));
|
||||
size_t len = current.size();
|
||||
if (len > 0 && current[len - 1] == '\n') {
|
||||
current.resize(len - 1);
|
||||
}
|
||||
return dbname_ + "/" + current;
|
||||
}
|
||||
|
||||
std::string LogName(uint64_t number) { return LogFileName(dbname_, number); }
|
||||
|
||||
size_t DeleteLogFiles() {
|
||||
// Linux allows unlinking open files, but Windows does not.
|
||||
// Closing the db allows for file deletion.
|
||||
Close();
|
||||
std::vector<uint64_t> logs = GetFiles(kLogFile);
|
||||
for (size_t i = 0; i < logs.size(); i++) {
|
||||
ASSERT_OK(env_->DeleteFile(LogName(logs[i]))) << LogName(logs[i]);
|
||||
}
|
||||
return logs.size();
|
||||
}
|
||||
|
||||
void DeleteManifestFile() { ASSERT_OK(env_->DeleteFile(ManifestFileName())); }
|
||||
|
||||
uint64_t FirstLogFile() { return GetFiles(kLogFile)[0]; }
|
||||
|
||||
std::vector<uint64_t> GetFiles(FileType t) {
|
||||
std::vector<std::string> filenames;
|
||||
ASSERT_OK(env_->GetChildren(dbname_, &filenames));
|
||||
std::vector<uint64_t> result;
|
||||
for (size_t i = 0; i < filenames.size(); i++) {
|
||||
uint64_t number;
|
||||
FileType type;
|
||||
if (ParseFileName(filenames[i], &number, &type) && type == t) {
|
||||
result.push_back(number);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
int NumLogs() { return GetFiles(kLogFile).size(); }
|
||||
|
||||
int NumTables() { return GetFiles(kTableFile).size(); }
|
||||
|
||||
uint64_t FileSize(const std::string& fname) {
|
||||
uint64_t result;
|
||||
ASSERT_OK(env_->GetFileSize(fname, &result)) << fname;
|
||||
return result;
|
||||
}
|
||||
|
||||
void CompactMemTable() { dbfull()->TEST_CompactMemTable(); }
|
||||
|
||||
// Directly construct a log file that sets key to val.
|
||||
void MakeLogFile(uint64_t lognum, SequenceNumber seq, Slice key, Slice val) {
|
||||
std::string fname = LogFileName(dbname_, lognum);
|
||||
WritableFile* file;
|
||||
ASSERT_OK(env_->NewWritableFile(fname, &file));
|
||||
log::Writer writer(file);
|
||||
WriteBatch batch;
|
||||
batch.Put(key, val);
|
||||
WriteBatchInternal::SetSequence(&batch, seq);
|
||||
ASSERT_OK(writer.AddRecord(WriteBatchInternal::Contents(&batch)));
|
||||
ASSERT_OK(file->Flush());
|
||||
delete file;
|
||||
}
|
||||
|
||||
private:
|
||||
std::string dbname_;
|
||||
Env* env_;
|
||||
DB* db_;
|
||||
};
|
||||
|
||||
// A database that is closed and reopened (twice) keeps using the same
// manifest file and still serves the previously written value.
TEST(RecoveryTest, ManifestReused) {
  const bool appendable = CanAppend();
  if (!appendable) {
    fprintf(stderr, "skipping test because env does not support appending\n");
    return;
  }

  ASSERT_OK(Put("foo", "bar"));
  Close();
  const std::string manifest_before = ManifestFileName();

  // Two consecutive reopen rounds must both observe the original manifest.
  for (int round = 0; round < 2; ++round) {
    Open();
    ASSERT_EQ(manifest_before, ManifestFileName());
    ASSERT_EQ("bar", Get("foo"));
  }
}
|
||||
|
||||
// An oversized manifest is replaced by a fresh, small one on the next open,
// and that replacement is then kept by subsequent opens.
TEST(RecoveryTest, LargeManifestCompacted) {
  if (!CanAppend()) {
    fprintf(stderr, "skipping test because env does not support appending\n");
    return;
  }
  ASSERT_OK(Put("foo", "bar"));
  Close();
  const std::string original_manifest = ManifestFileName();

  // Pad with zeroes to make manifest file very big.
  {
    const uint64_t current_size = FileSize(original_manifest);
    WritableFile* appender;
    ASSERT_OK(env()->NewAppendableFile(original_manifest, &appender));
    const size_t padding_len = 3 * 1048576 - static_cast<size_t>(current_size);
    std::string padding(padding_len, '\0');
    ASSERT_OK(appender->Append(padding));
    ASSERT_OK(appender->Flush());
    delete appender;
  }

  // First reopen: a different and much smaller manifest must be in use.
  Open();
  const std::string compacted_manifest = ManifestFileName();
  ASSERT_NE(original_manifest, compacted_manifest);
  ASSERT_GT(10000, FileSize(compacted_manifest));
  ASSERT_EQ("bar", Get("foo"));

  // Second reopen: the compacted manifest is reused.
  Open();
  ASSERT_EQ(compacted_manifest, ManifestFileName());
  ASSERT_EQ("bar", Get("foo"));
}
|
||||
|
||||
// Deleting the only log file loses the unflushed write; the database still
// opens (twice) and reports the key as missing.
TEST(RecoveryTest, NoLogFiles) {
  ASSERT_OK(Put("foo", "bar"));
  ASSERT_EQ(1, DeleteLogFiles());

  for (int round = 0; round < 2; ++round) {
    Open();
    ASSERT_EQ("NOT_FOUND", Get("foo"));
  }
}
|
||||
|
||||
// Reopening must keep writing to the existing log file, both when that log
// is empty (first pass, after a memtable compaction) and when it already
// holds data (second pass).
TEST(RecoveryTest, LogFileReuse) {
  if (!CanAppend()) {
    fprintf(stderr, "skipping test because env does not support appending\n");
    return;
  }

  for (int pass = 0; pass < 2; pass++) {
    const bool first_pass = (pass == 0);

    ASSERT_OK(Put("foo", "bar"));
    if (first_pass) {
      // Compact to ensure current log is empty
      CompactMemTable();
    }
    Close();
    ASSERT_EQ(1, NumLogs());

    const uint64_t log_number = FirstLogFile();
    if (first_pass) {
      ASSERT_EQ(0, FileSize(LogName(log_number)));
    } else {
      ASSERT_LT(0, FileSize(LogName(log_number)));
    }

    // Reopen twice; each time the same single log file must still be in use.
    for (int reopen = 0; reopen < 2; reopen++) {
      Open();
      ASSERT_EQ(1, NumLogs());
      ASSERT_EQ(log_number, FirstLogFile()) << "did not reuse log file";
      ASSERT_EQ("bar", Get("foo"));
    }
  }
}
|
||||
|
||||
// Recovering a log that is larger than the write buffer must produce several
// tables, must not reuse the old log, and must preserve every entry.
TEST(RecoveryTest, MultipleMemTables) {
  // Make a large log.
  const int kNum = 1000;
  for (int n = 0; n < kNum; n++) {
    char key[100];
    snprintf(key, sizeof(key), "%050d", n);
    ASSERT_OK(Put(key, key));
  }
  ASSERT_EQ(0, NumTables());
  Close();
  ASSERT_EQ(0, NumTables());
  ASSERT_EQ(1, NumLogs());
  const uint64_t log_before_recovery = FirstLogFile();

  // Force creation of multiple memtables by reducing the write buffer size.
  Options small_buffer;
  small_buffer.reuse_logs = true;
  small_buffer.write_buffer_size = (kNum * 100) / 2;
  Open(&small_buffer);
  ASSERT_LE(2, NumTables());
  ASSERT_EQ(1, NumLogs());
  ASSERT_NE(log_before_recovery, FirstLogFile()) << "must not reuse log";

  // Every key written before the restart maps to itself.
  for (int n = 0; n < kNum; n++) {
    char key[100];
    snprintf(key, sizeof(key), "%050d", n);
    ASSERT_EQ(key, Get(key));
  }
}
|
||||
|
||||
// Recovery must replay every uncompacted log file, leave a state that can be
// recovered again, and ignore a log file whose number is older than the
// ones already processed.
TEST(RecoveryTest, MultipleLogFiles) {
  ASSERT_OK(Put("foo", "bar"));
  Close();
  ASSERT_EQ(1, NumLogs());

  // Make a bunch of uncompacted log files.
  const uint64_t base_log = FirstLogFile();
  MakeLogFile(base_log + 1, 1000, "hello", "world");
  MakeLogFile(base_log + 2, 1001, "hi", "there");
  MakeLogFile(base_log + 3, 1002, "foo", "bar2");

  // Recover and check that all log files were processed.
  Open();
  ASSERT_LE(1, NumTables());
  ASSERT_EQ(1, NumLogs());
  const uint64_t live_log = FirstLogFile();
  ASSERT_LE(base_log + 3, live_log);
  ASSERT_EQ("bar2", Get("foo"));
  ASSERT_EQ("world", Get("hello"));
  ASSERT_EQ("there", Get("hi"));

  // Test that previous recovery produced recoverable state.
  Open();
  ASSERT_LE(1, NumTables());
  ASSERT_EQ(1, NumLogs());
  if (CanAppend()) {
    ASSERT_EQ(live_log, FirstLogFile());
  }
  ASSERT_EQ("bar2", Get("foo"));
  ASSERT_EQ("world", Get("hello"));
  ASSERT_EQ("there", Get("hi"));

  // Check that introducing an older log file does not cause it to be re-read.
  Close();
  MakeLogFile(base_log + 1, 2000, "hello", "stale write");
  Open();
  ASSERT_LE(1, NumTables());
  ASSERT_EQ(1, NumLogs());
  if (CanAppend()) {
    ASSERT_EQ(live_log, FirstLogFile());
  }
  ASSERT_EQ("bar2", Get("foo"));
  ASSERT_EQ("world", Get("hello"));
  ASSERT_EQ("there", Get("hi"));
}
|
||||
|
||||
// Opening a database whose manifest file was deleted must fail with a
// corruption status.
TEST(RecoveryTest, ManifestMissing) {
  ASSERT_OK(Put("foo", "bar"));
  Close();
  DeleteManifestFile();

  const Status open_result = OpenWithStatus();
  ASSERT_TRUE(open_result.IsCorruption());
}
|
||||
|
||||
} // namespace leveldb
|
||||
|
||||
// Entry point: runs every registered test and returns the harness result as
// the process exit code. The command-line arguments are not used.
int main(int argc, char** argv) {
  const int exit_code = leveldb::test::RunAllTests();
  return exit_code;
}
|
95
db/repair.cc
95
db/repair.cc
@ -54,7 +54,7 @@ class Repairer {
|
||||
owns_cache_(options_.block_cache != options.block_cache),
|
||||
next_file_number_(1) {
|
||||
// TableCache can be small since we expect each table to be opened once.
|
||||
table_cache_ = new TableCache(dbname_, &options_, 10);
|
||||
table_cache_ = new TableCache(dbname_, options_, 10);
|
||||
}
|
||||
|
||||
~Repairer() {
|
||||
@ -84,9 +84,7 @@ class Repairer {
|
||||
"recovered %d files; %llu bytes. "
|
||||
"Some data may have been lost. "
|
||||
"****",
|
||||
dbname_.c_str(),
|
||||
static_cast<int>(tables_.size()),
|
||||
bytes);
|
||||
dbname_.c_str(), static_cast<int>(tables_.size()), bytes);
|
||||
}
|
||||
return status;
|
||||
}
|
||||
@ -97,22 +95,6 @@ class Repairer {
|
||||
SequenceNumber max_sequence;
|
||||
};
|
||||
|
||||
std::string const dbname_;
|
||||
Env* const env_;
|
||||
InternalKeyComparator const icmp_;
|
||||
InternalFilterPolicy const ipolicy_;
|
||||
Options const options_;
|
||||
bool owns_info_log_;
|
||||
bool owns_cache_;
|
||||
TableCache* table_cache_;
|
||||
VersionEdit edit_;
|
||||
|
||||
std::vector<std::string> manifests_;
|
||||
std::vector<uint64_t> table_numbers_;
|
||||
std::vector<uint64_t> logs_;
|
||||
std::vector<TableInfo> tables_;
|
||||
uint64_t next_file_number_;
|
||||
|
||||
Status FindFiles() {
|
||||
std::vector<std::string> filenames;
|
||||
Status status = env_->GetChildren(dbname_, &filenames);
|
||||
@ -152,8 +134,7 @@ class Repairer {
|
||||
Status status = ConvertLogToTable(logs_[i]);
|
||||
if (!status.ok()) {
|
||||
Log(options_.info_log, "Log #%llu: ignoring conversion error: %s",
|
||||
(unsigned long long) logs_[i],
|
||||
status.ToString().c_str());
|
||||
(unsigned long long)logs_[i], status.ToString().c_str());
|
||||
}
|
||||
ArchiveFile(logname);
|
||||
}
|
||||
@ -164,11 +145,10 @@ class Repairer {
|
||||
Env* env;
|
||||
Logger* info_log;
|
||||
uint64_t lognum;
|
||||
virtual void Corruption(size_t bytes, const Status& s) {
|
||||
void Corruption(size_t bytes, const Status& s) override {
|
||||
// We print error messages for corruption, but continue repairing.
|
||||
Log(info_log, "Log #%llu: dropping %d bytes; %s",
|
||||
(unsigned long long) lognum,
|
||||
static_cast<int>(bytes),
|
||||
(unsigned long long)lognum, static_cast<int>(bytes),
|
||||
s.ToString().c_str());
|
||||
}
|
||||
};
|
||||
@ -190,8 +170,8 @@ class Repairer {
|
||||
// corruptions cause entire commits to be skipped instead of
|
||||
// propagating bad information (like overly large sequence
|
||||
// numbers).
|
||||
log::Reader reader(lfile, &reporter, false/*do not checksum*/,
|
||||
0/*initial_offset*/);
|
||||
log::Reader reader(lfile, &reporter, false /*do not checksum*/,
|
||||
0 /*initial_offset*/);
|
||||
|
||||
// Read all the records and add to a memtable
|
||||
std::string scratch;
|
||||
@ -202,8 +182,8 @@ class Repairer {
|
||||
int counter = 0;
|
||||
while (reader.ReadRecord(&record, &scratch)) {
|
||||
if (record.size() < 12) {
|
||||
reporter.Corruption(
|
||||
record.size(), Status::Corruption("log record too small"));
|
||||
reporter.Corruption(record.size(),
|
||||
Status::Corruption("log record too small"));
|
||||
continue;
|
||||
}
|
||||
WriteBatchInternal::SetContents(&batch, record);
|
||||
@ -212,8 +192,7 @@ class Repairer {
|
||||
counter += WriteBatchInternal::Count(&batch);
|
||||
} else {
|
||||
Log(options_.info_log, "Log #%llu: ignoring %s",
|
||||
(unsigned long long) log,
|
||||
status.ToString().c_str());
|
||||
(unsigned long long)log, status.ToString().c_str());
|
||||
status = Status::OK(); // Keep going with rest of file
|
||||
}
|
||||
}
|
||||
@ -227,16 +206,14 @@ class Repairer {
|
||||
status = BuildTable(dbname_, env_, options_, table_cache_, iter, &meta);
|
||||
delete iter;
|
||||
mem->Unref();
|
||||
mem = NULL;
|
||||
mem = nullptr;
|
||||
if (status.ok()) {
|
||||
if (meta.file_size > 0) {
|
||||
table_numbers_.push_back(meta.number);
|
||||
}
|
||||
}
|
||||
Log(options_.info_log, "Log #%llu: %d ops saved to Table #%llu %s",
|
||||
(unsigned long long) log,
|
||||
counter,
|
||||
(unsigned long long) meta.number,
|
||||
(unsigned long long)log, counter, (unsigned long long)meta.number,
|
||||
status.ToString().c_str());
|
||||
return status;
|
||||
}
|
||||
@ -272,8 +249,7 @@ class Repairer {
|
||||
ArchiveFile(TableFileName(dbname_, number));
|
||||
ArchiveFile(SSTTableFileName(dbname_, number));
|
||||
Log(options_.info_log, "Table #%llu: dropped: %s",
|
||||
(unsigned long long) t.meta.number,
|
||||
status.ToString().c_str());
|
||||
(unsigned long long)t.meta.number, status.ToString().c_str());
|
||||
return;
|
||||
}
|
||||
|
||||
@ -287,8 +263,7 @@ class Repairer {
|
||||
Slice key = iter->key();
|
||||
if (!ParseInternalKey(key, &parsed)) {
|
||||
Log(options_.info_log, "Table #%llu: unparsable key %s",
|
||||
(unsigned long long) t.meta.number,
|
||||
EscapeString(key).c_str());
|
||||
(unsigned long long)t.meta.number, EscapeString(key).c_str());
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -307,9 +282,7 @@ class Repairer {
|
||||
}
|
||||
delete iter;
|
||||
Log(options_.info_log, "Table #%llu: %d entries %s",
|
||||
(unsigned long long) t.meta.number,
|
||||
counter,
|
||||
status.ToString().c_str());
|
||||
(unsigned long long)t.meta.number, counter, status.ToString().c_str());
|
||||
|
||||
if (status.ok()) {
|
||||
tables_.push_back(t);
|
||||
@ -350,20 +323,20 @@ class Repairer {
|
||||
}
|
||||
}
|
||||
delete builder;
|
||||
builder = NULL;
|
||||
builder = nullptr;
|
||||
|
||||
if (s.ok()) {
|
||||
s = file->Close();
|
||||
}
|
||||
delete file;
|
||||
file = NULL;
|
||||
file = nullptr;
|
||||
|
||||
if (counter > 0 && s.ok()) {
|
||||
std::string orig = TableFileName(dbname_, t.meta.number);
|
||||
s = env_->RenameFile(copy, orig);
|
||||
if (s.ok()) {
|
||||
Log(options_.info_log, "Table #%llu: %d entries repaired",
|
||||
(unsigned long long) t.meta.number, counter);
|
||||
(unsigned long long)t.meta.number, counter);
|
||||
tables_.push_back(t);
|
||||
}
|
||||
}
|
||||
@ -395,11 +368,11 @@ class Repairer {
|
||||
for (size_t i = 0; i < tables_.size(); i++) {
|
||||
// TODO(opt): separate out into multiple levels
|
||||
const TableInfo& t = tables_[i];
|
||||
edit_.AddFile(0, t.meta.number, t.meta.file_size,
|
||||
t.meta.smallest, t.meta.largest);
|
||||
edit_.AddFile(0, t.meta.number, t.meta.file_size, t.meta.smallest,
|
||||
t.meta.largest);
|
||||
}
|
||||
|
||||
//fprintf(stderr, "NewDescriptor:\n%s\n", edit_.DebugString().c_str());
|
||||
// fprintf(stderr, "NewDescriptor:\n%s\n", edit_.DebugString().c_str());
|
||||
{
|
||||
log::Writer log(file);
|
||||
std::string record;
|
||||
@ -410,7 +383,7 @@ class Repairer {
|
||||
status = file->Close();
|
||||
}
|
||||
delete file;
|
||||
file = NULL;
|
||||
file = nullptr;
|
||||
|
||||
if (!status.ok()) {
|
||||
env_->DeleteFile(tmp);
|
||||
@ -438,18 +411,34 @@ class Repairer {
|
||||
// dir/lost/foo
|
||||
const char* slash = strrchr(fname.c_str(), '/');
|
||||
std::string new_dir;
|
||||
if (slash != NULL) {
|
||||
if (slash != nullptr) {
|
||||
new_dir.assign(fname.data(), slash - fname.data());
|
||||
}
|
||||
new_dir.append("/lost");
|
||||
env_->CreateDir(new_dir); // Ignore error
|
||||
std::string new_file = new_dir;
|
||||
new_file.append("/");
|
||||
new_file.append((slash == NULL) ? fname.c_str() : slash + 1);
|
||||
new_file.append((slash == nullptr) ? fname.c_str() : slash + 1);
|
||||
Status s = env_->RenameFile(fname, new_file);
|
||||
Log(options_.info_log, "Archiving %s: %s\n",
|
||||
fname.c_str(), s.ToString().c_str());
|
||||
Log(options_.info_log, "Archiving %s: %s\n", fname.c_str(),
|
||||
s.ToString().c_str());
|
||||
}
|
||||
|
||||
const std::string dbname_;
|
||||
Env* const env_;
|
||||
InternalKeyComparator const icmp_;
|
||||
InternalFilterPolicy const ipolicy_;
|
||||
const Options options_;
|
||||
bool owns_info_log_;
|
||||
bool owns_cache_;
|
||||
TableCache* table_cache_;
|
||||
VersionEdit edit_;
|
||||
|
||||
std::vector<std::string> manifests_;
|
||||
std::vector<uint64_t> table_numbers_;
|
||||
std::vector<uint64_t> logs_;
|
||||
std::vector<TableInfo> tables_;
|
||||
uint64_t next_file_number_;
|
||||
};
|
||||
} // namespace
|
||||
|
||||
|
182
db/skiplist.h
182
db/skiplist.h
@ -27,9 +27,10 @@
|
||||
//
|
||||
// ... prev vs. next pointer ordering ...
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include "port/port.h"
|
||||
#include <atomic>
|
||||
#include <cassert>
|
||||
#include <cstdlib>
|
||||
|
||||
#include "util/arena.h"
|
||||
#include "util/random.h"
|
||||
|
||||
@ -37,7 +38,7 @@ namespace leveldb {
|
||||
|
||||
class Arena;
|
||||
|
||||
template<typename Key, class Comparator>
|
||||
template <typename Key, class Comparator>
|
||||
class SkipList {
|
||||
private:
|
||||
struct Node;
|
||||
@ -48,6 +49,9 @@ class SkipList {
|
||||
// must remain allocated for the lifetime of the skiplist object.
|
||||
explicit SkipList(Comparator cmp, Arena* arena);
|
||||
|
||||
SkipList(const SkipList&) = delete;
|
||||
SkipList& operator=(const SkipList&) = delete;
|
||||
|
||||
// Insert key into the list.
|
||||
// REQUIRES: nothing that compares equal to key is currently in the list.
|
||||
void Insert(const Key& key);
|
||||
@ -97,24 +101,10 @@ class SkipList {
|
||||
private:
|
||||
enum { kMaxHeight = 12 };
|
||||
|
||||
// Immutable after construction
|
||||
Comparator const compare_;
|
||||
Arena* const arena_; // Arena used for allocations of nodes
|
||||
|
||||
Node* const head_;
|
||||
|
||||
// Modified only by Insert(). Read racily by readers, but stale
|
||||
// values are ok.
|
||||
port::AtomicPointer max_height_; // Height of the entire list
|
||||
|
||||
inline int GetMaxHeight() const {
|
||||
return static_cast<int>(
|
||||
reinterpret_cast<intptr_t>(max_height_.NoBarrier_Load()));
|
||||
return max_height_.load(std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
// Read/written only by Insert().
|
||||
Random rnd_;
|
||||
|
||||
Node* NewNode(const Key& key, int height);
|
||||
int RandomHeight();
|
||||
bool Equal(const Key& a, const Key& b) const { return (compare_(a, b) == 0); }
|
||||
@ -123,9 +113,9 @@ class SkipList {
|
||||
bool KeyIsAfterNode(const Key& key, Node* n) const;
|
||||
|
||||
// Return the earliest node that comes at or after key.
|
||||
// Return NULL if there is no such node.
|
||||
// Return nullptr if there is no such node.
|
||||
//
|
||||
// If prev is non-NULL, fills prev[level] with pointer to previous
|
||||
// If prev is non-null, fills prev[level] with pointer to previous
|
||||
// node at "level" for every level in [0..max_height_-1].
|
||||
Node* FindGreaterOrEqual(const Key& key, Node** prev) const;
|
||||
|
||||
@ -137,15 +127,24 @@ class SkipList {
|
||||
// Return head_ if list is empty.
|
||||
Node* FindLast() const;
|
||||
|
||||
// No copying allowed
|
||||
SkipList(const SkipList&);
|
||||
void operator=(const SkipList&);
|
||||
// Immutable after construction
|
||||
Comparator const compare_;
|
||||
Arena* const arena_; // Arena used for allocations of nodes
|
||||
|
||||
Node* const head_;
|
||||
|
||||
// Modified only by Insert(). Read racily by readers, but stale
|
||||
// values are ok.
|
||||
std::atomic<int> max_height_; // Height of the entire list
|
||||
|
||||
// Read/written only by Insert().
|
||||
Random rnd_;
|
||||
};
|
||||
|
||||
// Implementation details follow
|
||||
template<typename Key, class Comparator>
|
||||
struct SkipList<Key,Comparator>::Node {
|
||||
explicit Node(const Key& k) : key(k) { }
|
||||
template <typename Key, class Comparator>
|
||||
struct SkipList<Key, Comparator>::Node {
|
||||
explicit Node(const Key& k) : key(k) {}
|
||||
|
||||
Key const key;
|
||||
|
||||
@ -155,92 +154,92 @@ struct SkipList<Key,Comparator>::Node {
|
||||
assert(n >= 0);
|
||||
// Use an 'acquire load' so that we observe a fully initialized
|
||||
// version of the returned Node.
|
||||
return reinterpret_cast<Node*>(next_[n].Acquire_Load());
|
||||
return next_[n].load(std::memory_order_acquire);
|
||||
}
|
||||
void SetNext(int n, Node* x) {
|
||||
assert(n >= 0);
|
||||
// Use a 'release store' so that anybody who reads through this
|
||||
// pointer observes a fully initialized version of the inserted node.
|
||||
next_[n].Release_Store(x);
|
||||
next_[n].store(x, std::memory_order_release);
|
||||
}
|
||||
|
||||
// No-barrier variants that can be safely used in a few locations.
|
||||
Node* NoBarrier_Next(int n) {
|
||||
assert(n >= 0);
|
||||
return reinterpret_cast<Node*>(next_[n].NoBarrier_Load());
|
||||
return next_[n].load(std::memory_order_relaxed);
|
||||
}
|
||||
void NoBarrier_SetNext(int n, Node* x) {
|
||||
assert(n >= 0);
|
||||
next_[n].NoBarrier_Store(x);
|
||||
next_[n].store(x, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
private:
|
||||
// Array of length equal to the node height. next_[0] is lowest level link.
|
||||
port::AtomicPointer next_[1];
|
||||
std::atomic<Node*> next_[1];
|
||||
};
|
||||
|
||||
template<typename Key, class Comparator>
|
||||
typename SkipList<Key,Comparator>::Node*
|
||||
SkipList<Key,Comparator>::NewNode(const Key& key, int height) {
|
||||
char* mem = arena_->AllocateAligned(
|
||||
sizeof(Node) + sizeof(port::AtomicPointer) * (height - 1));
|
||||
return new (mem) Node(key);
|
||||
template <typename Key, class Comparator>
|
||||
typename SkipList<Key, Comparator>::Node* SkipList<Key, Comparator>::NewNode(
|
||||
const Key& key, int height) {
|
||||
char* const node_memory = arena_->AllocateAligned(
|
||||
sizeof(Node) + sizeof(std::atomic<Node*>) * (height - 1));
|
||||
return new (node_memory) Node(key);
|
||||
}
|
||||
|
||||
template<typename Key, class Comparator>
|
||||
inline SkipList<Key,Comparator>::Iterator::Iterator(const SkipList* list) {
|
||||
template <typename Key, class Comparator>
|
||||
inline SkipList<Key, Comparator>::Iterator::Iterator(const SkipList* list) {
|
||||
list_ = list;
|
||||
node_ = NULL;
|
||||
node_ = nullptr;
|
||||
}
|
||||
|
||||
template<typename Key, class Comparator>
|
||||
inline bool SkipList<Key,Comparator>::Iterator::Valid() const {
|
||||
return node_ != NULL;
|
||||
template <typename Key, class Comparator>
|
||||
inline bool SkipList<Key, Comparator>::Iterator::Valid() const {
|
||||
return node_ != nullptr;
|
||||
}
|
||||
|
||||
template<typename Key, class Comparator>
|
||||
inline const Key& SkipList<Key,Comparator>::Iterator::key() const {
|
||||
template <typename Key, class Comparator>
|
||||
inline const Key& SkipList<Key, Comparator>::Iterator::key() const {
|
||||
assert(Valid());
|
||||
return node_->key;
|
||||
}
|
||||
|
||||
template<typename Key, class Comparator>
|
||||
inline void SkipList<Key,Comparator>::Iterator::Next() {
|
||||
template <typename Key, class Comparator>
|
||||
inline void SkipList<Key, Comparator>::Iterator::Next() {
|
||||
assert(Valid());
|
||||
node_ = node_->Next(0);
|
||||
}
|
||||
|
||||
template<typename Key, class Comparator>
|
||||
inline void SkipList<Key,Comparator>::Iterator::Prev() {
|
||||
template <typename Key, class Comparator>
|
||||
inline void SkipList<Key, Comparator>::Iterator::Prev() {
|
||||
// Instead of using explicit "prev" links, we just search for the
|
||||
// last node that falls before key.
|
||||
assert(Valid());
|
||||
node_ = list_->FindLessThan(node_->key);
|
||||
if (node_ == list_->head_) {
|
||||
node_ = NULL;
|
||||
node_ = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
template<typename Key, class Comparator>
|
||||
inline void SkipList<Key,Comparator>::Iterator::Seek(const Key& target) {
|
||||
node_ = list_->FindGreaterOrEqual(target, NULL);
|
||||
template <typename Key, class Comparator>
|
||||
inline void SkipList<Key, Comparator>::Iterator::Seek(const Key& target) {
|
||||
node_ = list_->FindGreaterOrEqual(target, nullptr);
|
||||
}
|
||||
|
||||
template<typename Key, class Comparator>
|
||||
inline void SkipList<Key,Comparator>::Iterator::SeekToFirst() {
|
||||
template <typename Key, class Comparator>
|
||||
inline void SkipList<Key, Comparator>::Iterator::SeekToFirst() {
|
||||
node_ = list_->head_->Next(0);
|
||||
}
|
||||
|
||||
template<typename Key, class Comparator>
|
||||
inline void SkipList<Key,Comparator>::Iterator::SeekToLast() {
|
||||
template <typename Key, class Comparator>
|
||||
inline void SkipList<Key, Comparator>::Iterator::SeekToLast() {
|
||||
node_ = list_->FindLast();
|
||||
if (node_ == list_->head_) {
|
||||
node_ = NULL;
|
||||
node_ = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
template<typename Key, class Comparator>
|
||||
int SkipList<Key,Comparator>::RandomHeight() {
|
||||
template <typename Key, class Comparator>
|
||||
int SkipList<Key, Comparator>::RandomHeight() {
|
||||
// Increase height with probability 1 in kBranching
|
||||
static const unsigned int kBranching = 4;
|
||||
int height = 1;
|
||||
@ -252,15 +251,16 @@ int SkipList<Key,Comparator>::RandomHeight() {
|
||||
return height;
|
||||
}
|
||||
|
||||
template<typename Key, class Comparator>
|
||||
bool SkipList<Key,Comparator>::KeyIsAfterNode(const Key& key, Node* n) const {
|
||||
// NULL n is considered infinite
|
||||
return (n != NULL) && (compare_(n->key, key) < 0);
|
||||
template <typename Key, class Comparator>
|
||||
bool SkipList<Key, Comparator>::KeyIsAfterNode(const Key& key, Node* n) const {
|
||||
// null n is considered infinite
|
||||
return (n != nullptr) && (compare_(n->key, key) < 0);
|
||||
}
|
||||
|
||||
template<typename Key, class Comparator>
|
||||
typename SkipList<Key,Comparator>::Node* SkipList<Key,Comparator>::FindGreaterOrEqual(const Key& key, Node** prev)
|
||||
const {
|
||||
template <typename Key, class Comparator>
|
||||
typename SkipList<Key, Comparator>::Node*
|
||||
SkipList<Key, Comparator>::FindGreaterOrEqual(const Key& key,
|
||||
Node** prev) const {
|
||||
Node* x = head_;
|
||||
int level = GetMaxHeight() - 1;
|
||||
while (true) {
|
||||
@ -269,7 +269,7 @@ typename SkipList<Key,Comparator>::Node* SkipList<Key,Comparator>::FindGreaterOr
|
||||
// Keep searching in this list
|
||||
x = next;
|
||||
} else {
|
||||
if (prev != NULL) prev[level] = x;
|
||||
if (prev != nullptr) prev[level] = x;
|
||||
if (level == 0) {
|
||||
return next;
|
||||
} else {
|
||||
@ -280,15 +280,15 @@ typename SkipList<Key,Comparator>::Node* SkipList<Key,Comparator>::FindGreaterOr
|
||||
}
|
||||
}
|
||||
|
||||
template<typename Key, class Comparator>
|
||||
typename SkipList<Key,Comparator>::Node*
|
||||
SkipList<Key,Comparator>::FindLessThan(const Key& key) const {
|
||||
template <typename Key, class Comparator>
|
||||
typename SkipList<Key, Comparator>::Node*
|
||||
SkipList<Key, Comparator>::FindLessThan(const Key& key) const {
|
||||
Node* x = head_;
|
||||
int level = GetMaxHeight() - 1;
|
||||
while (true) {
|
||||
assert(x == head_ || compare_(x->key, key) < 0);
|
||||
Node* next = x->Next(level);
|
||||
if (next == NULL || compare_(next->key, key) >= 0) {
|
||||
if (next == nullptr || compare_(next->key, key) >= 0) {
|
||||
if (level == 0) {
|
||||
return x;
|
||||
} else {
|
||||
@ -301,14 +301,14 @@ SkipList<Key,Comparator>::FindLessThan(const Key& key) const {
|
||||
}
|
||||
}
|
||||
|
||||
template<typename Key, class Comparator>
|
||||
typename SkipList<Key,Comparator>::Node* SkipList<Key,Comparator>::FindLast()
|
||||
template <typename Key, class Comparator>
|
||||
typename SkipList<Key, Comparator>::Node* SkipList<Key, Comparator>::FindLast()
|
||||
const {
|
||||
Node* x = head_;
|
||||
int level = GetMaxHeight() - 1;
|
||||
while (true) {
|
||||
Node* next = x->Next(level);
|
||||
if (next == NULL) {
|
||||
if (next == nullptr) {
|
||||
if (level == 0) {
|
||||
return x;
|
||||
} else {
|
||||
@ -321,43 +321,41 @@ typename SkipList<Key,Comparator>::Node* SkipList<Key,Comparator>::FindLast()
|
||||
}
|
||||
}
|
||||
|
||||
template<typename Key, class Comparator>
|
||||
SkipList<Key,Comparator>::SkipList(Comparator cmp, Arena* arena)
|
||||
template <typename Key, class Comparator>
|
||||
SkipList<Key, Comparator>::SkipList(Comparator cmp, Arena* arena)
|
||||
: compare_(cmp),
|
||||
arena_(arena),
|
||||
head_(NewNode(0 /* any key will do */, kMaxHeight)),
|
||||
max_height_(reinterpret_cast<void*>(1)),
|
||||
max_height_(1),
|
||||
rnd_(0xdeadbeef) {
|
||||
for (int i = 0; i < kMaxHeight; i++) {
|
||||
head_->SetNext(i, NULL);
|
||||
head_->SetNext(i, nullptr);
|
||||
}
|
||||
}
|
||||
|
||||
template<typename Key, class Comparator>
|
||||
void SkipList<Key,Comparator>::Insert(const Key& key) {
|
||||
template <typename Key, class Comparator>
|
||||
void SkipList<Key, Comparator>::Insert(const Key& key) {
|
||||
// TODO(opt): We can use a barrier-free variant of FindGreaterOrEqual()
|
||||
// here since Insert() is externally synchronized.
|
||||
Node* prev[kMaxHeight];
|
||||
Node* x = FindGreaterOrEqual(key, prev);
|
||||
|
||||
// Our data structure does not allow duplicate insertion
|
||||
assert(x == NULL || !Equal(key, x->key));
|
||||
assert(x == nullptr || !Equal(key, x->key));
|
||||
|
||||
int height = RandomHeight();
|
||||
if (height > GetMaxHeight()) {
|
||||
for (int i = GetMaxHeight(); i < height; i++) {
|
||||
prev[i] = head_;
|
||||
}
|
||||
//fprintf(stderr, "Change height from %d to %d\n", max_height_, height);
|
||||
|
||||
// It is ok to mutate max_height_ without any synchronization
|
||||
// with concurrent readers. A concurrent reader that observes
|
||||
// the new value of max_height_ will see either the old value of
|
||||
// new level pointers from head_ (NULL), or a new value set in
|
||||
// new level pointers from head_ (nullptr), or a new value set in
|
||||
// the loop below. In the former case the reader will
|
||||
// immediately drop to the next level since NULL sorts after all
|
||||
// immediately drop to the next level since nullptr sorts after all
|
||||
// keys. In the latter case the reader will use the new node.
|
||||
max_height_.NoBarrier_Store(reinterpret_cast<void*>(height));
|
||||
max_height_.store(height, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
x = NewNode(key, height);
|
||||
@ -369,10 +367,10 @@ void SkipList<Key,Comparator>::Insert(const Key& key) {
|
||||
}
|
||||
}
|
||||
|
||||
template<typename Key, class Comparator>
|
||||
bool SkipList<Key,Comparator>::Contains(const Key& key) const {
|
||||
Node* x = FindGreaterOrEqual(key, NULL);
|
||||
if (x != NULL && Equal(key, x->key)) {
|
||||
template <typename Key, class Comparator>
|
||||
bool SkipList<Key, Comparator>::Contains(const Key& key) const {
|
||||
Node* x = FindGreaterOrEqual(key, nullptr);
|
||||
if (x != nullptr && Equal(key, x->key)) {
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
|
@ -3,8 +3,13 @@
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#include "db/skiplist.h"
|
||||
|
||||
#include <atomic>
|
||||
#include <set>
|
||||
|
||||
#include "leveldb/env.h"
|
||||
#include "port/port.h"
|
||||
#include "port/thread_annotations.h"
|
||||
#include "util/arena.h"
|
||||
#include "util/hash.h"
|
||||
#include "util/random.h"
|
||||
@ -26,7 +31,7 @@ struct Comparator {
|
||||
}
|
||||
};
|
||||
|
||||
class SkipTest { };
|
||||
class SkipTest {};
|
||||
|
||||
TEST(SkipTest, Empty) {
|
||||
Arena arena;
|
||||
@ -112,8 +117,7 @@ TEST(SkipTest, InsertAndLookup) {
|
||||
|
||||
// Compare against model iterator
|
||||
for (std::set<Key>::reverse_iterator model_iter = keys.rbegin();
|
||||
model_iter != keys.rend();
|
||||
++model_iter) {
|
||||
model_iter != keys.rend(); ++model_iter) {
|
||||
ASSERT_TRUE(iter.Valid());
|
||||
ASSERT_EQ(*model_iter, iter.key());
|
||||
iter.Prev();
|
||||
@ -126,7 +130,7 @@ TEST(SkipTest, InsertAndLookup) {
|
||||
// concurrent readers (with no synchronization other than when a
|
||||
// reader's iterator is created), the reader always observes all the
|
||||
// data that was present in the skip list when the iterator was
|
||||
// constructor. Because insertions are happening concurrently, we may
|
||||
// constructed. Because insertions are happening concurrently, we may
|
||||
// also observe new values that were inserted since the iterator was
|
||||
// constructed, but we should never miss any values that were present
|
||||
// at iterator construction time.
|
||||
@ -155,12 +159,12 @@ class ConcurrentTest {
|
||||
static uint64_t hash(Key key) { return key & 0xff; }
|
||||
|
||||
static uint64_t HashNumbers(uint64_t k, uint64_t g) {
|
||||
uint64_t data[2] = { k, g };
|
||||
uint64_t data[2] = {k, g};
|
||||
return Hash(reinterpret_cast<char*>(data), sizeof(data), 0);
|
||||
}
|
||||
|
||||
static Key MakeKey(uint64_t k, uint64_t g) {
|
||||
assert(sizeof(Key) == sizeof(uint64_t));
|
||||
static_assert(sizeof(Key) == sizeof(uint64_t), "");
|
||||
assert(k <= K); // We sometimes pass K to seek to the end of the skiplist
|
||||
assert(g <= 0xffffffffu);
|
||||
return ((k << 40) | (g << 8) | (HashNumbers(k, g) & 0xff));
|
||||
@ -186,13 +190,11 @@ class ConcurrentTest {
|
||||
|
||||
// Per-key generation
|
||||
struct State {
|
||||
port::AtomicPointer generation[K];
|
||||
void Set(int k, intptr_t v) {
|
||||
generation[k].Release_Store(reinterpret_cast<void*>(v));
|
||||
}
|
||||
intptr_t Get(int k) {
|
||||
return reinterpret_cast<intptr_t>(generation[k].Acquire_Load());
|
||||
std::atomic<int> generation[K];
|
||||
void Set(int k, int v) {
|
||||
generation[k].store(v, std::memory_order_release);
|
||||
}
|
||||
int Get(int k) { return generation[k].load(std::memory_order_acquire); }
|
||||
|
||||
State() {
|
||||
for (int k = 0; k < K; k++) {
|
||||
@ -211,7 +213,7 @@ class ConcurrentTest {
|
||||
SkipList<Key, Comparator> list_;
|
||||
|
||||
public:
|
||||
ConcurrentTest() : list_(Comparator(), &arena_) { }
|
||||
ConcurrentTest() : list_(Comparator(), &arena_) {}
|
||||
|
||||
// REQUIRES: External synchronization
|
||||
void WriteStep(Random* rnd) {
|
||||
@ -250,11 +252,9 @@ class ConcurrentTest {
|
||||
// Note that generation 0 is never inserted, so it is ok if
|
||||
// <*,0,*> is missing.
|
||||
ASSERT_TRUE((gen(pos) == 0) ||
|
||||
(gen(pos) > initial_state.Get(key(pos)))
|
||||
) << "key: " << key(pos)
|
||||
<< "; gen: " << gen(pos)
|
||||
<< "; initgen: "
|
||||
<< initial_state.Get(key(pos));
|
||||
(gen(pos) > static_cast<Key>(initial_state.Get(key(pos)))))
|
||||
<< "key: " << key(pos) << "; gen: " << gen(pos)
|
||||
<< "; initgen: " << initial_state.Get(key(pos));
|
||||
|
||||
// Advance to next key in the valid key space
|
||||
if (key(pos) < key(current)) {
|
||||
@ -298,21 +298,14 @@ class TestState {
|
||||
public:
|
||||
ConcurrentTest t_;
|
||||
int seed_;
|
||||
port::AtomicPointer quit_flag_;
|
||||
std::atomic<bool> quit_flag_;
|
||||
|
||||
enum ReaderState {
|
||||
STARTING,
|
||||
RUNNING,
|
||||
DONE
|
||||
};
|
||||
enum ReaderState { STARTING, RUNNING, DONE };
|
||||
|
||||
explicit TestState(int s)
|
||||
: seed_(s),
|
||||
quit_flag_(NULL),
|
||||
state_(STARTING),
|
||||
state_cv_(&mu_) {}
|
||||
: seed_(s), quit_flag_(false), state_(STARTING), state_cv_(&mu_) {}
|
||||
|
||||
void Wait(ReaderState s) {
|
||||
void Wait(ReaderState s) LOCKS_EXCLUDED(mu_) {
|
||||
mu_.Lock();
|
||||
while (state_ != s) {
|
||||
state_cv_.Wait();
|
||||
@ -320,7 +313,7 @@ class TestState {
|
||||
mu_.Unlock();
|
||||
}
|
||||
|
||||
void Change(ReaderState s) {
|
||||
void Change(ReaderState s) LOCKS_EXCLUDED(mu_) {
|
||||
mu_.Lock();
|
||||
state_ = s;
|
||||
state_cv_.Signal();
|
||||
@ -329,8 +322,8 @@ class TestState {
|
||||
|
||||
private:
|
||||
port::Mutex mu_;
|
||||
ReaderState state_;
|
||||
port::CondVar state_cv_;
|
||||
ReaderState state_ GUARDED_BY(mu_);
|
||||
port::CondVar state_cv_ GUARDED_BY(mu_);
|
||||
};
|
||||
|
||||
static void ConcurrentReader(void* arg) {
|
||||
@ -338,7 +331,7 @@ static void ConcurrentReader(void* arg) {
|
||||
Random rnd(state->seed_);
|
||||
int64_t reads = 0;
|
||||
state->Change(TestState::RUNNING);
|
||||
while (!state->quit_flag_.Acquire_Load()) {
|
||||
while (!state->quit_flag_.load(std::memory_order_acquire)) {
|
||||
state->t_.ReadStep(&rnd);
|
||||
++reads;
|
||||
}
|
||||
@ -360,7 +353,7 @@ static void RunConcurrent(int run) {
|
||||
for (int i = 0; i < kSize; i++) {
|
||||
state.t_.WriteStep(&rnd);
|
||||
}
|
||||
state.quit_flag_.Release_Store(&state); // Any non-NULL arg will do
|
||||
state.quit_flag_.store(true, std::memory_order_release);
|
||||
state.Wait(TestState::DONE);
|
||||
}
|
||||
}
|
||||
@ -373,6 +366,4 @@ TEST(SkipTest, Concurrent5) { RunConcurrent(5); }
|
||||
|
||||
} // namespace leveldb
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
return leveldb::test::RunAllTests();
|
||||
}
|
||||
int main(int argc, char** argv) { return leveldb::test::RunAllTests(); }
|
||||
|
@ -16,50 +16,78 @@ class SnapshotList;
|
||||
// Each SnapshotImpl corresponds to a particular sequence number.
|
||||
class SnapshotImpl : public Snapshot {
|
||||
public:
|
||||
SequenceNumber number_; // const after creation
|
||||
SnapshotImpl(SequenceNumber sequence_number)
|
||||
: sequence_number_(sequence_number) {}
|
||||
|
||||
SequenceNumber sequence_number() const { return sequence_number_; }
|
||||
|
||||
private:
|
||||
friend class SnapshotList;
|
||||
|
||||
// SnapshotImpl is kept in a doubly-linked circular list
|
||||
// SnapshotImpl is kept in a doubly-linked circular list. The SnapshotList
|
||||
// implementation operates on the next/previous fields directly.
|
||||
SnapshotImpl* prev_;
|
||||
SnapshotImpl* next_;
|
||||
|
||||
SnapshotList* list_; // just for sanity checks
|
||||
const SequenceNumber sequence_number_;
|
||||
|
||||
#if !defined(NDEBUG)
|
||||
SnapshotList* list_ = nullptr;
|
||||
#endif // !defined(NDEBUG)
|
||||
};
|
||||
|
||||
class SnapshotList {
|
||||
public:
|
||||
SnapshotList() {
|
||||
list_.prev_ = &list_;
|
||||
list_.next_ = &list_;
|
||||
SnapshotList() : head_(0) {
|
||||
head_.prev_ = &head_;
|
||||
head_.next_ = &head_;
|
||||
}
|
||||
|
||||
bool empty() const { return list_.next_ == &list_; }
|
||||
SnapshotImpl* oldest() const { assert(!empty()); return list_.next_; }
|
||||
SnapshotImpl* newest() const { assert(!empty()); return list_.prev_; }
|
||||
|
||||
const SnapshotImpl* New(SequenceNumber seq) {
|
||||
SnapshotImpl* s = new SnapshotImpl;
|
||||
s->number_ = seq;
|
||||
s->list_ = this;
|
||||
s->next_ = &list_;
|
||||
s->prev_ = list_.prev_;
|
||||
s->prev_->next_ = s;
|
||||
s->next_->prev_ = s;
|
||||
return s;
|
||||
bool empty() const { return head_.next_ == &head_; }
|
||||
SnapshotImpl* oldest() const {
|
||||
assert(!empty());
|
||||
return head_.next_;
|
||||
}
|
||||
SnapshotImpl* newest() const {
|
||||
assert(!empty());
|
||||
return head_.prev_;
|
||||
}
|
||||
|
||||
void Delete(const SnapshotImpl* s) {
|
||||
assert(s->list_ == this);
|
||||
s->prev_->next_ = s->next_;
|
||||
s->next_->prev_ = s->prev_;
|
||||
delete s;
|
||||
// Creates a SnapshotImpl and appends it to the end of the list.
|
||||
SnapshotImpl* New(SequenceNumber sequence_number) {
|
||||
assert(empty() || newest()->sequence_number_ <= sequence_number);
|
||||
|
||||
SnapshotImpl* snapshot = new SnapshotImpl(sequence_number);
|
||||
|
||||
#if !defined(NDEBUG)
|
||||
snapshot->list_ = this;
|
||||
#endif // !defined(NDEBUG)
|
||||
snapshot->next_ = &head_;
|
||||
snapshot->prev_ = head_.prev_;
|
||||
snapshot->prev_->next_ = snapshot;
|
||||
snapshot->next_->prev_ = snapshot;
|
||||
return snapshot;
|
||||
}
|
||||
|
||||
// Removes a SnapshotImpl from this list.
|
||||
//
|
||||
// The snapshot must have been created by calling New() on this list.
|
||||
//
|
||||
// The snapshot pointer should not be const, because its memory is
|
||||
// deallocated. However, that would force us to change DB::ReleaseSnapshot(),
|
||||
// which is in the API, and currently takes a const Snapshot.
|
||||
void Delete(const SnapshotImpl* snapshot) {
|
||||
#if !defined(NDEBUG)
|
||||
assert(snapshot->list_ == this);
|
||||
#endif // !defined(NDEBUG)
|
||||
snapshot->prev_->next_ = snapshot->next_;
|
||||
snapshot->next_->prev_ = snapshot->prev_;
|
||||
delete snapshot;
|
||||
}
|
||||
|
||||
private:
|
||||
// Dummy head of doubly-linked list of snapshots
|
||||
SnapshotImpl list_;
|
||||
SnapshotImpl head_;
|
||||
};
|
||||
|
||||
} // namespace leveldb
|
||||
|
@ -29,18 +29,14 @@ static void UnrefEntry(void* arg1, void* arg2) {
|
||||
cache->Release(h);
|
||||
}
|
||||
|
||||
TableCache::TableCache(const std::string& dbname,
|
||||
const Options* options,
|
||||
TableCache::TableCache(const std::string& dbname, const Options& options,
|
||||
int entries)
|
||||
: env_(options->env),
|
||||
: env_(options.env),
|
||||
dbname_(dbname),
|
||||
options_(options),
|
||||
cache_(NewLRUCache(entries)) {
|
||||
}
|
||||
cache_(NewLRUCache(entries)) {}
|
||||
|
||||
TableCache::~TableCache() {
|
||||
delete cache_;
|
||||
}
|
||||
TableCache::~TableCache() { delete cache_; }
|
||||
|
||||
Status TableCache::FindTable(uint64_t file_number, uint64_t file_size,
|
||||
Cache::Handle** handle) {
|
||||
@ -49,10 +45,10 @@ Status TableCache::FindTable(uint64_t file_number, uint64_t file_size,
|
||||
EncodeFixed64(buf, file_number);
|
||||
Slice key(buf, sizeof(buf));
|
||||
*handle = cache_->Lookup(key);
|
||||
if (*handle == NULL) {
|
||||
if (*handle == nullptr) {
|
||||
std::string fname = TableFileName(dbname_, file_number);
|
||||
RandomAccessFile* file = NULL;
|
||||
Table* table = NULL;
|
||||
RandomAccessFile* file = nullptr;
|
||||
Table* table = nullptr;
|
||||
s = env_->NewRandomAccessFile(fname, &file);
|
||||
if (!s.ok()) {
|
||||
std::string old_fname = SSTTableFileName(dbname_, file_number);
|
||||
@ -61,11 +57,11 @@ Status TableCache::FindTable(uint64_t file_number, uint64_t file_size,
|
||||
}
|
||||
}
|
||||
if (s.ok()) {
|
||||
s = Table::Open(*options_, file, file_size, &table);
|
||||
s = Table::Open(options_, file, file_size, &table);
|
||||
}
|
||||
|
||||
if (!s.ok()) {
|
||||
assert(table == NULL);
|
||||
assert(table == nullptr);
|
||||
delete file;
|
||||
// We do not cache error results so that if the error is transient,
|
||||
// or somebody repairs the file, we recover automatically.
|
||||
@ -80,14 +76,13 @@ Status TableCache::FindTable(uint64_t file_number, uint64_t file_size,
|
||||
}
|
||||
|
||||
Iterator* TableCache::NewIterator(const ReadOptions& options,
|
||||
uint64_t file_number,
|
||||
uint64_t file_size,
|
||||
uint64_t file_number, uint64_t file_size,
|
||||
Table** tableptr) {
|
||||
if (tableptr != NULL) {
|
||||
*tableptr = NULL;
|
||||
if (tableptr != nullptr) {
|
||||
*tableptr = nullptr;
|
||||
}
|
||||
|
||||
Cache::Handle* handle = NULL;
|
||||
Cache::Handle* handle = nullptr;
|
||||
Status s = FindTable(file_number, file_size, &handle);
|
||||
if (!s.ok()) {
|
||||
return NewErrorIterator(s);
|
||||
@ -96,23 +91,21 @@ Iterator* TableCache::NewIterator(const ReadOptions& options,
|
||||
Table* table = reinterpret_cast<TableAndFile*>(cache_->Value(handle))->table;
|
||||
Iterator* result = table->NewIterator(options);
|
||||
result->RegisterCleanup(&UnrefEntry, cache_, handle);
|
||||
if (tableptr != NULL) {
|
||||
if (tableptr != nullptr) {
|
||||
*tableptr = table;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
Status TableCache::Get(const ReadOptions& options,
|
||||
uint64_t file_number,
|
||||
uint64_t file_size,
|
||||
const Slice& k,
|
||||
void* arg,
|
||||
void (*saver)(void*, const Slice&, const Slice&)) {
|
||||
Cache::Handle* handle = NULL;
|
||||
Status TableCache::Get(const ReadOptions& options, uint64_t file_number,
|
||||
uint64_t file_size, const Slice& k, void* arg,
|
||||
void (*handle_result)(void*, const Slice&,
|
||||
const Slice&)) {
|
||||
Cache::Handle* handle = nullptr;
|
||||
Status s = FindTable(file_number, file_size, &handle);
|
||||
if (s.ok()) {
|
||||
Table* t = reinterpret_cast<TableAndFile*>(cache_->Value(handle))->table;
|
||||
s = t->InternalGet(options, k, arg, saver);
|
||||
s = t->InternalGet(options, k, arg, handle_result);
|
||||
cache_->Release(handle);
|
||||
}
|
||||
return s;
|
||||
|
@ -7,8 +7,10 @@
|
||||
#ifndef STORAGE_LEVELDB_DB_TABLE_CACHE_H_
|
||||
#define STORAGE_LEVELDB_DB_TABLE_CACHE_H_
|
||||
|
||||
#include <string>
|
||||
#include <stdint.h>
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "db/dbformat.h"
|
||||
#include "leveldb/cache.h"
|
||||
#include "leveldb/table.h"
|
||||
@ -20,40 +22,35 @@ class Env;
|
||||
|
||||
class TableCache {
|
||||
public:
|
||||
TableCache(const std::string& dbname, const Options* options, int entries);
|
||||
TableCache(const std::string& dbname, const Options& options, int entries);
|
||||
~TableCache();
|
||||
|
||||
// Return an iterator for the specified file number (the corresponding
|
||||
// file length must be exactly "file_size" bytes). If "tableptr" is
|
||||
// non-NULL, also sets "*tableptr" to point to the Table object
|
||||
// underlying the returned iterator, or NULL if no Table object underlies
|
||||
// the returned iterator. The returned "*tableptr" object is owned by
|
||||
// the cache and should not be deleted, and is valid for as long as the
|
||||
// non-null, also sets "*tableptr" to point to the Table object
|
||||
// underlying the returned iterator, or to nullptr if no Table object
|
||||
// underlies the returned iterator. The returned "*tableptr" object is owned
|
||||
// by the cache and should not be deleted, and is valid for as long as the
|
||||
// returned iterator is live.
|
||||
Iterator* NewIterator(const ReadOptions& options,
|
||||
uint64_t file_number,
|
||||
uint64_t file_size,
|
||||
Table** tableptr = NULL);
|
||||
Iterator* NewIterator(const ReadOptions& options, uint64_t file_number,
|
||||
uint64_t file_size, Table** tableptr = nullptr);
|
||||
|
||||
// If a seek to internal key "k" in specified file finds an entry,
|
||||
// call (*handle_result)(arg, found_key, found_value).
|
||||
Status Get(const ReadOptions& options,
|
||||
uint64_t file_number,
|
||||
uint64_t file_size,
|
||||
const Slice& k,
|
||||
void* arg,
|
||||
Status Get(const ReadOptions& options, uint64_t file_number,
|
||||
uint64_t file_size, const Slice& k, void* arg,
|
||||
void (*handle_result)(void*, const Slice&, const Slice&));
|
||||
|
||||
// Evict any entry for the specified file number
|
||||
void Evict(uint64_t file_number);
|
||||
|
||||
private:
|
||||
Status FindTable(uint64_t file_number, uint64_t file_size, Cache::Handle**);
|
||||
|
||||
Env* const env_;
|
||||
const std::string dbname_;
|
||||
const Options* options_;
|
||||
const Options& options_;
|
||||
Cache* cache_;
|
||||
|
||||
Status FindTable(uint64_t file_number, uint64_t file_size, Cache::Handle**);
|
||||
};
|
||||
|
||||
} // namespace leveldb
|
||||
|
@ -12,15 +12,15 @@ namespace leveldb {
|
||||
// Tag numbers for serialized VersionEdit. These numbers are written to
|
||||
// disk and should not be changed.
|
||||
enum Tag {
|
||||
kComparator = 1,
|
||||
kLogNumber = 2,
|
||||
kNextFileNumber = 3,
|
||||
kLastSequence = 4,
|
||||
kCompactPointer = 5,
|
||||
kDeletedFile = 6,
|
||||
kNewFile = 7,
|
||||
kComparator = 1,
|
||||
kLogNumber = 2,
|
||||
kNextFileNumber = 3,
|
||||
kLastSequence = 4,
|
||||
kCompactPointer = 5,
|
||||
kDeletedFile = 6,
|
||||
kNewFile = 7,
|
||||
// 8 was used for large value refs
|
||||
kPrevLogNumber = 9
|
||||
kPrevLogNumber = 9
|
||||
};
|
||||
|
||||
void VersionEdit::Clear() {
|
||||
@ -66,12 +66,10 @@ void VersionEdit::EncodeTo(std::string* dst) const {
|
||||
PutLengthPrefixedSlice(dst, compact_pointers_[i].second.Encode());
|
||||
}
|
||||
|
||||
for (DeletedFileSet::const_iterator iter = deleted_files_.begin();
|
||||
iter != deleted_files_.end();
|
||||
++iter) {
|
||||
for (const auto& deleted_file_kvp : deleted_files_) {
|
||||
PutVarint32(dst, kDeletedFile);
|
||||
PutVarint32(dst, iter->first); // level
|
||||
PutVarint64(dst, iter->second); // file number
|
||||
PutVarint32(dst, deleted_file_kvp.first); // level
|
||||
PutVarint64(dst, deleted_file_kvp.second); // file number
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < new_files_.size(); i++) {
|
||||
@ -88,8 +86,7 @@ void VersionEdit::EncodeTo(std::string* dst) const {
|
||||
static bool GetInternalKey(Slice* input, InternalKey* dst) {
|
||||
Slice str;
|
||||
if (GetLengthPrefixedSlice(input, &str)) {
|
||||
dst->DecodeFrom(str);
|
||||
return true;
|
||||
return dst->DecodeFrom(str);
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
@ -97,8 +94,7 @@ static bool GetInternalKey(Slice* input, InternalKey* dst) {
|
||||
|
||||
static bool GetLevel(Slice* input, int* level) {
|
||||
uint32_t v;
|
||||
if (GetVarint32(input, &v) &&
|
||||
v < config::kNumLevels) {
|
||||
if (GetVarint32(input, &v) && v < config::kNumLevels) {
|
||||
*level = v;
|
||||
return true;
|
||||
} else {
|
||||
@ -109,7 +105,7 @@ static bool GetLevel(Slice* input, int* level) {
|
||||
Status VersionEdit::DecodeFrom(const Slice& src) {
|
||||
Clear();
|
||||
Slice input = src;
|
||||
const char* msg = NULL;
|
||||
const char* msg = nullptr;
|
||||
uint32_t tag;
|
||||
|
||||
// Temporary storage for parsing
|
||||
@ -119,7 +115,7 @@ Status VersionEdit::DecodeFrom(const Slice& src) {
|
||||
Slice str;
|
||||
InternalKey key;
|
||||
|
||||
while (msg == NULL && GetVarint32(&input, &tag)) {
|
||||
while (msg == nullptr && GetVarint32(&input, &tag)) {
|
||||
switch (tag) {
|
||||
case kComparator:
|
||||
if (GetLengthPrefixedSlice(&input, &str)) {
|
||||
@ -163,8 +159,7 @@ Status VersionEdit::DecodeFrom(const Slice& src) {
|
||||
break;
|
||||
|
||||
case kCompactPointer:
|
||||
if (GetLevel(&input, &level) &&
|
||||
GetInternalKey(&input, &key)) {
|
||||
if (GetLevel(&input, &level) && GetInternalKey(&input, &key)) {
|
||||
compact_pointers_.push_back(std::make_pair(level, key));
|
||||
} else {
|
||||
msg = "compaction pointer";
|
||||
@ -172,8 +167,7 @@ Status VersionEdit::DecodeFrom(const Slice& src) {
|
||||
break;
|
||||
|
||||
case kDeletedFile:
|
||||
if (GetLevel(&input, &level) &&
|
||||
GetVarint64(&input, &number)) {
|
||||
if (GetLevel(&input, &level) && GetVarint64(&input, &number)) {
|
||||
deleted_files_.insert(std::make_pair(level, number));
|
||||
} else {
|
||||
msg = "deleted file";
|
||||
@ -181,8 +175,7 @@ Status VersionEdit::DecodeFrom(const Slice& src) {
|
||||
break;
|
||||
|
||||
case kNewFile:
|
||||
if (GetLevel(&input, &level) &&
|
||||
GetVarint64(&input, &f.number) &&
|
||||
if (GetLevel(&input, &level) && GetVarint64(&input, &f.number) &&
|
||||
GetVarint64(&input, &f.file_size) &&
|
||||
GetInternalKey(&input, &f.smallest) &&
|
||||
GetInternalKey(&input, &f.largest)) {
|
||||
@ -198,12 +191,12 @@ Status VersionEdit::DecodeFrom(const Slice& src) {
|
||||
}
|
||||
}
|
||||
|
||||
if (msg == NULL && !input.empty()) {
|
||||
if (msg == nullptr && !input.empty()) {
|
||||
msg = "invalid tag";
|
||||
}
|
||||
|
||||
Status result;
|
||||
if (msg != NULL) {
|
||||
if (msg != nullptr) {
|
||||
result = Status::Corruption("VersionEdit", msg);
|
||||
}
|
||||
return result;
|
||||
@ -238,13 +231,11 @@ std::string VersionEdit::DebugString() const {
|
||||
r.append(" ");
|
||||
r.append(compact_pointers_[i].second.DebugString());
|
||||
}
|
||||
for (DeletedFileSet::const_iterator iter = deleted_files_.begin();
|
||||
iter != deleted_files_.end();
|
||||
++iter) {
|
||||
for (const auto& deleted_files_kvp : deleted_files_) {
|
||||
r.append("\n DeleteFile: ");
|
||||
AppendNumberTo(&r, iter->first);
|
||||
AppendNumberTo(&r, deleted_files_kvp.first);
|
||||
r.append(" ");
|
||||
AppendNumberTo(&r, iter->second);
|
||||
AppendNumberTo(&r, deleted_files_kvp.second);
|
||||
}
|
||||
for (size_t i = 0; i < new_files_.size(); i++) {
|
||||
const FileMetaData& f = new_files_[i].second;
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include <set>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "db/dbformat.h"
|
||||
|
||||
namespace leveldb {
|
||||
@ -15,20 +16,20 @@ namespace leveldb {
|
||||
class VersionSet;
|
||||
|
||||
struct FileMetaData {
|
||||
int refs;
|
||||
int allowed_seeks; // Seeks allowed until compaction
|
||||
uint64_t number;
|
||||
uint64_t file_size; // File size in bytes
|
||||
InternalKey smallest; // Smallest internal key served by table
|
||||
InternalKey largest; // Largest internal key served by table
|
||||
FileMetaData() : refs(0), allowed_seeks(1 << 30), file_size(0) {}
|
||||
|
||||
FileMetaData() : refs(0), allowed_seeks(1 << 30), file_size(0) { }
|
||||
int refs;
|
||||
int allowed_seeks; // Seeks allowed until compaction
|
||||
uint64_t number;
|
||||
uint64_t file_size; // File size in bytes
|
||||
InternalKey smallest; // Smallest internal key served by table
|
||||
InternalKey largest; // Largest internal key served by table
|
||||
};
|
||||
|
||||
class VersionEdit {
|
||||
public:
|
||||
VersionEdit() { Clear(); }
|
||||
~VersionEdit() { }
|
||||
~VersionEdit() = default;
|
||||
|
||||
void Clear();
|
||||
|
||||
@ -59,10 +60,8 @@ class VersionEdit {
|
||||
// Add the specified file at the specified number.
|
||||
// REQUIRES: This version has not been saved (see VersionSet::SaveTo)
|
||||
// REQUIRES: "smallest" and "largest" are smallest and largest keys in file
|
||||
void AddFile(int level, uint64_t file,
|
||||
uint64_t file_size,
|
||||
const InternalKey& smallest,
|
||||
const InternalKey& largest) {
|
||||
void AddFile(int level, uint64_t file, uint64_t file_size,
|
||||
const InternalKey& smallest, const InternalKey& largest) {
|
||||
FileMetaData f;
|
||||
f.number = file;
|
||||
f.file_size = file_size;
|
||||
@ -84,7 +83,7 @@ class VersionEdit {
|
||||
private:
|
||||
friend class VersionSet;
|
||||
|
||||
typedef std::set< std::pair<int, uint64_t> > DeletedFileSet;
|
||||
typedef std::set<std::pair<int, uint64_t> > DeletedFileSet;
|
||||
|
||||
std::string comparator_;
|
||||
uint64_t log_number_;
|
||||
@ -97,9 +96,9 @@ class VersionEdit {
|
||||
bool has_next_file_number_;
|
||||
bool has_last_sequence_;
|
||||
|
||||
std::vector< std::pair<int, InternalKey> > compact_pointers_;
|
||||
std::vector<std::pair<int, InternalKey> > compact_pointers_;
|
||||
DeletedFileSet deleted_files_;
|
||||
std::vector< std::pair<int, FileMetaData> > new_files_;
|
||||
std::vector<std::pair<int, FileMetaData> > new_files_;
|
||||
};
|
||||
|
||||
} // namespace leveldb
|
||||
|
@ -17,7 +17,7 @@ static void TestEncodeDecode(const VersionEdit& edit) {
|
||||
ASSERT_EQ(encoded, encoded2);
|
||||
}
|
||||
|
||||
class VersionEditTest { };
|
||||
class VersionEditTest {};
|
||||
|
||||
TEST(VersionEditTest, EncodeDecode) {
|
||||
static const uint64_t kBig = 1ull << 50;
|
||||
@ -41,6 +41,4 @@ TEST(VersionEditTest, EncodeDecode) {
|
||||
|
||||
} // namespace leveldb
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
return leveldb::test::RunAllTests();
|
||||
}
|
||||
int main(int argc, char** argv) { return leveldb::test::RunAllTests(); }
|
||||
|
File diff suppressed because it is too large
Load Diff
129
db/version_set.h
129
db/version_set.h
@ -18,6 +18,7 @@
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
#include "db/dbformat.h"
|
||||
#include "db/version_edit.h"
|
||||
#include "port/port.h"
|
||||
@ -25,7 +26,9 @@
|
||||
|
||||
namespace leveldb {
|
||||
|
||||
namespace log { class Writer; }
|
||||
namespace log {
|
||||
class Writer;
|
||||
}
|
||||
|
||||
class Compaction;
|
||||
class Iterator;
|
||||
@ -39,30 +42,23 @@ class WritableFile;
|
||||
// Return the smallest index i such that files[i]->largest >= key.
|
||||
// Return files.size() if there is no such file.
|
||||
// REQUIRES: "files" contains a sorted list of non-overlapping files.
|
||||
extern int FindFile(const InternalKeyComparator& icmp,
|
||||
const std::vector<FileMetaData*>& files,
|
||||
const Slice& key);
|
||||
int FindFile(const InternalKeyComparator& icmp,
|
||||
const std::vector<FileMetaData*>& files, const Slice& key);
|
||||
|
||||
// Returns true iff some file in "files" overlaps the user key range
|
||||
// [*smallest,*largest].
|
||||
// smallest==NULL represents a key smaller than all keys in the DB.
|
||||
// largest==NULL represents a key larger than all keys in the DB.
|
||||
// smallest==nullptr represents a key smaller than all keys in the DB.
|
||||
// largest==nullptr represents a key larger than all keys in the DB.
|
||||
// REQUIRES: If disjoint_sorted_files, files[] contains disjoint ranges
|
||||
// in sorted order.
|
||||
extern bool SomeFileOverlapsRange(
|
||||
const InternalKeyComparator& icmp,
|
||||
bool disjoint_sorted_files,
|
||||
const std::vector<FileMetaData*>& files,
|
||||
const Slice* smallest_user_key,
|
||||
const Slice* largest_user_key);
|
||||
bool SomeFileOverlapsRange(const InternalKeyComparator& icmp,
|
||||
bool disjoint_sorted_files,
|
||||
const std::vector<FileMetaData*>& files,
|
||||
const Slice* smallest_user_key,
|
||||
const Slice* largest_user_key);
|
||||
|
||||
class Version {
|
||||
public:
|
||||
// Append to *iters a sequence of iterators that will
|
||||
// yield the contents of this Version when merged together.
|
||||
// REQUIRES: This version has been saved (see VersionSet::SaveTo)
|
||||
void AddIterators(const ReadOptions&, std::vector<Iterator*>* iters);
|
||||
|
||||
// Lookup the value for key. If found, store it in *val and
|
||||
// return OK. Else return a non-OK status. Fills *stats.
|
||||
// REQUIRES: lock is not held
|
||||
@ -70,6 +66,12 @@ class Version {
|
||||
FileMetaData* seek_file;
|
||||
int seek_file_level;
|
||||
};
|
||||
|
||||
// Append to *iters a sequence of iterators that will
|
||||
// yield the contents of this Version when merged together.
|
||||
// REQUIRES: This version has been saved (see VersionSet::SaveTo)
|
||||
void AddIterators(const ReadOptions&, std::vector<Iterator*>* iters);
|
||||
|
||||
Status Get(const ReadOptions&, const LookupKey& key, std::string* val,
|
||||
GetStats* stats);
|
||||
|
||||
@ -91,16 +93,15 @@ class Version {
|
||||
|
||||
void GetOverlappingInputs(
|
||||
int level,
|
||||
const InternalKey* begin, // NULL means before all keys
|
||||
const InternalKey* end, // NULL means after all keys
|
||||
const InternalKey* begin, // nullptr means before all keys
|
||||
const InternalKey* end, // nullptr means after all keys
|
||||
std::vector<FileMetaData*>* inputs);
|
||||
|
||||
// Returns true iff some file in the specified level overlaps
|
||||
// some part of [*smallest_user_key,*largest_user_key].
|
||||
// smallest_user_key==NULL represents a key smaller than all keys in the DB.
|
||||
// largest_user_key==NULL represents a key larger than all keys in the DB.
|
||||
bool OverlapInLevel(int level,
|
||||
const Slice* smallest_user_key,
|
||||
// smallest_user_key==nullptr represents a key smaller than all the DB's keys.
|
||||
// largest_user_key==nullptr represents a key larger than all the DB's keys.
|
||||
bool OverlapInLevel(int level, const Slice* smallest_user_key,
|
||||
const Slice* largest_user_key);
|
||||
|
||||
// Return the level at which we should place a new memtable compaction
|
||||
@ -118,6 +119,22 @@ class Version {
|
||||
friend class VersionSet;
|
||||
|
||||
class LevelFileNumIterator;
|
||||
|
||||
explicit Version(VersionSet* vset)
|
||||
: vset_(vset),
|
||||
next_(this),
|
||||
prev_(this),
|
||||
refs_(0),
|
||||
file_to_compact_(nullptr),
|
||||
file_to_compact_level_(-1),
|
||||
compaction_score_(-1),
|
||||
compaction_level_(-1) {}
|
||||
|
||||
Version(const Version&) = delete;
|
||||
Version& operator=(const Version&) = delete;
|
||||
|
||||
~Version();
|
||||
|
||||
Iterator* NewConcatenatingIterator(const ReadOptions&, int level) const;
|
||||
|
||||
// Call func(arg, level, f) for every file that overlaps user_key in
|
||||
@ -125,14 +142,13 @@ class Version {
|
||||
// false, makes no more calls.
|
||||
//
|
||||
// REQUIRES: user portion of internal_key == user_key.
|
||||
void ForEachOverlapping(Slice user_key, Slice internal_key,
|
||||
void* arg,
|
||||
void ForEachOverlapping(Slice user_key, Slice internal_key, void* arg,
|
||||
bool (*func)(void*, int, FileMetaData*));
|
||||
|
||||
VersionSet* vset_; // VersionSet to which this Version belongs
|
||||
Version* next_; // Next version in linked list
|
||||
Version* prev_; // Previous version in linked list
|
||||
int refs_; // Number of live refs to this version
|
||||
VersionSet* vset_; // VersionSet to which this Version belongs
|
||||
Version* next_; // Next version in linked list
|
||||
Version* prev_; // Previous version in linked list
|
||||
int refs_; // Number of live refs to this version
|
||||
|
||||
// List of files per level
|
||||
std::vector<FileMetaData*> files_[config::kNumLevels];
|
||||
@ -146,28 +162,15 @@ class Version {
|
||||
// are initialized by Finalize().
|
||||
double compaction_score_;
|
||||
int compaction_level_;
|
||||
|
||||
explicit Version(VersionSet* vset)
|
||||
: vset_(vset), next_(this), prev_(this), refs_(0),
|
||||
file_to_compact_(NULL),
|
||||
file_to_compact_level_(-1),
|
||||
compaction_score_(-1),
|
||||
compaction_level_(-1) {
|
||||
}
|
||||
|
||||
~Version();
|
||||
|
||||
// No copying allowed
|
||||
Version(const Version&);
|
||||
void operator=(const Version&);
|
||||
};
|
||||
|
||||
class VersionSet {
|
||||
public:
|
||||
VersionSet(const std::string& dbname,
|
||||
const Options* options,
|
||||
TableCache* table_cache,
|
||||
const InternalKeyComparator*);
|
||||
VersionSet(const std::string& dbname, const Options* options,
|
||||
TableCache* table_cache, const InternalKeyComparator*);
|
||||
VersionSet(const VersionSet&) = delete;
|
||||
VersionSet& operator=(const VersionSet&) = delete;
|
||||
|
||||
~VersionSet();
|
||||
|
||||
// Apply *edit to the current version to form a new descriptor that
|
||||
@ -179,7 +182,7 @@ class VersionSet {
|
||||
EXCLUSIVE_LOCKS_REQUIRED(mu);
|
||||
|
||||
// Recover the last saved descriptor from persistent storage.
|
||||
Status Recover();
|
||||
Status Recover(bool* save_manifest);
|
||||
|
||||
// Return the current version.
|
||||
Version* current() const { return current_; }
|
||||
@ -225,19 +228,17 @@ class VersionSet {
|
||||
uint64_t PrevLogNumber() const { return prev_log_number_; }
|
||||
|
||||
// Pick level and inputs for a new compaction.
|
||||
// Returns NULL if there is no compaction to be done.
|
||||
// Returns nullptr if there is no compaction to be done.
|
||||
// Otherwise returns a pointer to a heap-allocated object that
|
||||
// describes the compaction. Caller should delete the result.
|
||||
Compaction* PickCompaction();
|
||||
|
||||
// Return a compaction object for compacting the range [begin,end] in
|
||||
// the specified level. Returns NULL if there is nothing in that
|
||||
// the specified level. Returns nullptr if there is nothing in that
|
||||
// level that overlaps the specified range. Caller should delete
|
||||
// the result.
|
||||
Compaction* CompactRange(
|
||||
int level,
|
||||
const InternalKey* begin,
|
||||
const InternalKey* end);
|
||||
Compaction* CompactRange(int level, const InternalKey* begin,
|
||||
const InternalKey* end);
|
||||
|
||||
// Return the maximum overlapping data (in bytes) at next level for any
|
||||
// file at a level >= 1.
|
||||
@ -250,7 +251,7 @@ class VersionSet {
|
||||
// Returns true iff some level needs a compaction.
|
||||
bool NeedsCompaction() const {
|
||||
Version* v = current_;
|
||||
return (v->compaction_score_ >= 1) || (v->file_to_compact_ != NULL);
|
||||
return (v->compaction_score_ >= 1) || (v->file_to_compact_ != nullptr);
|
||||
}
|
||||
|
||||
// Add all files listed in any live version to *live.
|
||||
@ -274,16 +275,16 @@ class VersionSet {
|
||||
friend class Compaction;
|
||||
friend class Version;
|
||||
|
||||
bool ReuseManifest(const std::string& dscname, const std::string& dscbase);
|
||||
|
||||
void Finalize(Version* v);
|
||||
|
||||
void GetRange(const std::vector<FileMetaData*>& inputs,
|
||||
InternalKey* smallest,
|
||||
void GetRange(const std::vector<FileMetaData*>& inputs, InternalKey* smallest,
|
||||
InternalKey* largest);
|
||||
|
||||
void GetRange2(const std::vector<FileMetaData*>& inputs1,
|
||||
const std::vector<FileMetaData*>& inputs2,
|
||||
InternalKey* smallest,
|
||||
InternalKey* largest);
|
||||
InternalKey* smallest, InternalKey* largest);
|
||||
|
||||
void SetupOtherInputs(Compaction* c);
|
||||
|
||||
@ -312,10 +313,6 @@ class VersionSet {
|
||||
// Per-level key at which the next compaction at that level should start.
|
||||
// Either an empty string, or a valid InternalKey.
|
||||
std::string compact_pointer_[config::kNumLevels];
|
||||
|
||||
// No copying allowed
|
||||
VersionSet(const VersionSet&);
|
||||
void operator=(const VersionSet&);
|
||||
};
|
||||
|
||||
// A Compaction encapsulates information about a compaction.
|
||||
@ -364,7 +361,7 @@ class Compaction {
|
||||
friend class Version;
|
||||
friend class VersionSet;
|
||||
|
||||
explicit Compaction(int level);
|
||||
Compaction(const Options* options, int level);
|
||||
|
||||
int level_;
|
||||
uint64_t max_output_file_size_;
|
||||
@ -372,9 +369,9 @@ class Compaction {
|
||||
VersionEdit edit_;
|
||||
|
||||
// Each compaction reads inputs from "level_" and "level_+1"
|
||||
std::vector<FileMetaData*> inputs_[2]; // The two sets of inputs
|
||||
std::vector<FileMetaData*> inputs_[2]; // The two sets of inputs
|
||||
|
||||
// State used to check for number of of overlapping grandparent files
|
||||
// State used to check for number of overlapping grandparent files
|
||||
// (parent == level_ + 1, grandparent == level_ + 2)
|
||||
std::vector<FileMetaData*> grandparents_;
|
||||
size_t grandparent_index_; // Index in grandparent_starts_
|
||||
|
@ -11,10 +11,7 @@ namespace leveldb {
|
||||
|
||||
class FindFileTest {
|
||||
public:
|
||||
std::vector<FileMetaData*> files_;
|
||||
bool disjoint_sorted_files_;
|
||||
|
||||
FindFileTest() : disjoint_sorted_files_(true) { }
|
||||
FindFileTest() : disjoint_sorted_files_(true) {}
|
||||
|
||||
~FindFileTest() {
|
||||
for (int i = 0; i < files_.size(); i++) {
|
||||
@ -40,20 +37,25 @@ class FindFileTest {
|
||||
|
||||
bool Overlaps(const char* smallest, const char* largest) {
|
||||
InternalKeyComparator cmp(BytewiseComparator());
|
||||
Slice s(smallest != NULL ? smallest : "");
|
||||
Slice l(largest != NULL ? largest : "");
|
||||
Slice s(smallest != nullptr ? smallest : "");
|
||||
Slice l(largest != nullptr ? largest : "");
|
||||
return SomeFileOverlapsRange(cmp, disjoint_sorted_files_, files_,
|
||||
(smallest != NULL ? &s : NULL),
|
||||
(largest != NULL ? &l : NULL));
|
||||
(smallest != nullptr ? &s : nullptr),
|
||||
(largest != nullptr ? &l : nullptr));
|
||||
}
|
||||
|
||||
bool disjoint_sorted_files_;
|
||||
|
||||
private:
|
||||
std::vector<FileMetaData*> files_;
|
||||
};
|
||||
|
||||
TEST(FindFileTest, Empty) {
|
||||
ASSERT_EQ(0, Find("foo"));
|
||||
ASSERT_TRUE(! Overlaps("a", "z"));
|
||||
ASSERT_TRUE(! Overlaps(NULL, "z"));
|
||||
ASSERT_TRUE(! Overlaps("a", NULL));
|
||||
ASSERT_TRUE(! Overlaps(NULL, NULL));
|
||||
ASSERT_TRUE(!Overlaps("a", "z"));
|
||||
ASSERT_TRUE(!Overlaps(nullptr, "z"));
|
||||
ASSERT_TRUE(!Overlaps("a", nullptr));
|
||||
ASSERT_TRUE(!Overlaps(nullptr, nullptr));
|
||||
}
|
||||
|
||||
TEST(FindFileTest, Single) {
|
||||
@ -65,8 +67,8 @@ TEST(FindFileTest, Single) {
|
||||
ASSERT_EQ(1, Find("q1"));
|
||||
ASSERT_EQ(1, Find("z"));
|
||||
|
||||
ASSERT_TRUE(! Overlaps("a", "b"));
|
||||
ASSERT_TRUE(! Overlaps("z1", "z2"));
|
||||
ASSERT_TRUE(!Overlaps("a", "b"));
|
||||
ASSERT_TRUE(!Overlaps("z1", "z2"));
|
||||
ASSERT_TRUE(Overlaps("a", "p"));
|
||||
ASSERT_TRUE(Overlaps("a", "q"));
|
||||
ASSERT_TRUE(Overlaps("a", "z"));
|
||||
@ -78,15 +80,14 @@ TEST(FindFileTest, Single) {
|
||||
ASSERT_TRUE(Overlaps("q", "q"));
|
||||
ASSERT_TRUE(Overlaps("q", "q1"));
|
||||
|
||||
ASSERT_TRUE(! Overlaps(NULL, "j"));
|
||||
ASSERT_TRUE(! Overlaps("r", NULL));
|
||||
ASSERT_TRUE(Overlaps(NULL, "p"));
|
||||
ASSERT_TRUE(Overlaps(NULL, "p1"));
|
||||
ASSERT_TRUE(Overlaps("q", NULL));
|
||||
ASSERT_TRUE(Overlaps(NULL, NULL));
|
||||
ASSERT_TRUE(!Overlaps(nullptr, "j"));
|
||||
ASSERT_TRUE(!Overlaps("r", nullptr));
|
||||
ASSERT_TRUE(Overlaps(nullptr, "p"));
|
||||
ASSERT_TRUE(Overlaps(nullptr, "p1"));
|
||||
ASSERT_TRUE(Overlaps("q", nullptr));
|
||||
ASSERT_TRUE(Overlaps(nullptr, nullptr));
|
||||
}
|
||||
|
||||
|
||||
TEST(FindFileTest, Multiple) {
|
||||
Add("150", "200");
|
||||
Add("200", "250");
|
||||
@ -110,10 +111,10 @@ TEST(FindFileTest, Multiple) {
|
||||
ASSERT_EQ(3, Find("450"));
|
||||
ASSERT_EQ(4, Find("451"));
|
||||
|
||||
ASSERT_TRUE(! Overlaps("100", "149"));
|
||||
ASSERT_TRUE(! Overlaps("251", "299"));
|
||||
ASSERT_TRUE(! Overlaps("451", "500"));
|
||||
ASSERT_TRUE(! Overlaps("351", "399"));
|
||||
ASSERT_TRUE(!Overlaps("100", "149"));
|
||||
ASSERT_TRUE(!Overlaps("251", "299"));
|
||||
ASSERT_TRUE(!Overlaps("451", "500"));
|
||||
ASSERT_TRUE(!Overlaps("351", "399"));
|
||||
|
||||
ASSERT_TRUE(Overlaps("100", "150"));
|
||||
ASSERT_TRUE(Overlaps("100", "200"));
|
||||
@ -130,25 +131,25 @@ TEST(FindFileTest, MultipleNullBoundaries) {
|
||||
Add("200", "250");
|
||||
Add("300", "350");
|
||||
Add("400", "450");
|
||||
ASSERT_TRUE(! Overlaps(NULL, "149"));
|
||||
ASSERT_TRUE(! Overlaps("451", NULL));
|
||||
ASSERT_TRUE(Overlaps(NULL, NULL));
|
||||
ASSERT_TRUE(Overlaps(NULL, "150"));
|
||||
ASSERT_TRUE(Overlaps(NULL, "199"));
|
||||
ASSERT_TRUE(Overlaps(NULL, "200"));
|
||||
ASSERT_TRUE(Overlaps(NULL, "201"));
|
||||
ASSERT_TRUE(Overlaps(NULL, "400"));
|
||||
ASSERT_TRUE(Overlaps(NULL, "800"));
|
||||
ASSERT_TRUE(Overlaps("100", NULL));
|
||||
ASSERT_TRUE(Overlaps("200", NULL));
|
||||
ASSERT_TRUE(Overlaps("449", NULL));
|
||||
ASSERT_TRUE(Overlaps("450", NULL));
|
||||
ASSERT_TRUE(!Overlaps(nullptr, "149"));
|
||||
ASSERT_TRUE(!Overlaps("451", nullptr));
|
||||
ASSERT_TRUE(Overlaps(nullptr, nullptr));
|
||||
ASSERT_TRUE(Overlaps(nullptr, "150"));
|
||||
ASSERT_TRUE(Overlaps(nullptr, "199"));
|
||||
ASSERT_TRUE(Overlaps(nullptr, "200"));
|
||||
ASSERT_TRUE(Overlaps(nullptr, "201"));
|
||||
ASSERT_TRUE(Overlaps(nullptr, "400"));
|
||||
ASSERT_TRUE(Overlaps(nullptr, "800"));
|
||||
ASSERT_TRUE(Overlaps("100", nullptr));
|
||||
ASSERT_TRUE(Overlaps("200", nullptr));
|
||||
ASSERT_TRUE(Overlaps("449", nullptr));
|
||||
ASSERT_TRUE(Overlaps("450", nullptr));
|
||||
}
|
||||
|
||||
TEST(FindFileTest, OverlapSequenceChecks) {
|
||||
Add("200", "200", 5000, 3000);
|
||||
ASSERT_TRUE(! Overlaps("199", "199"));
|
||||
ASSERT_TRUE(! Overlaps("201", "300"));
|
||||
ASSERT_TRUE(!Overlaps("199", "199"));
|
||||
ASSERT_TRUE(!Overlaps("201", "300"));
|
||||
ASSERT_TRUE(Overlaps("200", "200"));
|
||||
ASSERT_TRUE(Overlaps("190", "200"));
|
||||
ASSERT_TRUE(Overlaps("200", "210"));
|
||||
@ -158,8 +159,8 @@ TEST(FindFileTest, OverlappingFiles) {
|
||||
Add("150", "600");
|
||||
Add("400", "500");
|
||||
disjoint_sorted_files_ = false;
|
||||
ASSERT_TRUE(! Overlaps("100", "149"));
|
||||
ASSERT_TRUE(! Overlaps("601", "700"));
|
||||
ASSERT_TRUE(!Overlaps("100", "149"));
|
||||
ASSERT_TRUE(!Overlaps("601", "700"));
|
||||
ASSERT_TRUE(Overlaps("100", "150"));
|
||||
ASSERT_TRUE(Overlaps("100", "200"));
|
||||
ASSERT_TRUE(Overlaps("100", "300"));
|
||||
@ -172,8 +173,160 @@ TEST(FindFileTest, OverlappingFiles) {
|
||||
ASSERT_TRUE(Overlaps("600", "700"));
|
||||
}
|
||||
|
||||
void AddBoundaryInputs(const InternalKeyComparator& icmp,
|
||||
const std::vector<FileMetaData*>& level_files,
|
||||
std::vector<FileMetaData*>* compaction_files);
|
||||
|
||||
class AddBoundaryInputsTest {
|
||||
public:
|
||||
std::vector<FileMetaData*> level_files_;
|
||||
std::vector<FileMetaData*> compaction_files_;
|
||||
std::vector<FileMetaData*> all_files_;
|
||||
InternalKeyComparator icmp_;
|
||||
|
||||
AddBoundaryInputsTest() : icmp_(BytewiseComparator()) {}
|
||||
|
||||
~AddBoundaryInputsTest() {
|
||||
for (size_t i = 0; i < all_files_.size(); ++i) {
|
||||
delete all_files_[i];
|
||||
}
|
||||
all_files_.clear();
|
||||
}
|
||||
|
||||
FileMetaData* CreateFileMetaData(uint64_t number, InternalKey smallest,
|
||||
InternalKey largest) {
|
||||
FileMetaData* f = new FileMetaData();
|
||||
f->number = number;
|
||||
f->smallest = smallest;
|
||||
f->largest = largest;
|
||||
all_files_.push_back(f);
|
||||
return f;
|
||||
}
|
||||
};
|
||||
|
||||
TEST(AddBoundaryInputsTest, TestEmptyFileSets) {
|
||||
AddBoundaryInputs(icmp_, level_files_, &compaction_files_);
|
||||
ASSERT_TRUE(compaction_files_.empty());
|
||||
ASSERT_TRUE(level_files_.empty());
|
||||
}
|
||||
|
||||
TEST(AddBoundaryInputsTest, TestEmptyLevelFiles) {
|
||||
FileMetaData* f1 =
|
||||
CreateFileMetaData(1, InternalKey("100", 2, kTypeValue),
|
||||
InternalKey(InternalKey("100", 1, kTypeValue)));
|
||||
compaction_files_.push_back(f1);
|
||||
|
||||
AddBoundaryInputs(icmp_, level_files_, &compaction_files_);
|
||||
ASSERT_EQ(1, compaction_files_.size());
|
||||
ASSERT_EQ(f1, compaction_files_[0]);
|
||||
ASSERT_TRUE(level_files_.empty());
|
||||
}
|
||||
|
||||
TEST(AddBoundaryInputsTest, TestEmptyCompactionFiles) {
|
||||
FileMetaData* f1 =
|
||||
CreateFileMetaData(1, InternalKey("100", 2, kTypeValue),
|
||||
InternalKey(InternalKey("100", 1, kTypeValue)));
|
||||
level_files_.push_back(f1);
|
||||
|
||||
AddBoundaryInputs(icmp_, level_files_, &compaction_files_);
|
||||
ASSERT_TRUE(compaction_files_.empty());
|
||||
ASSERT_EQ(1, level_files_.size());
|
||||
ASSERT_EQ(f1, level_files_[0]);
|
||||
}
|
||||
|
||||
TEST(AddBoundaryInputsTest, TestNoBoundaryFiles) {
|
||||
FileMetaData* f1 =
|
||||
CreateFileMetaData(1, InternalKey("100", 2, kTypeValue),
|
||||
InternalKey(InternalKey("100", 1, kTypeValue)));
|
||||
FileMetaData* f2 =
|
||||
CreateFileMetaData(1, InternalKey("200", 2, kTypeValue),
|
||||
InternalKey(InternalKey("200", 1, kTypeValue)));
|
||||
FileMetaData* f3 =
|
||||
CreateFileMetaData(1, InternalKey("300", 2, kTypeValue),
|
||||
InternalKey(InternalKey("300", 1, kTypeValue)));
|
||||
|
||||
level_files_.push_back(f3);
|
||||
level_files_.push_back(f2);
|
||||
level_files_.push_back(f1);
|
||||
compaction_files_.push_back(f2);
|
||||
compaction_files_.push_back(f3);
|
||||
|
||||
AddBoundaryInputs(icmp_, level_files_, &compaction_files_);
|
||||
ASSERT_EQ(2, compaction_files_.size());
|
||||
}
|
||||
|
||||
TEST(AddBoundaryInputsTest, TestOneBoundaryFiles) {
|
||||
FileMetaData* f1 =
|
||||
CreateFileMetaData(1, InternalKey("100", 3, kTypeValue),
|
||||
InternalKey(InternalKey("100", 2, kTypeValue)));
|
||||
FileMetaData* f2 =
|
||||
CreateFileMetaData(1, InternalKey("100", 1, kTypeValue),
|
||||
InternalKey(InternalKey("200", 3, kTypeValue)));
|
||||
FileMetaData* f3 =
|
||||
CreateFileMetaData(1, InternalKey("300", 2, kTypeValue),
|
||||
InternalKey(InternalKey("300", 1, kTypeValue)));
|
||||
|
||||
level_files_.push_back(f3);
|
||||
level_files_.push_back(f2);
|
||||
level_files_.push_back(f1);
|
||||
compaction_files_.push_back(f1);
|
||||
|
||||
AddBoundaryInputs(icmp_, level_files_, &compaction_files_);
|
||||
ASSERT_EQ(2, compaction_files_.size());
|
||||
ASSERT_EQ(f1, compaction_files_[0]);
|
||||
ASSERT_EQ(f2, compaction_files_[1]);
|
||||
}
|
||||
|
||||
TEST(AddBoundaryInputsTest, TestTwoBoundaryFiles) {
|
||||
FileMetaData* f1 =
|
||||
CreateFileMetaData(1, InternalKey("100", 6, kTypeValue),
|
||||
InternalKey(InternalKey("100", 5, kTypeValue)));
|
||||
FileMetaData* f2 =
|
||||
CreateFileMetaData(1, InternalKey("100", 2, kTypeValue),
|
||||
InternalKey(InternalKey("300", 1, kTypeValue)));
|
||||
FileMetaData* f3 =
|
||||
CreateFileMetaData(1, InternalKey("100", 4, kTypeValue),
|
||||
InternalKey(InternalKey("100", 3, kTypeValue)));
|
||||
|
||||
level_files_.push_back(f2);
|
||||
level_files_.push_back(f3);
|
||||
level_files_.push_back(f1);
|
||||
compaction_files_.push_back(f1);
|
||||
|
||||
AddBoundaryInputs(icmp_, level_files_, &compaction_files_);
|
||||
ASSERT_EQ(3, compaction_files_.size());
|
||||
ASSERT_EQ(f1, compaction_files_[0]);
|
||||
ASSERT_EQ(f3, compaction_files_[1]);
|
||||
ASSERT_EQ(f2, compaction_files_[2]);
|
||||
}
|
||||
|
||||
TEST(AddBoundaryInputsTest, TestDisjoinFilePointers) {
|
||||
FileMetaData* f1 =
|
||||
CreateFileMetaData(1, InternalKey("100", 6, kTypeValue),
|
||||
InternalKey(InternalKey("100", 5, kTypeValue)));
|
||||
FileMetaData* f2 =
|
||||
CreateFileMetaData(1, InternalKey("100", 6, kTypeValue),
|
||||
InternalKey(InternalKey("100", 5, kTypeValue)));
|
||||
FileMetaData* f3 =
|
||||
CreateFileMetaData(1, InternalKey("100", 2, kTypeValue),
|
||||
InternalKey(InternalKey("300", 1, kTypeValue)));
|
||||
FileMetaData* f4 =
|
||||
CreateFileMetaData(1, InternalKey("100", 4, kTypeValue),
|
||||
InternalKey(InternalKey("100", 3, kTypeValue)));
|
||||
|
||||
level_files_.push_back(f2);
|
||||
level_files_.push_back(f3);
|
||||
level_files_.push_back(f4);
|
||||
|
||||
compaction_files_.push_back(f1);
|
||||
|
||||
AddBoundaryInputs(icmp_, level_files_, &compaction_files_);
|
||||
ASSERT_EQ(3, compaction_files_.size());
|
||||
ASSERT_EQ(f1, compaction_files_[0]);
|
||||
ASSERT_EQ(f4, compaction_files_[1]);
|
||||
ASSERT_EQ(f3, compaction_files_[2]);
|
||||
}
|
||||
|
||||
} // namespace leveldb
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
return leveldb::test::RunAllTests();
|
||||
}
|
||||
int main(int argc, char** argv) { return leveldb::test::RunAllTests(); }
|
||||
|
@ -15,10 +15,10 @@
|
||||
|
||||
#include "leveldb/write_batch.h"
|
||||
|
||||
#include "leveldb/db.h"
|
||||
#include "db/dbformat.h"
|
||||
#include "db/memtable.h"
|
||||
#include "db/write_batch_internal.h"
|
||||
#include "leveldb/db.h"
|
||||
#include "util/coding.h"
|
||||
|
||||
namespace leveldb {
|
||||
@ -26,19 +26,19 @@ namespace leveldb {
|
||||
// WriteBatch header has an 8-byte sequence number followed by a 4-byte count.
|
||||
static const size_t kHeader = 12;
|
||||
|
||||
WriteBatch::WriteBatch() {
|
||||
Clear();
|
||||
}
|
||||
WriteBatch::WriteBatch() { Clear(); }
|
||||
|
||||
WriteBatch::~WriteBatch() { }
|
||||
WriteBatch::~WriteBatch() = default;
|
||||
|
||||
WriteBatch::Handler::~Handler() { }
|
||||
WriteBatch::Handler::~Handler() = default;
|
||||
|
||||
void WriteBatch::Clear() {
|
||||
rep_.clear();
|
||||
rep_.resize(kHeader);
|
||||
}
|
||||
|
||||
size_t WriteBatch::ApproximateSize() const { return rep_.size(); }
|
||||
|
||||
Status WriteBatch::Iterate(Handler* handler) const {
|
||||
Slice input(rep_);
|
||||
if (input.size() < kHeader) {
|
||||
@ -108,25 +108,28 @@ void WriteBatch::Delete(const Slice& key) {
|
||||
PutLengthPrefixedSlice(&rep_, key);
|
||||
}
|
||||
|
||||
void WriteBatch::Append(const WriteBatch& source) {
|
||||
WriteBatchInternal::Append(this, &source);
|
||||
}
|
||||
|
||||
namespace {
|
||||
class MemTableInserter : public WriteBatch::Handler {
|
||||
public:
|
||||
SequenceNumber sequence_;
|
||||
MemTable* mem_;
|
||||
|
||||
virtual void Put(const Slice& key, const Slice& value) {
|
||||
void Put(const Slice& key, const Slice& value) override {
|
||||
mem_->Add(sequence_, kTypeValue, key, value);
|
||||
sequence_++;
|
||||
}
|
||||
virtual void Delete(const Slice& key) {
|
||||
void Delete(const Slice& key) override {
|
||||
mem_->Add(sequence_, kTypeDeletion, key, Slice());
|
||||
sequence_++;
|
||||
}
|
||||
};
|
||||
} // namespace
|
||||
|
||||
Status WriteBatchInternal::InsertInto(const WriteBatch* b,
|
||||
MemTable* memtable) {
|
||||
Status WriteBatchInternal::InsertInto(const WriteBatch* b, MemTable* memtable) {
|
||||
MemTableInserter inserter;
|
||||
inserter.sequence_ = WriteBatchInternal::Sequence(b);
|
||||
inserter.mem_ = memtable;
|
||||
|
@ -29,13 +29,9 @@ class WriteBatchInternal {
|
||||
// this batch.
|
||||
static void SetSequence(WriteBatch* batch, SequenceNumber seq);
|
||||
|
||||
static Slice Contents(const WriteBatch* batch) {
|
||||
return Slice(batch->rep_);
|
||||
}
|
||||
static Slice Contents(const WriteBatch* batch) { return Slice(batch->rep_); }
|
||||
|
||||
static size_t ByteSize(const WriteBatch* batch) {
|
||||
return batch->rep_.size();
|
||||
}
|
||||
static size_t ByteSize(const WriteBatch* batch) { return batch->rep_.size(); }
|
||||
|
||||
static void SetContents(WriteBatch* batch, const Slice& contents);
|
||||
|
||||
@ -46,5 +42,4 @@ class WriteBatchInternal {
|
||||
|
||||
} // namespace leveldb
|
||||
|
||||
|
||||
#endif // STORAGE_LEVELDB_DB_WRITE_BATCH_INTERNAL_H_
|
||||
|
@ -52,7 +52,7 @@ static std::string PrintContents(WriteBatch* b) {
|
||||
return state;
|
||||
}
|
||||
|
||||
class WriteBatchTest { };
|
||||
class WriteBatchTest {};
|
||||
|
||||
TEST(WriteBatchTest, Empty) {
|
||||
WriteBatch batch;
|
||||
@ -68,10 +68,11 @@ TEST(WriteBatchTest, Multiple) {
|
||||
WriteBatchInternal::SetSequence(&batch, 100);
|
||||
ASSERT_EQ(100, WriteBatchInternal::Sequence(&batch));
|
||||
ASSERT_EQ(3, WriteBatchInternal::Count(&batch));
|
||||
ASSERT_EQ("Put(baz, boo)@102"
|
||||
"Delete(box)@101"
|
||||
"Put(foo, bar)@100",
|
||||
PrintContents(&batch));
|
||||
ASSERT_EQ(
|
||||
"Put(baz, boo)@102"
|
||||
"Delete(box)@101"
|
||||
"Put(foo, bar)@100",
|
||||
PrintContents(&batch));
|
||||
}
|
||||
|
||||
TEST(WriteBatchTest, Corruption) {
|
||||
@ -81,40 +82,56 @@ TEST(WriteBatchTest, Corruption) {
|
||||
WriteBatchInternal::SetSequence(&batch, 200);
|
||||
Slice contents = WriteBatchInternal::Contents(&batch);
|
||||
WriteBatchInternal::SetContents(&batch,
|
||||
Slice(contents.data(),contents.size()-1));
|
||||
ASSERT_EQ("Put(foo, bar)@200"
|
||||
"ParseError()",
|
||||
PrintContents(&batch));
|
||||
Slice(contents.data(), contents.size() - 1));
|
||||
ASSERT_EQ(
|
||||
"Put(foo, bar)@200"
|
||||
"ParseError()",
|
||||
PrintContents(&batch));
|
||||
}
|
||||
|
||||
TEST(WriteBatchTest, Append) {
|
||||
WriteBatch b1, b2;
|
||||
WriteBatchInternal::SetSequence(&b1, 200);
|
||||
WriteBatchInternal::SetSequence(&b2, 300);
|
||||
WriteBatchInternal::Append(&b1, &b2);
|
||||
ASSERT_EQ("",
|
||||
PrintContents(&b1));
|
||||
b1.Append(b2);
|
||||
ASSERT_EQ("", PrintContents(&b1));
|
||||
b2.Put("a", "va");
|
||||
WriteBatchInternal::Append(&b1, &b2);
|
||||
ASSERT_EQ("Put(a, va)@200",
|
||||
PrintContents(&b1));
|
||||
b1.Append(b2);
|
||||
ASSERT_EQ("Put(a, va)@200", PrintContents(&b1));
|
||||
b2.Clear();
|
||||
b2.Put("b", "vb");
|
||||
WriteBatchInternal::Append(&b1, &b2);
|
||||
ASSERT_EQ("Put(a, va)@200"
|
||||
"Put(b, vb)@201",
|
||||
PrintContents(&b1));
|
||||
b1.Append(b2);
|
||||
ASSERT_EQ(
|
||||
"Put(a, va)@200"
|
||||
"Put(b, vb)@201",
|
||||
PrintContents(&b1));
|
||||
b2.Delete("foo");
|
||||
WriteBatchInternal::Append(&b1, &b2);
|
||||
ASSERT_EQ("Put(a, va)@200"
|
||||
"Put(b, vb)@202"
|
||||
"Put(b, vb)@201"
|
||||
"Delete(foo)@203",
|
||||
PrintContents(&b1));
|
||||
b1.Append(b2);
|
||||
ASSERT_EQ(
|
||||
"Put(a, va)@200"
|
||||
"Put(b, vb)@202"
|
||||
"Put(b, vb)@201"
|
||||
"Delete(foo)@203",
|
||||
PrintContents(&b1));
|
||||
}
|
||||
|
||||
TEST(WriteBatchTest, ApproximateSize) {
|
||||
WriteBatch batch;
|
||||
size_t empty_size = batch.ApproximateSize();
|
||||
|
||||
batch.Put(Slice("foo"), Slice("bar"));
|
||||
size_t one_key_size = batch.ApproximateSize();
|
||||
ASSERT_LT(empty_size, one_key_size);
|
||||
|
||||
batch.Put(Slice("baz"), Slice("boo"));
|
||||
size_t two_keys_size = batch.ApproximateSize();
|
||||
ASSERT_LT(one_key_size, two_keys_size);
|
||||
|
||||
batch.Delete(Slice("box"));
|
||||
size_t post_delete_size = batch.ApproximateSize();
|
||||
ASSERT_LT(two_keys_size, post_delete_size);
|
||||
}
|
||||
|
||||
} // namespace leveldb
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
return leveldb::test::RunAllTests();
|
||||
}
|
||||
int main(int argc, char** argv) { return leveldb::test::RunAllTests(); }
|
||||
|
@ -90,9 +90,9 @@ div.bsql {
|
||||
<h4>Benchmark Source Code</h4>
|
||||
<p>We wrote benchmark tools for SQLite and Kyoto TreeDB based on LevelDB's <span class="code">db_bench</span>. The code for each of the benchmarks resides here:</p>
|
||||
<ul>
|
||||
<li> <b>LevelDB:</b> <a href="http://code.google.com/p/leveldb/source/browse/trunk/db/db_bench.cc">db/db_bench.cc</a>.</li>
|
||||
<li> <b>SQLite:</b> <a href="http://code.google.com/p/leveldb/source/browse/#svn%2Ftrunk%2Fdoc%2Fbench%2Fdb_bench_sqlite3.cc">doc/bench/db_bench_sqlite3.cc</a>.</li>
|
||||
<li> <b>Kyoto TreeDB:</b> <a href="http://code.google.com/p/leveldb/source/browse/#svn%2Ftrunk%2Fdoc%2Fbench%2Fdb_bench_tree_db.cc">doc/bench/db_bench_tree_db.cc</a>.</li>
|
||||
<li> <b>LevelDB:</b> <a href="https://github.com/google/leveldb/blob/master/benchmarks/db_bench.cc">benchmarks/db_bench.cc</a>.</li>
|
||||
<li> <b>SQLite:</b> <a href="https://github.com/google/leveldb/blob/master/benchmarks/db_bench_sqlite3.cc">benchmarks/db_bench_sqlite3.cc</a>.</li>
|
||||
<li> <b>Kyoto TreeDB:</b> <a href="https://github.com/google/leveldb/blob/master/benchmarks/db_bench_tree_db.cc">benchmarks/db_bench_tree_db.cc</a>.</li>
|
||||
</ul>
|
||||
|
||||
<h4>Custom Build Specifications</h4>
|
||||
|
89
doc/doc.css
89
doc/doc.css
@ -1,89 +0,0 @@
|
||||
body {
|
||||
margin-left: 0.5in;
|
||||
margin-right: 0.5in;
|
||||
background: white;
|
||||
color: black;
|
||||
}
|
||||
|
||||
h1 {
|
||||
margin-left: -0.2in;
|
||||
font-size: 14pt;
|
||||
}
|
||||
h2 {
|
||||
margin-left: -0in;
|
||||
font-size: 12pt;
|
||||
}
|
||||
h3 {
|
||||
margin-left: -0in;
|
||||
}
|
||||
h4 {
|
||||
margin-left: -0in;
|
||||
}
|
||||
hr {
|
||||
margin-left: -0in;
|
||||
}
|
||||
|
||||
/* Definition lists: definition term bold */
|
||||
dt {
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
address {
|
||||
text-align: center;
|
||||
}
|
||||
code,samp,var {
|
||||
color: blue;
|
||||
}
|
||||
kbd {
|
||||
color: #600000;
|
||||
}
|
||||
div.note p {
|
||||
float: right;
|
||||
width: 3in;
|
||||
margin-right: 0%;
|
||||
padding: 1px;
|
||||
border: 2px solid #6060a0;
|
||||
background-color: #fffff0;
|
||||
}
|
||||
|
||||
ul {
|
||||
margin-top: -0em;
|
||||
margin-bottom: -0em;
|
||||
}
|
||||
|
||||
ol {
|
||||
margin-top: -0em;
|
||||
margin-bottom: -0em;
|
||||
}
|
||||
|
||||
UL.nobullets {
|
||||
list-style-type: none;
|
||||
list-style-image: none;
|
||||
margin-left: -1em;
|
||||
}
|
||||
|
||||
p {
|
||||
margin: 1em 0 1em 0;
|
||||
padding: 0 0 0 0;
|
||||
}
|
||||
|
||||
pre {
|
||||
line-height: 1.3em;
|
||||
padding: 0.4em 0 0.8em 0;
|
||||
margin: 0 0 0 0;
|
||||
border: 0 0 0 0;
|
||||
color: blue;
|
||||
}
|
||||
|
||||
.datatable {
|
||||
margin-left: auto;
|
||||
margin-right: auto;
|
||||
margin-top: 2em;
|
||||
margin-bottom: 2em;
|
||||
border: 1px solid;
|
||||
}
|
||||
|
||||
.datatable td,th {
|
||||
padding: 0 0.5em 0 0.5em;
|
||||
text-align: right;
|
||||
}
|
213
doc/impl.html
213
doc/impl.html
@ -1,213 +0,0 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<link rel="stylesheet" type="text/css" href="doc.css" />
|
||||
<title>Leveldb file layout and compactions</title>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
|
||||
<h1>Files</h1>
|
||||
|
||||
The implementation of leveldb is similar in spirit to the
|
||||
representation of a single
|
||||
<a href="http://research.google.com/archive/bigtable.html">
|
||||
Bigtable tablet (section 5.3)</a>.
|
||||
However the organization of the files that make up the representation
|
||||
is somewhat different and is explained below.
|
||||
|
||||
<p>
|
||||
Each database is represented by a set of files stored in a directory.
|
||||
There are several different types of files as documented below:
|
||||
<p>
|
||||
<h2>Log files</h2>
|
||||
<p>
|
||||
A log file (*.log) stores a sequence of recent updates. Each update
|
||||
is appended to the current log file. When the log file reaches a
|
||||
pre-determined size (approximately 4MB by default), it is converted
|
||||
to a sorted table (see below) and a new log file is created for future
|
||||
updates.
|
||||
<p>
|
||||
A copy of the current log file is kept in an in-memory structure (the
|
||||
<code>memtable</code>). This copy is consulted on every read so that read
|
||||
operations reflect all logged updates.
|
||||
<p>
|
||||
<h2>Sorted tables</h2>
|
||||
<p>
|
||||
A sorted table (*.sst) stores a sequence of entries sorted by key.
|
||||
Each entry is either a value for the key, or a deletion marker for the
|
||||
key. (Deletion markers are kept around to hide obsolete values
|
||||
present in older sorted tables).
|
||||
<p>
|
||||
The set of sorted tables are organized into a sequence of levels. The
|
||||
sorted table generated from a log file is placed in a special <code>young</code>
|
||||
level (also called level-0). When the number of young files exceeds a
|
||||
certain threshold (currently four), all of the young files are merged
|
||||
together with all of the overlapping level-1 files to produce a
|
||||
sequence of new level-1 files (we create a new level-1 file for every
|
||||
2MB of data.)
|
||||
<p>
|
||||
Files in the young level may contain overlapping keys. However files
|
||||
in other levels have distinct non-overlapping key ranges. Consider
|
||||
level number L where L >= 1. When the combined size of files in
|
||||
level-L exceeds (10^L) MB (i.e., 10MB for level-1, 100MB for level-2,
|
||||
...), one file in level-L, and all of the overlapping files in
|
||||
level-(L+1) are merged to form a set of new files for level-(L+1).
|
||||
These merges have the effect of gradually migrating new updates from
|
||||
the young level to the largest level using only bulk reads and writes
|
||||
(i.e., minimizing expensive seeks).
|
||||
|
||||
<h2>Manifest</h2>
|
||||
<p>
|
||||
A MANIFEST file lists the set of sorted tables that make up each
|
||||
level, the corresponding key ranges, and other important metadata.
|
||||
A new MANIFEST file (with a new number embedded in the file name)
|
||||
is created whenever the database is reopened. The MANIFEST file is
|
||||
formatted as a log, and changes made to the serving state (as files
|
||||
are added or removed) are appended to this log.
|
||||
<p>
|
||||
<h2>Current</h2>
|
||||
<p>
|
||||
CURRENT is a simple text file that contains the name of the latest
|
||||
MANIFEST file.
|
||||
<p>
|
||||
<h2>Info logs</h2>
|
||||
<p>
|
||||
Informational messages are printed to files named LOG and LOG.old.
|
||||
<p>
|
||||
<h2>Others</h2>
|
||||
<p>
|
||||
Other files used for miscellaneous purposes may also be present
|
||||
(LOCK, *.dbtmp).
|
||||
|
||||
<h1>Level 0</h1>
|
||||
When the log file grows above a certain size (1MB by default):
|
||||
<ul>
|
||||
<li>Create a brand new memtable and log file and direct future updates here
|
||||
<li>In the background:
|
||||
<ul>
|
||||
<li>Write the contents of the previous memtable to an sstable
|
||||
<li>Discard the memtable
|
||||
<li>Delete the old log file and the old memtable
|
||||
<li>Add the new sstable to the young (level-0) level.
|
||||
</ul>
|
||||
</ul>
|
||||
|
||||
<h1>Compactions</h1>
|
||||
|
||||
<p>
|
||||
When the size of level L exceeds its limit, we compact it in a
|
||||
background thread. The compaction picks a file from level L and all
|
||||
overlapping files from the next level L+1. Note that if a level-L
|
||||
file overlaps only part of a level-(L+1) file, the entire file at
|
||||
level-(L+1) is used as an input to the compaction and will be
|
||||
discarded after the compaction. Aside: because level-0 is special
|
||||
(files in it may overlap each other), we treat compactions from
|
||||
level-0 to level-1 specially: a level-0 compaction may pick more than
|
||||
one level-0 file in case some of these files overlap each other.
|
||||
|
||||
<p>
|
||||
A compaction merges the contents of the picked files to produce a
|
||||
sequence of level-(L+1) files. We switch to producing a new
|
||||
level-(L+1) file after the current output file has reached the target
|
||||
file size (2MB). We also switch to a new output file when the key
|
||||
range of the current output file has grown enough to overlap more than
|
||||
ten level-(L+2) files. This last rule ensures that a later compaction
|
||||
of a level-(L+1) file will not pick up too much data from level-(L+2).
|
||||
|
||||
<p>
|
||||
The old files are discarded and the new files are added to the serving
|
||||
state.
|
||||
|
||||
<p>
|
||||
Compactions for a particular level rotate through the key space. In
|
||||
more detail, for each level L, we remember the ending key of the last
|
||||
compaction at level L. The next compaction for level L will pick the
|
||||
first file that starts after this key (wrapping around to the
|
||||
beginning of the key space if there is no such file).
|
||||
|
||||
<p>
|
||||
Compactions drop overwritten values. They also drop deletion markers
|
||||
if there are no higher numbered levels that contain a file whose range
|
||||
overlaps the current key.
|
||||
|
||||
<h2>Timing</h2>
|
||||
|
||||
Level-0 compactions will read up to four 1MB files from level-0, and
|
||||
at worst all the level-1 files (10MB). I.e., we will read 14MB and
|
||||
write 14MB.
|
||||
|
||||
<p>
|
||||
Other than the special level-0 compactions, we will pick one 2MB file
|
||||
from level L. In the worst case, this will overlap ~ 12 files from
|
||||
level L+1 (10 because level-(L+1) is ten times the size of level-L,
|
||||
and another two at the boundaries since the file ranges at level-L
|
||||
will usually not be aligned with the file ranges at level-L+1). The
|
||||
compaction will therefore read 26MB and write 26MB. Assuming a disk
|
||||
IO rate of 100MB/s (ballpark range for modern drives), the worst
|
||||
compaction cost will be approximately 0.5 second.
|
||||
|
||||
<p>
|
||||
If we throttle the background writing to something small, say 10% of
|
||||
the full 100MB/s speed, a compaction may take up to 5 seconds. If the
|
||||
user is writing at 10MB/s, we might build up lots of level-0 files
|
||||
(~50 to hold the 5*10MB). This may significantly increase the cost of
|
||||
reads due to the overhead of merging more files together on every
|
||||
read.
|
||||
|
||||
<p>
|
||||
Solution 1: To reduce this problem, we might want to increase the log
|
||||
switching threshold when the number of level-0 files is large. Though
|
||||
the downside is that the larger this threshold, the more memory we will
|
||||
need to hold the corresponding memtable.
|
||||
|
||||
<p>
|
||||
Solution 2: We might want to decrease write rate artificially when the
|
||||
number of level-0 files goes up.
|
||||
|
||||
<p>
|
||||
Solution 3: We work on reducing the cost of very wide merges.
|
||||
Perhaps most of the level-0 files will have their blocks sitting
|
||||
uncompressed in the cache and we will only need to worry about the
|
||||
O(N) complexity in the merging iterator.
|
||||
|
||||
<h2>Number of files</h2>
|
||||
|
||||
Instead of always making 2MB files, we could make larger files for
|
||||
larger levels to reduce the total file count, though at the expense of
|
||||
more bursty compactions. Alternatively, we could shard the set of
|
||||
files into multiple directories.
|
||||
|
||||
<p>
|
||||
An experiment on an <code>ext3</code> filesystem on Feb 04, 2011 shows
|
||||
the following timings to do 100K file opens in directories with
|
||||
varying number of files:
|
||||
<table class="datatable">
|
||||
<tr><th>Files in directory</th><th>Microseconds to open a file</th></tr>
|
||||
<tr><td>1000</td><td>9</td>
|
||||
<tr><td>10000</td><td>10</td>
|
||||
<tr><td>100000</td><td>16</td>
|
||||
</table>
|
||||
So maybe even the sharding is not necessary on modern filesystems?
|
||||
|
||||
<h1>Recovery</h1>
|
||||
|
||||
<ul>
|
||||
<li> Read CURRENT to find name of the latest committed MANIFEST
|
||||
<li> Read the named MANIFEST file
|
||||
<li> Clean up stale files
|
||||
<li> We could open all sstables here, but it is probably better to be lazy...
|
||||
<li> Convert log chunk to a new level-0 sstable
|
||||
<li> Start directing new writes to a new log file with recovered sequence#
|
||||
</ul>
|
||||
|
||||
<h1>Garbage collection of files</h1>
|
||||
|
||||
<code>DeleteObsoleteFiles()</code> is called at the end of every
|
||||
compaction and at the end of recovery. It finds the names of all
|
||||
files in the database. It deletes all log files that are not the
|
||||
current log file. It deletes all table files that are not referenced
|
||||
from some level and are not the output of an active compaction.
|
||||
|
||||
</body>
|
||||
</html>
|
172
doc/impl.md
Normal file
172
doc/impl.md
Normal file
@ -0,0 +1,172 @@
|
||||
## Files
|
||||
|
||||
The implementation of leveldb is similar in spirit to the representation of a
|
||||
single [Bigtable tablet (section 5.3)](http://research.google.com/archive/bigtable.html).
|
||||
However, the organization of the files that make up the representation is
|
||||
somewhat different and is explained below.
|
||||
|
||||
Each database is represented by a set of files stored in a directory. There are
|
||||
several different types of files as documented below:
|
||||
|
||||
### Log files
|
||||
|
||||
A log file (*.log) stores a sequence of recent updates. Each update is appended
|
||||
to the current log file. When the log file reaches a pre-determined size
|
||||
(approximately 4MB by default), it is converted to a sorted table (see below)
|
||||
and a new log file is created for future updates.
|
||||
|
||||
A copy of the current log file is kept in an in-memory structure (the
|
||||
`memtable`). This copy is consulted on every read so that read operations
|
||||
reflect all logged updates.
|
||||
|
||||
### Sorted tables
|
||||
|
||||
A sorted table (*.ldb) stores a sequence of entries sorted by key. Each entry is
|
||||
either a value for the key, or a deletion marker for the key. (Deletion markers
|
||||
are kept around to hide obsolete values present in older sorted tables).
|
||||
|
||||
The set of sorted tables is organized into a sequence of levels. The sorted
|
||||
table generated from a log file is placed in a special **young** level (also
|
||||
called level-0). When the number of young files exceeds a certain threshold
|
||||
(currently four), all of the young files are merged together with all of the
|
||||
overlapping level-1 files to produce a sequence of new level-1 files (we create
|
||||
a new level-1 file for every 2MB of data).
|
||||
|
||||
Files in the young level may contain overlapping keys. However, files in other
|
||||
levels have distinct non-overlapping key ranges. Consider level number L where
|
||||
L >= 1. When the combined size of files in level-L exceeds (10^L) MB (i.e., 10MB
|
||||
for level-1, 100MB for level-2, ...), one file in level-L, and all of the
|
||||
overlapping files in level-(L+1) are merged to form a set of new files for
|
||||
level-(L+1). These merges have the effect of gradually migrating new updates
|
||||
from the young level to the largest level using only bulk reads and writes
|
||||
(i.e., minimizing expensive seeks).
|
||||
|
||||
### Manifest
|
||||
|
||||
A MANIFEST file lists the set of sorted tables that make up each level, the
|
||||
corresponding key ranges, and other important metadata. A new MANIFEST file
|
||||
(with a new number embedded in the file name) is created whenever the database
|
||||
is reopened. The MANIFEST file is formatted as a log, and changes made to the
|
||||
serving state (as files are added or removed) are appended to this log.
|
||||
|
||||
### Current
|
||||
|
||||
CURRENT is a simple text file that contains the name of the latest MANIFEST
|
||||
file.
|
||||
|
||||
### Info logs
|
||||
|
||||
Informational messages are printed to files named LOG and LOG.old.
|
||||
|
||||
### Others
|
||||
|
||||
Other files used for miscellaneous purposes may also be present (LOCK, *.dbtmp).
|
||||
|
||||
## Level 0
|
||||
|
||||
When the log file grows above a certain size (4MB by default):
|
||||
Create a brand new memtable and log file and direct future updates here.
|
||||
|
||||
In the background:
|
||||
|
||||
1. Write the contents of the previous memtable to an sstable.
|
||||
2. Discard the memtable.
|
||||
3. Delete the old log file and the old memtable.
|
||||
4. Add the new sstable to the young (level-0) level.
|
||||
|
||||
## Compactions
|
||||
|
||||
When the size of level L exceeds its limit, we compact it in a background
|
||||
thread. The compaction picks a file from level L and all overlapping files from
|
||||
the next level L+1. Note that if a level-L file overlaps only part of a
|
||||
level-(L+1) file, the entire file at level-(L+1) is used as an input to the
|
||||
compaction and will be discarded after the compaction. Aside: because level-0
|
||||
is special (files in it may overlap each other), we treat compactions from
|
||||
level-0 to level-1 specially: a level-0 compaction may pick more than one
|
||||
level-0 file in case some of these files overlap each other.
|
||||
|
||||
A compaction merges the contents of the picked files to produce a sequence of
|
||||
level-(L+1) files. We switch to producing a new level-(L+1) file after the
|
||||
current output file has reached the target file size (2MB). We also switch to a
|
||||
new output file when the key range of the current output file has grown enough
|
||||
to overlap more than ten level-(L+2) files. This last rule ensures that a later
|
||||
compaction of a level-(L+1) file will not pick up too much data from
|
||||
level-(L+2).
|
||||
|
||||
The old files are discarded and the new files are added to the serving state.
|
||||
|
||||
Compactions for a particular level rotate through the key space. In more detail,
|
||||
for each level L, we remember the ending key of the last compaction at level L.
|
||||
The next compaction for level L will pick the first file that starts after this
|
||||
key (wrapping around to the beginning of the key space if there is no such
|
||||
file).
|
||||
|
||||
Compactions drop overwritten values. They also drop deletion markers if there
|
||||
are no higher numbered levels that contain a file whose range overlaps the
|
||||
current key.
|
||||
|
||||
### Timing
|
||||
|
||||
Level-0 compactions will read up to four 1MB files from level-0, and at worst
|
||||
all the level-1 files (10MB). I.e., we will read 14MB and write 14MB.
|
||||
|
||||
Other than the special level-0 compactions, we will pick one 2MB file from level
|
||||
L. In the worst case, this will overlap ~ 12 files from level L+1 (10 because
|
||||
level-(L+1) is ten times the size of level-L, and another two at the boundaries
|
||||
since the file ranges at level-L will usually not be aligned with the file
|
||||
ranges at level-L+1). The compaction will therefore read 26MB and write 26MB.
|
||||
Assuming a disk IO rate of 100MB/s (ballpark range for modern drives), the worst
|
||||
compaction cost will be approximately 0.5 seconds.
|
||||
|
||||
If we throttle the background writing to something small, say 10% of the full
|
||||
100MB/s speed, a compaction may take up to 5 seconds. If the user is writing at
|
||||
10MB/s, we might build up lots of level-0 files (~50 to hold the 5*10MB). This
|
||||
may significantly increase the cost of reads due to the overhead of merging more
|
||||
files together on every read.
|
||||
|
||||
Solution 1: To reduce this problem, we might want to increase the log switching
|
||||
threshold when the number of level-0 files is large. Though the downside is that
|
||||
the larger this threshold, the more memory we will need to hold the
|
||||
corresponding memtable.
|
||||
|
||||
Solution 2: We might want to decrease write rate artificially when the number of
|
||||
level-0 files goes up.
|
||||
|
||||
Solution 3: We work on reducing the cost of very wide merges. Perhaps most of
|
||||
the level-0 files will have their blocks sitting uncompressed in the cache and
|
||||
we will only need to worry about the O(N) complexity in the merging iterator.
|
||||
|
||||
### Number of files
|
||||
|
||||
Instead of always making 2MB files, we could make larger files for larger levels
|
||||
to reduce the total file count, though at the expense of more bursty
|
||||
compactions. Alternatively, we could shard the set of files into multiple
|
||||
directories.
|
||||
|
||||
An experiment on an ext3 filesystem on Feb 04, 2011 shows the following timings
|
||||
to do 100K file opens in directories with varying number of files:
|
||||
|
||||
|
||||
| Files in directory | Microseconds to open a file |
|
||||
|-------------------:|----------------------------:|
|
||||
| 1000 | 9 |
|
||||
| 10000 | 10 |
|
||||
| 100000 | 16 |
|
||||
|
||||
So maybe even the sharding is not necessary on modern filesystems?
|
||||
|
||||
## Recovery
|
||||
|
||||
* Read CURRENT to find name of the latest committed MANIFEST
|
||||
* Read the named MANIFEST file
|
||||
* Clean up stale files
|
||||
* We could open all sstables here, but it is probably better to be lazy...
|
||||
* Convert log chunk to a new level-0 sstable
|
||||
* Start directing new writes to a new log file with recovered sequence#
|
||||
|
||||
## Garbage collection of files
|
||||
|
||||
`DeleteObsoleteFiles()` is called at the end of every compaction and at the end
|
||||
of recovery. It finds the names of all files in the database. It deletes all log
|
||||
files that are not the current log file. It deletes all table files that are not
|
||||
referenced from some level and are not the output of an active compaction.
|
549
doc/index.html
549
doc/index.html
@ -1,549 +0,0 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<link rel="stylesheet" type="text/css" href="doc.css" />
|
||||
<title>Leveldb</title>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<h1>Leveldb</h1>
|
||||
<address>Jeff Dean, Sanjay Ghemawat</address>
|
||||
<p>
|
||||
The <code>leveldb</code> library provides a persistent key value store. Keys and
|
||||
values are arbitrary byte arrays. The keys are ordered within the key
|
||||
value store according to a user-specified comparator function.
|
||||
|
||||
<p>
|
||||
<h1>Opening A Database</h1>
|
||||
<p>
|
||||
A <code>leveldb</code> database has a name which corresponds to a file system
|
||||
directory. All of the contents of database are stored in this
|
||||
directory. The following example shows how to open a database,
|
||||
creating it if necessary:
|
||||
<p>
|
||||
<pre>
|
||||
#include <cassert>
|
||||
#include "leveldb/db.h"
|
||||
|
||||
leveldb::DB* db;
|
||||
leveldb::Options options;
|
||||
options.create_if_missing = true;
|
||||
leveldb::Status status = leveldb::DB::Open(options, "/tmp/testdb", &db);
|
||||
assert(status.ok());
|
||||
...
|
||||
</pre>
|
||||
If you want to raise an error if the database already exists, add
|
||||
the following line before the <code>leveldb::DB::Open</code> call:
|
||||
<pre>
|
||||
options.error_if_exists = true;
|
||||
</pre>
|
||||
<h1>Status</h1>
|
||||
<p>
|
||||
You may have noticed the <code>leveldb::Status</code> type above. Values of this
|
||||
type are returned by most functions in <code>leveldb</code> that may encounter an
|
||||
error. You can check if such a result is ok, and also print an
|
||||
associated error message:
|
||||
<p>
|
||||
<pre>
|
||||
leveldb::Status s = ...;
|
||||
if (!s.ok()) cerr << s.ToString() << endl;
|
||||
</pre>
|
||||
<h1>Closing A Database</h1>
|
||||
<p>
|
||||
When you are done with a database, just delete the database object.
|
||||
Example:
|
||||
<p>
|
||||
<pre>
|
||||
... open the db as described above ...
|
||||
... do something with db ...
|
||||
delete db;
|
||||
</pre>
|
||||
<h1>Reads And Writes</h1>
|
||||
<p>
|
||||
The database provides <code>Put</code>, <code>Delete</code>, and <code>Get</code> methods to
|
||||
modify/query the database. For example, the following code
|
||||
moves the value stored under key1 to key2.
|
||||
<pre>
|
||||
std::string value;
|
||||
leveldb::Status s = db->Get(leveldb::ReadOptions(), key1, &value);
|
||||
if (s.ok()) s = db->Put(leveldb::WriteOptions(), key2, value);
|
||||
if (s.ok()) s = db->Delete(leveldb::WriteOptions(), key1);
|
||||
</pre>
|
||||
|
||||
<h1>Atomic Updates</h1>
|
||||
<p>
|
||||
Note that if the process dies after the Put of key2 but before the
|
||||
delete of key1, the same value may be left stored under multiple keys.
|
||||
Such problems can be avoided by using the <code>WriteBatch</code> class to
|
||||
atomically apply a set of updates:
|
||||
<p>
|
||||
<pre>
|
||||
#include "leveldb/write_batch.h"
|
||||
...
|
||||
std::string value;
|
||||
leveldb::Status s = db->Get(leveldb::ReadOptions(), key1, &value);
|
||||
if (s.ok()) {
|
||||
leveldb::WriteBatch batch;
|
||||
batch.Delete(key1);
|
||||
batch.Put(key2, value);
|
||||
s = db->Write(leveldb::WriteOptions(), &batch);
|
||||
}
|
||||
</pre>
|
||||
The <code>WriteBatch</code> holds a sequence of edits to be made to the database,
|
||||
and these edits within the batch are applied in order. Note that we
|
||||
called <code>Delete</code> before <code>Put</code> so that if <code>key1</code> is identical to <code>key2</code>,
|
||||
we do not end up erroneously dropping the value entirely.
|
||||
<p>
|
||||
Apart from its atomicity benefits, <code>WriteBatch</code> may also be used to
|
||||
speed up bulk updates by placing lots of individual mutations into the
|
||||
same batch.
|
||||
|
||||
<h1>Synchronous Writes</h1>
|
||||
By default, each write to <code>leveldb</code> is asynchronous: it
|
||||
returns after pushing the write from the process into the operating
|
||||
system. The transfer from operating system memory to the underlying
|
||||
persistent storage happens asynchronously. The <code>sync</code> flag
|
||||
can be turned on for a particular write to make the write operation
|
||||
not return until the data being written has been pushed all the way to
|
||||
persistent storage. (On Posix systems, this is implemented by calling
|
||||
either <code>fsync(...)</code> or <code>fdatasync(...)</code> or
|
||||
<code>msync(..., MS_SYNC)</code> before the write operation returns.)
|
||||
<pre>
|
||||
leveldb::WriteOptions write_options;
|
||||
write_options.sync = true;
|
||||
db->Put(write_options, ...);
|
||||
</pre>
|
||||
Asynchronous writes are often more than a thousand times as fast as
|
||||
synchronous writes. The downside of asynchronous writes is that a
|
||||
crash of the machine may cause the last few updates to be lost. Note
|
||||
that a crash of just the writing process (i.e., not a reboot) will not
|
||||
cause any loss since even when <code>sync</code> is false, an update
|
||||
is pushed from the process memory into the operating system before it
|
||||
is considered done.
|
||||
|
||||
<p>
|
||||
Asynchronous writes can often be used safely. For example, when
|
||||
loading a large amount of data into the database you can handle lost
|
||||
updates by restarting the bulk load after a crash. A hybrid scheme is
|
||||
also possible where every Nth write is synchronous, and in the event
|
||||
of a crash, the bulk load is restarted just after the last synchronous
|
||||
write finished by the previous run. (The synchronous write can update
|
||||
a marker that describes where to restart on a crash.)
|
||||
|
||||
<p>
|
||||
<code>WriteBatch</code> provides an alternative to asynchronous writes.
|
||||
Multiple updates may be placed in the same <code>WriteBatch</code> and
|
||||
applied together using a synchronous write (i.e.,
|
||||
<code>write_options.sync</code> is set to true). The extra cost of
|
||||
the synchronous write will be amortized across all of the writes in
|
||||
the batch.
|
||||
|
||||
<p>
|
||||
<h1>Concurrency</h1>
|
||||
<p>
|
||||
A database may only be opened by one process at a time.
|
||||
The <code>leveldb</code> implementation acquires a lock from the
|
||||
operating system to prevent misuse. Within a single process, the
|
||||
same <code>leveldb::DB</code> object may be safely shared by multiple
|
||||
concurrent threads. I.e., different threads may write into or fetch
|
||||
iterators or call <code>Get</code> on the same database without any
|
||||
external synchronization (the leveldb implementation will
|
||||
automatically do the required synchronization). However other objects
|
||||
(like Iterator and WriteBatch) may require external synchronization.
|
||||
If two threads share such an object, they must protect access to it
|
||||
using their own locking protocol. More details are available in
|
||||
the public header files.
|
||||
<p>
|
||||
<h1>Iteration</h1>
|
||||
<p>
|
||||
The following example demonstrates how to print all key,value pairs
|
||||
in a database.
|
||||
<p>
|
||||
<pre>
|
||||
leveldb::Iterator* it = db->NewIterator(leveldb::ReadOptions());
|
||||
for (it->SeekToFirst(); it->Valid(); it->Next()) {
|
||||
cout << it->key().ToString() << ": " << it->value().ToString() << endl;
|
||||
}
|
||||
assert(it->status().ok()); // Check for any errors found during the scan
|
||||
delete it;
|
||||
</pre>
|
||||
The following variation shows how to process just the keys in the
|
||||
range <code>[start,limit)</code>:
|
||||
<p>
|
||||
<pre>
|
||||
for (it->Seek(start);
|
||||
it->Valid() && it->key().ToString() < limit;
|
||||
it->Next()) {
|
||||
...
|
||||
}
|
||||
</pre>
|
||||
You can also process entries in reverse order. (Caveat: reverse
|
||||
iteration may be somewhat slower than forward iteration.)
|
||||
<p>
|
||||
<pre>
|
||||
for (it->SeekToLast(); it->Valid(); it->Prev()) {
|
||||
...
|
||||
}
|
||||
</pre>
|
||||
<h1>Snapshots</h1>
|
||||
<p>
|
||||
Snapshots provide consistent read-only views over the entire state of
|
||||
the key-value store. <code>ReadOptions::snapshot</code> may be non-NULL to indicate
|
||||
that a read should operate on a particular version of the DB state.
|
||||
If <code>ReadOptions::snapshot</code> is NULL, the read will operate on an
|
||||
implicit snapshot of the current state.
|
||||
<p>
|
||||
Snapshots are created by the DB::GetSnapshot() method:
|
||||
<p>
|
||||
<pre>
|
||||
leveldb::ReadOptions options;
|
||||
options.snapshot = db->GetSnapshot();
|
||||
... apply some updates to db ...
|
||||
leveldb::Iterator* iter = db->NewIterator(options);
|
||||
... read using iter to view the state when the snapshot was created ...
|
||||
delete iter;
|
||||
db->ReleaseSnapshot(options.snapshot);
|
||||
</pre>
|
||||
Note that when a snapshot is no longer needed, it should be released
|
||||
using the DB::ReleaseSnapshot interface. This allows the
|
||||
implementation to get rid of state that was being maintained just to
|
||||
support reading as of that snapshot.
|
||||
<h1>Slice</h1>
|
||||
<p>
|
||||
The return value of the <code>it->key()</code> and <code>it->value()</code> calls above
|
||||
are instances of the <code>leveldb::Slice</code> type. <code>Slice</code> is a simple
|
||||
structure that contains a length and a pointer to an external byte
|
||||
array. Returning a <code>Slice</code> is a cheaper alternative to returning a
|
||||
<code>std::string</code> since we do not need to copy potentially large keys and
|
||||
values. In addition, <code>leveldb</code> methods do not return null-terminated
|
||||
C-style strings since <code>leveldb</code> keys and values are allowed to
|
||||
contain '\0' bytes.
|
||||
<p>
|
||||
C++ strings and null-terminated C-style strings can be easily converted
|
||||
to a Slice:
|
||||
<p>
|
||||
<pre>
|
||||
leveldb::Slice s1 = "hello";
|
||||
|
||||
std::string str("world");
|
||||
leveldb::Slice s2 = str;
|
||||
</pre>
|
||||
A Slice can be easily converted back to a C++ string:
|
||||
<pre>
|
||||
std::string str = s1.ToString();
|
||||
assert(str == std::string("hello"));
|
||||
</pre>
|
||||
Be careful when using Slices since it is up to the caller to ensure that
|
||||
the external byte array into which the Slice points remains live while
|
||||
the Slice is in use. For example, the following is buggy:
|
||||
<p>
|
||||
<pre>
|
||||
leveldb::Slice slice;
|
||||
if (...) {
|
||||
std::string str = ...;
|
||||
slice = str;
|
||||
}
|
||||
Use(slice);
|
||||
</pre>
|
||||
When the <code>if</code> statement goes out of scope, <code>str</code> will be destroyed and the
|
||||
backing storage for <code>slice</code> will disappear.
|
||||
<p>
|
||||
<h1>Comparators</h1>
|
||||
<p>
|
||||
The preceding examples used the default ordering function for keys,
|
||||
which orders bytes lexicographically. You can however supply a custom
|
||||
comparator when opening a database. For example, suppose each
|
||||
database key consists of two numbers and we should sort by the first
|
||||
number, breaking ties by the second number. First, define a proper
|
||||
subclass of <code>leveldb::Comparator</code> that expresses these rules:
|
||||
<p>
|
||||
<pre>
|
||||
class TwoPartComparator : public leveldb::Comparator {
|
||||
public:
|
||||
// Three-way comparison function:
|
||||
// if a < b: negative result
|
||||
// if a > b: positive result
|
||||
// else: zero result
|
||||
int Compare(const leveldb::Slice& a, const leveldb::Slice& b) const {
|
||||
int a1, a2, b1, b2;
|
||||
ParseKey(a, &a1, &a2);
|
||||
ParseKey(b, &b1, &b2);
|
||||
if (a1 < b1) return -1;
|
||||
if (a1 > b1) return +1;
|
||||
if (a2 < b2) return -1;
|
||||
if (a2 > b2) return +1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Ignore the following methods for now:
|
||||
const char* Name() const { return "TwoPartComparator"; }
|
||||
void FindShortestSeparator(std::string*, const leveldb::Slice&) const { }
|
||||
void FindShortSuccessor(std::string*) const { }
|
||||
};
|
||||
</pre>
|
||||
Now create a database using this custom comparator:
|
||||
<p>
|
||||
<pre>
|
||||
TwoPartComparator cmp;
|
||||
leveldb::DB* db;
|
||||
leveldb::Options options;
|
||||
options.create_if_missing = true;
|
||||
options.comparator = &cmp;
|
||||
leveldb::Status status = leveldb::DB::Open(options, "/tmp/testdb", &db);
|
||||
...
|
||||
</pre>
|
||||
<h2>Backwards compatibility</h2>
|
||||
<p>
|
||||
The result of the comparator's <code>Name</code> method is attached to the
|
||||
database when it is created, and is checked on every subsequent
|
||||
database open. If the name changes, the <code>leveldb::DB::Open</code> call will
|
||||
fail. Therefore, change the name if and only if the new key format
|
||||
and comparison function are incompatible with existing databases, and
|
||||
it is ok to discard the contents of all existing databases.
|
||||
<p>
|
||||
You can however still gradually evolve your key format over time with
|
||||
a little bit of pre-planning. For example, you could store a version
|
||||
number at the end of each key (one byte should suffice for most uses).
|
||||
When you wish to switch to a new key format (e.g., adding an optional
|
||||
third part to the keys processed by <code>TwoPartComparator</code>),
|
||||
(a) keep the same comparator name (b) increment the version number
|
||||
for new keys (c) change the comparator function so it uses the
|
||||
version numbers found in the keys to decide how to interpret them.
|
||||
<p>
|
||||
<h1>Performance</h1>
|
||||
<p>
|
||||
Performance can be tuned by changing the default values of the
|
||||
types defined in <code>include/leveldb/options.h</code>.
|
||||
|
||||
<p>
|
||||
<h2>Block size</h2>
|
||||
<p>
|
||||
<code>leveldb</code> groups adjacent keys together into the same block and such a
|
||||
block is the unit of transfer to and from persistent storage. The
|
||||
default block size is approximately 4096 uncompressed bytes.
|
||||
Applications that mostly do bulk scans over the contents of the
|
||||
database may wish to increase this size. Applications that do a lot
|
||||
of point reads of small values may wish to switch to a smaller block
|
||||
size if performance measurements indicate an improvement. There isn't
|
||||
much benefit in using blocks smaller than one kilobyte, or larger than
|
||||
a few megabytes. Also note that compression will be more effective
|
||||
with larger block sizes.
|
||||
<p>
|
||||
<h2>Compression</h2>
|
||||
<p>
|
||||
Each block is individually compressed before being written to
|
||||
persistent storage. Compression is on by default since the default
|
||||
compression method is very fast, and is automatically disabled for
|
||||
uncompressible data. In rare cases, applications may want to disable
|
||||
compression entirely, but should only do so if benchmarks show a
|
||||
performance improvement:
|
||||
<p>
|
||||
<pre>
|
||||
leveldb::Options options;
|
||||
options.compression = leveldb::kNoCompression;
|
||||
... leveldb::DB::Open(options, name, ...) ....
|
||||
</pre>
|
||||
<h2>Cache</h2>
|
||||
<p>
|
||||
The contents of the database are stored in a set of files in the
|
||||
filesystem and each file stores a sequence of compressed blocks. If
|
||||
<code>options.block_cache</code> is non-NULL, it is used to cache frequently used
|
||||
uncompressed block contents.
|
||||
<p>
|
||||
<pre>
|
||||
#include "leveldb/cache.h"
|
||||
|
||||
leveldb::Options options;
|
||||
options.block_cache = leveldb::NewLRUCache(100 * 1048576); // 100MB cache
|
||||
leveldb::DB* db;
|
||||
leveldb::DB::Open(options, name, &db);
|
||||
... use the db ...
|
||||
delete db;
|
||||
delete options.block_cache;
|
||||
</pre>
|
||||
Note that the cache holds uncompressed data, and therefore it should
|
||||
be sized according to application level data sizes, without any
|
||||
reduction from compression. (Caching of compressed blocks is left to
|
||||
the operating system buffer cache, or any custom <code>Env</code>
|
||||
implementation provided by the client.)
|
||||
<p>
|
||||
When performing a bulk read, the application may wish to disable
|
||||
caching so that the data processed by the bulk read does not end up
|
||||
displacing most of the cached contents. A per-iterator option can be
|
||||
used to achieve this:
|
||||
<p>
|
||||
<pre>
|
||||
leveldb::ReadOptions options;
|
||||
options.fill_cache = false;
|
||||
leveldb::Iterator* it = db->NewIterator(options);
|
||||
for (it->SeekToFirst(); it->Valid(); it->Next()) {
|
||||
...
|
||||
}
|
||||
</pre>
|
||||
<h2>Key Layout</h2>
|
||||
<p>
|
||||
Note that the unit of disk transfer and caching is a block. Adjacent
|
||||
keys (according to the database sort order) will usually be placed in
|
||||
the same block. Therefore the application can improve its performance
|
||||
by placing keys that are accessed together near each other and placing
|
||||
infrequently used keys in a separate region of the key space.
|
||||
<p>
|
||||
For example, suppose we are implementing a simple file system on top
|
||||
of <code>leveldb</code>. The types of entries we might wish to store are:
|
||||
<p>
|
||||
<pre>
|
||||
filename -> permission-bits, length, list of file_block_ids
|
||||
file_block_id -> data
|
||||
</pre>
|
||||
We might want to prefix <code>filename</code> keys with one letter (say '/') and the
|
||||
<code>file_block_id</code> keys with a different letter (say '0') so that scans
|
||||
over just the metadata do not force us to fetch and cache bulky file
|
||||
contents.
|
||||
<p>
|
||||
<h2>Filters</h2>
|
||||
<p>
|
||||
Because of the way <code>leveldb</code> data is organized on disk,
|
||||
a single <code>Get()</code> call may involve multiple reads from disk.
|
||||
The optional <code>FilterPolicy</code> mechanism can be used to reduce
|
||||
the number of disk reads substantially.
|
||||
<pre>
|
||||
leveldb::Options options;
|
||||
options.filter_policy = NewBloomFilterPolicy(10);
|
||||
leveldb::DB* db;
|
||||
leveldb::DB::Open(options, "/tmp/testdb", &db);
|
||||
... use the database ...
|
||||
delete db;
|
||||
delete options.filter_policy;
|
||||
</pre>
|
||||
The preceding code associates a
|
||||
<a href="http://en.wikipedia.org/wiki/Bloom_filter">Bloom filter</a>
|
||||
based filtering policy with the database. Bloom filter based
|
||||
filtering relies on keeping some number of bits of data in memory per
|
||||
key (in this case 10 bits per key since that is the argument we passed
|
||||
to NewBloomFilterPolicy). This filter will reduce the number of unnecessary
|
||||
disk reads needed for <code>Get()</code> calls by a factor of
|
||||
approximately 100. Increasing the bits per key will lead to a
|
||||
larger reduction at the cost of more memory usage. We recommend that
|
||||
applications whose working set does not fit in memory and that do a
|
||||
lot of random reads set a filter policy.
|
||||
<p>
|
||||
If you are using a custom comparator, you should ensure that the filter
|
||||
policy you are using is compatible with your comparator. For example,
|
||||
consider a comparator that ignores trailing spaces when comparing keys.
|
||||
<code>NewBloomFilterPolicy</code> must not be used with such a comparator.
|
||||
Instead, the application should provide a custom filter policy that
|
||||
also ignores trailing spaces. For example:
|
||||
<pre>
|
||||
class CustomFilterPolicy : public leveldb::FilterPolicy {
|
||||
private:
|
||||
FilterPolicy* builtin_policy_;
|
||||
public:
|
||||
CustomFilterPolicy() : builtin_policy_(NewBloomFilterPolicy(10)) { }
|
||||
~CustomFilterPolicy() { delete builtin_policy_; }
|
||||
|
||||
const char* Name() const { return "IgnoreTrailingSpacesFilter"; }
|
||||
|
||||
void CreateFilter(const Slice* keys, int n, std::string* dst) const {
|
||||
// Use builtin bloom filter code after removing trailing spaces
|
||||
std::vector<Slice> trimmed(n);
|
||||
for (int i = 0; i < n; i++) {
|
||||
trimmed[i] = RemoveTrailingSpaces(keys[i]);
|
||||
}
|
||||
builtin_policy_->CreateFilter(trimmed.data(), n, dst);
|
||||
}
|
||||
|
||||
bool KeyMayMatch(const Slice& key, const Slice& filter) const {
|
||||
// Use builtin bloom filter code after removing trailing spaces
|
||||
return builtin_policy_->KeyMayMatch(RemoveTrailingSpaces(key), filter);
|
||||
}
|
||||
};
|
||||
</pre>
|
||||
<p>
|
||||
Advanced applications may provide a filter policy that does not use
|
||||
a bloom filter but uses some other mechanism for summarizing a set
|
||||
of keys. See <code>leveldb/filter_policy.h</code> for details.
|
||||
<p>
|
||||
<h1>Checksums</h1>
|
||||
<p>
|
||||
<code>leveldb</code> associates checksums with all data it stores in the file system.
|
||||
There are two separate controls provided over how aggressively these
|
||||
checksums are verified:
|
||||
<p>
|
||||
<ul>
|
||||
<li> <code>ReadOptions::verify_checksums</code> may be set to true to force
|
||||
checksum verification of all data that is read from the file system on
|
||||
behalf of a particular read. By default, no such verification is
|
||||
done.
|
||||
<p>
|
||||
<li> <code>Options::paranoid_checks</code> may be set to true before opening a
|
||||
database to make the database implementation raise an error as soon as
|
||||
it detects an internal corruption. Depending on which portion of the
|
||||
database has been corrupted, the error may be raised when the database
|
||||
is opened, or later by another database operation. By default,
|
||||
paranoid checking is off so that the database can be used even if
|
||||
parts of its persistent storage have been corrupted.
|
||||
<p>
|
||||
If a database is corrupted (perhaps it cannot be opened when
|
||||
paranoid checking is turned on), the <code>leveldb::RepairDB</code> function
|
||||
may be used to recover as much of the data as possible.
|
||||
<p>
|
||||
</ul>
|
||||
<h1>Approximate Sizes</h1>
|
||||
<p>
|
||||
The <code>GetApproximateSizes</code> method can be used to get the approximate
|
||||
number of bytes of file system space used by one or more key ranges.
|
||||
<p>
|
||||
<pre>
|
||||
leveldb::Range ranges[2];
|
||||
ranges[0] = leveldb::Range("a", "c");
|
||||
ranges[1] = leveldb::Range("x", "z");
|
||||
uint64_t sizes[2];
|
||||
leveldb::Status s = db->GetApproximateSizes(ranges, 2, sizes);
|
||||
</pre>
|
||||
The preceding call will set <code>sizes[0]</code> to the approximate number of
|
||||
bytes of file system space used by the key range <code>[a..c)</code> and
|
||||
<code>sizes[1]</code> to the approximate number of bytes used by the key range
|
||||
<code>[x..z)</code>.
|
||||
<p>
|
||||
<h1>Environment</h1>
|
||||
<p>
|
||||
All file operations (and other operating system calls) issued by the
|
||||
<code>leveldb</code> implementation are routed through a <code>leveldb::Env</code> object.
|
||||
Sophisticated clients may wish to provide their own <code>Env</code>
|
||||
implementation to get better control. For example, an application may
|
||||
introduce artificial delays in the file IO paths to limit the impact
|
||||
of <code>leveldb</code> on other activities in the system.
|
||||
<p>
|
||||
<pre>
|
||||
class SlowEnv : public leveldb::Env {
|
||||
.. implementation of the Env interface ...
|
||||
};
|
||||
|
||||
SlowEnv env;
|
||||
leveldb::Options options;
|
||||
options.env = &env;
|
||||
Status s = leveldb::DB::Open(options, ...);
|
||||
</pre>
|
||||
<h1>Porting</h1>
|
||||
<p>
|
||||
<code>leveldb</code> may be ported to a new platform by providing platform
|
||||
specific implementations of the types/methods/functions exported by
|
||||
<code>leveldb/port/port.h</code>. See <code>leveldb/port/port_example.h</code> for more
|
||||
details.
|
||||
<p>
|
||||
In addition, the new platform may need a new default <code>leveldb::Env</code>
|
||||
implementation. See <code>leveldb/util/env_posix.h</code> for an example.
|
||||
|
||||
<h1>Other Information</h1>
|
||||
|
||||
<p>
|
||||
Details about the <code>leveldb</code> implementation may be found in
|
||||
the following documents:
|
||||
<ul>
|
||||
<li> <a href="impl.html">Implementation notes</a>
|
||||
<li> <a href="table_format.txt">Format of an immutable Table file</a>
|
||||
<li> <a href="log_format.txt">Format of a log file</a>
|
||||
</ul>
|
||||
|
||||
</body>
|
||||
</html>
|
523
doc/index.md
Normal file
523
doc/index.md
Normal file
@ -0,0 +1,523 @@
|
||||
leveldb
|
||||
=======
|
||||
|
||||
_Jeff Dean, Sanjay Ghemawat_
|
||||
|
||||
The leveldb library provides a persistent key value store. Keys and values are
|
||||
arbitrary byte arrays. The keys are ordered within the key value store
|
||||
according to a user-specified comparator function.
|
||||
|
||||
## Opening A Database
|
||||
|
||||
A leveldb database has a name which corresponds to a file system directory. All
|
||||
of the contents of the database are stored in this directory. The following example
|
||||
shows how to open a database, creating it if necessary:
|
||||
|
||||
```c++
|
||||
#include <cassert>
|
||||
#include "leveldb/db.h"
|
||||
|
||||
leveldb::DB* db;
|
||||
leveldb::Options options;
|
||||
options.create_if_missing = true;
|
||||
leveldb::Status status = leveldb::DB::Open(options, "/tmp/testdb", &db);
|
||||
assert(status.ok());
|
||||
...
|
||||
```
|
||||
|
||||
If you want to raise an error if the database already exists, add the following
|
||||
line before the `leveldb::DB::Open` call:
|
||||
|
||||
```c++
|
||||
options.error_if_exists = true;
|
||||
```
|
||||
|
||||
## Status
|
||||
|
||||
You may have noticed the `leveldb::Status` type above. Values of this type are
|
||||
returned by most functions in leveldb that may encounter an error. You can check
|
||||
if such a result is ok, and also print an associated error message:
|
||||
|
||||
```c++
|
||||
leveldb::Status s = ...;
|
||||
if (!s.ok()) cerr << s.ToString() << endl;
|
||||
```
|
||||
|
||||
## Closing A Database
|
||||
|
||||
When you are done with a database, just delete the database object. Example:
|
||||
|
||||
```c++
|
||||
... open the db as described above ...
|
||||
... do something with db ...
|
||||
delete db;
|
||||
```
|
||||
|
||||
## Reads And Writes
|
||||
|
||||
The database provides Put, Delete, and Get methods to modify/query the database.
|
||||
For example, the following code moves the value stored under key1 to key2.
|
||||
|
||||
```c++
|
||||
std::string value;
|
||||
leveldb::Status s = db->Get(leveldb::ReadOptions(), key1, &value);
|
||||
if (s.ok()) s = db->Put(leveldb::WriteOptions(), key2, value);
|
||||
if (s.ok()) s = db->Delete(leveldb::WriteOptions(), key1);
|
||||
```
|
||||
|
||||
## Atomic Updates
|
||||
|
||||
Note that if the process dies after the Put of key2 but before the delete of
|
||||
key1, the same value may be left stored under multiple keys. Such problems can
|
||||
be avoided by using the `WriteBatch` class to atomically apply a set of updates:
|
||||
|
||||
```c++
|
||||
#include "leveldb/write_batch.h"
|
||||
...
|
||||
std::string value;
|
||||
leveldb::Status s = db->Get(leveldb::ReadOptions(), key1, &value);
|
||||
if (s.ok()) {
|
||||
leveldb::WriteBatch batch;
|
||||
batch.Delete(key1);
|
||||
batch.Put(key2, value);
|
||||
s = db->Write(leveldb::WriteOptions(), &batch);
|
||||
}
|
||||
```
|
||||
|
||||
The `WriteBatch` holds a sequence of edits to be made to the database, and these
|
||||
edits within the batch are applied in order. Note that we called Delete before
|
||||
Put so that if key1 is identical to key2, we do not end up erroneously dropping
|
||||
the value entirely.
|
||||
|
||||
Apart from its atomicity benefits, `WriteBatch` may also be used to speed up
|
||||
bulk updates by placing lots of individual mutations into the same batch.
|
||||
|
||||
## Synchronous Writes
|
||||
|
||||
By default, each write to leveldb is asynchronous: it returns after pushing the
|
||||
write from the process into the operating system. The transfer from operating
|
||||
system memory to the underlying persistent storage happens asynchronously. The
|
||||
sync flag can be turned on for a particular write to make the write operation
|
||||
not return until the data being written has been pushed all the way to
|
||||
persistent storage. (On Posix systems, this is implemented by calling either
|
||||
`fsync(...)` or `fdatasync(...)` or `msync(..., MS_SYNC)` before the write
|
||||
operation returns.)
|
||||
|
||||
```c++
|
||||
leveldb::WriteOptions write_options;
|
||||
write_options.sync = true;
|
||||
db->Put(write_options, ...);
|
||||
```
|
||||
|
||||
Asynchronous writes are often more than a thousand times as fast as synchronous
|
||||
writes. The downside of asynchronous writes is that a crash of the machine may
|
||||
cause the last few updates to be lost. Note that a crash of just the writing
|
||||
process (i.e., not a reboot) will not cause any loss since even when sync is
|
||||
false, an update is pushed from the process memory into the operating system
|
||||
before it is considered done.
|
||||
|
||||
Asynchronous writes can often be used safely. For example, when loading a large
|
||||
amount of data into the database you can handle lost updates by restarting the
|
||||
bulk load after a crash. A hybrid scheme is also possible where every Nth write
|
||||
is synchronous, and in the event of a crash, the bulk load is restarted just
|
||||
after the last synchronous write finished by the previous run. (The synchronous
|
||||
write can update a marker that describes where to restart on a crash.)
|
||||
|
||||
`WriteBatch` provides an alternative to asynchronous writes. Multiple updates
|
||||
may be placed in the same WriteBatch and applied together using a synchronous
|
||||
write (i.e., `write_options.sync` is set to true). The extra cost of the
|
||||
synchronous write will be amortized across all of the writes in the batch.
|
||||
|
||||
## Concurrency
|
||||
|
||||
A database may only be opened by one process at a time. The leveldb
|
||||
implementation acquires a lock from the operating system to prevent misuse.
|
||||
Within a single process, the same `leveldb::DB` object may be safely shared by
|
||||
multiple concurrent threads. I.e., different threads may write into or fetch
|
||||
iterators or call Get on the same database without any external synchronization
|
||||
(the leveldb implementation will automatically do the required synchronization).
|
||||
However other objects (like Iterator and `WriteBatch`) may require external
|
||||
synchronization. If two threads share such an object, they must protect access
|
||||
to it using their own locking protocol. More details are available in the public
|
||||
header files.
|
||||
|
||||
## Iteration
|
||||
|
||||
The following example demonstrates how to print all key,value pairs in a
|
||||
database.
|
||||
|
||||
```c++
|
||||
leveldb::Iterator* it = db->NewIterator(leveldb::ReadOptions());
|
||||
for (it->SeekToFirst(); it->Valid(); it->Next()) {
|
||||
cout << it->key().ToString() << ": " << it->value().ToString() << endl;
|
||||
}
|
||||
assert(it->status().ok()); // Check for any errors found during the scan
|
||||
delete it;
|
||||
```
|
||||
|
||||
The following variation shows how to process just the keys in the range
|
||||
[start,limit):
|
||||
|
||||
```c++
|
||||
for (it->Seek(start);
|
||||
it->Valid() && it->key().ToString() < limit;
|
||||
it->Next()) {
|
||||
...
|
||||
}
|
||||
```
|
||||
|
||||
You can also process entries in reverse order. (Caveat: reverse iteration may be
|
||||
somewhat slower than forward iteration.)
|
||||
|
||||
```c++
|
||||
for (it->SeekToLast(); it->Valid(); it->Prev()) {
|
||||
...
|
||||
}
|
||||
```
|
||||
|
||||
## Snapshots
|
||||
|
||||
Snapshots provide consistent read-only views over the entire state of the
|
||||
key-value store. `ReadOptions::snapshot` may be non-NULL to indicate that a
|
||||
read should operate on a particular version of the DB state. If
|
||||
`ReadOptions::snapshot` is NULL, the read will operate on an implicit snapshot
|
||||
of the current state.
|
||||
|
||||
Snapshots are created by the `DB::GetSnapshot()` method:
|
||||
|
||||
```c++
|
||||
leveldb::ReadOptions options;
|
||||
options.snapshot = db->GetSnapshot();
|
||||
... apply some updates to db ...
|
||||
leveldb::Iterator* iter = db->NewIterator(options);
|
||||
... read using iter to view the state when the snapshot was created ...
|
||||
delete iter;
|
||||
db->ReleaseSnapshot(options.snapshot);
|
||||
```
|
||||
|
||||
Note that when a snapshot is no longer needed, it should be released using the
|
||||
`DB::ReleaseSnapshot` interface. This allows the implementation to get rid of
|
||||
state that was being maintained just to support reading as of that snapshot.
|
||||
|
||||
## Slice
|
||||
|
||||
The return value of the `it->key()` and `it->value()` calls above are instances
|
||||
of the `leveldb::Slice` type. Slice is a simple structure that contains a length
|
||||
and a pointer to an external byte array. Returning a Slice is a cheaper
|
||||
alternative to returning a `std::string` since we do not need to copy
|
||||
potentially large keys and values. In addition, leveldb methods do not return
|
||||
null-terminated C-style strings since leveldb keys and values are allowed to
|
||||
contain `'\0'` bytes.
|
||||
|
||||
C++ strings and null-terminated C-style strings can be easily converted to a
|
||||
Slice:
|
||||
|
||||
```c++
|
||||
leveldb::Slice s1 = "hello";
|
||||
|
||||
std::string str("world");
|
||||
leveldb::Slice s2 = str;
|
||||
```
|
||||
|
||||
A Slice can be easily converted back to a C++ string:
|
||||
|
||||
```c++
|
||||
std::string str = s1.ToString();
|
||||
assert(str == std::string("hello"));
|
||||
```
|
||||
|
||||
Be careful when using Slices since it is up to the caller to ensure that the
|
||||
external byte array into which the Slice points remains live while the Slice is
|
||||
in use. For example, the following is buggy:
|
||||
|
||||
```c++
|
||||
leveldb::Slice slice;
|
||||
if (...) {
|
||||
std::string str = ...;
|
||||
slice = str;
|
||||
}
|
||||
Use(slice);
|
||||
```
|
||||
|
||||
When the if statement goes out of scope, str will be destroyed and the backing
|
||||
storage for slice will disappear.
|
||||
|
||||
## Comparators
|
||||
|
||||
The preceding examples used the default ordering function for keys, which orders
|
||||
bytes lexicographically. You can however supply a custom comparator when opening
|
||||
a database. For example, suppose each database key consists of two numbers and
|
||||
we should sort by the first number, breaking ties by the second number. First,
|
||||
define a proper subclass of `leveldb::Comparator` that expresses these rules:
|
||||
|
||||
```c++
|
||||
class TwoPartComparator : public leveldb::Comparator {
|
||||
public:
|
||||
// Three-way comparison function:
|
||||
// if a < b: negative result
|
||||
// if a > b: positive result
|
||||
// else: zero result
|
||||
int Compare(const leveldb::Slice& a, const leveldb::Slice& b) const {
|
||||
int a1, a2, b1, b2;
|
||||
ParseKey(a, &a1, &a2);
|
||||
ParseKey(b, &b1, &b2);
|
||||
if (a1 < b1) return -1;
|
||||
if (a1 > b1) return +1;
|
||||
if (a2 < b2) return -1;
|
||||
if (a2 > b2) return +1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Ignore the following methods for now:
|
||||
const char* Name() const { return "TwoPartComparator"; }
|
||||
void FindShortestSeparator(std::string*, const leveldb::Slice&) const {}
|
||||
void FindShortSuccessor(std::string*) const {}
|
||||
};
|
||||
```
|
||||
|
||||
Now create a database using this custom comparator:
|
||||
|
||||
```c++
|
||||
TwoPartComparator cmp;
|
||||
leveldb::DB* db;
|
||||
leveldb::Options options;
|
||||
options.create_if_missing = true;
|
||||
options.comparator = &cmp;
|
||||
leveldb::Status status = leveldb::DB::Open(options, "/tmp/testdb", &db);
|
||||
...
|
||||
```
|
||||
|
||||
### Backwards compatibility
|
||||
|
||||
The result of the comparator's Name method is attached to the database when it
|
||||
is created, and is checked on every subsequent database open. If the name
|
||||
changes, the `leveldb::DB::Open` call will fail. Therefore, change the name if
|
||||
and only if the new key format and comparison function are incompatible with
|
||||
existing databases, and it is ok to discard the contents of all existing
|
||||
databases.
|
||||
|
||||
You can however still gradually evolve your key format over time with a little
|
||||
bit of pre-planning. For example, you could store a version number at the end of
|
||||
each key (one byte should suffice for most uses). When you wish to switch to a
|
||||
new key format (e.g., adding an optional third part to the keys processed by
|
||||
`TwoPartComparator`), (a) keep the same comparator name (b) increment the
|
||||
version number for new keys (c) change the comparator function so it uses the
|
||||
version numbers found in the keys to decide how to interpret them.
|
||||
|
||||
## Performance
|
||||
|
||||
Performance can be tuned by changing the default values of the types defined in
|
||||
`include/leveldb/options.h`.
|
||||
|
||||
### Block size
|
||||
|
||||
leveldb groups adjacent keys together into the same block and such a block is
|
||||
the unit of transfer to and from persistent storage. The default block size is
|
||||
approximately 4096 uncompressed bytes. Applications that mostly do bulk scans
|
||||
over the contents of the database may wish to increase this size. Applications
|
||||
that do a lot of point reads of small values may wish to switch to a smaller
|
||||
block size if performance measurements indicate an improvement. There isn't much
|
||||
benefit in using blocks smaller than one kilobyte, or larger than a few
|
||||
megabytes. Also note that compression will be more effective with larger block
|
||||
sizes.
|
||||
|
||||
### Compression
|
||||
|
||||
Each block is individually compressed before being written to persistent
|
||||
storage. Compression is on by default since the default compression method is
|
||||
very fast, and is automatically disabled for uncompressible data. In rare cases,
|
||||
applications may want to disable compression entirely, but should only do so if
|
||||
benchmarks show a performance improvement:
|
||||
|
||||
```c++
|
||||
leveldb::Options options;
|
||||
options.compression = leveldb::kNoCompression;
|
||||
... leveldb::DB::Open(options, name, ...) ....
|
||||
```
|
||||
|
||||
### Cache
|
||||
|
||||
The contents of the database are stored in a set of files in the filesystem and
|
||||
each file stores a sequence of compressed blocks. If options.block_cache is
|
||||
non-NULL, it is used to cache frequently used uncompressed block contents.
|
||||
|
||||
```c++
|
||||
#include "leveldb/cache.h"
|
||||
|
||||
leveldb::Options options;
|
||||
options.block_cache = leveldb::NewLRUCache(100 * 1048576); // 100MB cache
|
||||
leveldb::DB* db;
|
||||
leveldb::DB::Open(options, name, &db);
|
||||
... use the db ...
|
||||
delete db;
|
||||
delete options.block_cache;
|
||||
```
|
||||
|
||||
Note that the cache holds uncompressed data, and therefore it should be sized
|
||||
according to application level data sizes, without any reduction from
|
||||
compression. (Caching of compressed blocks is left to the operating system
|
||||
buffer cache, or any custom Env implementation provided by the client.)
|
||||
|
||||
When performing a bulk read, the application may wish to disable caching so that
|
||||
the data processed by the bulk read does not end up displacing most of the
|
||||
cached contents. A per-iterator option can be used to achieve this:
|
||||
|
||||
```c++
|
||||
leveldb::ReadOptions options;
|
||||
options.fill_cache = false;
|
||||
leveldb::Iterator* it = db->NewIterator(options);
|
||||
for (it->SeekToFirst(); it->Valid(); it->Next()) {
|
||||
...
|
||||
}
|
||||
```
|
||||
|
||||
### Key Layout
|
||||
|
||||
Note that the unit of disk transfer and caching is a block. Adjacent keys
|
||||
(according to the database sort order) will usually be placed in the same block.
|
||||
Therefore the application can improve its performance by placing keys that are
|
||||
accessed together near each other and placing infrequently used keys in a
|
||||
separate region of the key space.
|
||||
|
||||
For example, suppose we are implementing a simple file system on top of leveldb.
|
||||
The types of entries we might wish to store are:
|
||||
|
||||
filename -> permission-bits, length, list of file_block_ids
|
||||
file_block_id -> data
|
||||
|
||||
We might want to prefix filename keys with one letter (say '/') and the
|
||||
`file_block_id` keys with a different letter (say '0') so that scans over just
|
||||
the metadata do not force us to fetch and cache bulky file contents.
|
||||
|
||||
### Filters
|
||||
|
||||
Because of the way leveldb data is organized on disk, a single `Get()` call may
|
||||
involve multiple reads from disk. The optional FilterPolicy mechanism can be
|
||||
used to reduce the number of disk reads substantially.
|
||||
|
||||
```c++
|
||||
leveldb::Options options;
|
||||
options.filter_policy = NewBloomFilterPolicy(10);
|
||||
leveldb::DB* db;
|
||||
leveldb::DB::Open(options, "/tmp/testdb", &db);
|
||||
... use the database ...
|
||||
delete db;
|
||||
delete options.filter_policy;
|
||||
```
|
||||
|
||||
The preceding code associates a Bloom filter based filtering policy with the
|
||||
database. Bloom filter based filtering relies on keeping some number of bits of
|
||||
data in memory per key (in this case 10 bits per key since that is the argument
|
||||
we passed to `NewBloomFilterPolicy`). This filter will reduce the number of
|
||||
unnecessary disk reads needed for Get() calls by a factor of approximately
|
||||
100. Increasing the bits per key will lead to a larger reduction at the cost
|
||||
of more memory usage. We recommend that applications whose working set does not
|
||||
fit in memory and that do a lot of random reads set a filter policy.
|
||||
|
||||
If you are using a custom comparator, you should ensure that the filter policy
|
||||
you are using is compatible with your comparator. For example, consider a
|
||||
comparator that ignores trailing spaces when comparing keys.
|
||||
`NewBloomFilterPolicy` must not be used with such a comparator. Instead, the
|
||||
application should provide a custom filter policy that also ignores trailing
|
||||
spaces. For example:
|
||||
|
||||
```c++
|
||||
class CustomFilterPolicy : public leveldb::FilterPolicy {
|
||||
private:
|
||||
FilterPolicy* builtin_policy_;
|
||||
|
||||
public:
|
||||
CustomFilterPolicy() : builtin_policy_(NewBloomFilterPolicy(10)) {}
|
||||
~CustomFilterPolicy() { delete builtin_policy_; }
|
||||
|
||||
const char* Name() const { return "IgnoreTrailingSpacesFilter"; }
|
||||
|
||||
void CreateFilter(const Slice* keys, int n, std::string* dst) const {
|
||||
// Use builtin bloom filter code after removing trailing spaces
|
||||
std::vector<Slice> trimmed(n);
|
||||
for (int i = 0; i < n; i++) {
|
||||
trimmed[i] = RemoveTrailingSpaces(keys[i]);
|
||||
}
|
||||
builtin_policy_->CreateFilter(trimmed.data(), n, dst);
|
||||
}
|
||||
};
|
||||
```
|
||||
|
||||
Advanced applications may provide a filter policy that does not use a bloom
|
||||
filter but uses some other mechanism for summarizing a set of keys. See
|
||||
`leveldb/filter_policy.h` for details.
|
||||
|
||||
## Checksums
|
||||
|
||||
leveldb associates checksums with all data it stores in the file system. There
|
||||
are two separate controls provided over how aggressively these checksums are
|
||||
verified:
|
||||
|
||||
`ReadOptions::verify_checksums` may be set to true to force checksum
|
||||
verification of all data that is read from the file system on behalf of a
|
||||
particular read. By default, no such verification is done.
|
||||
|
||||
`Options::paranoid_checks` may be set to true before opening a database to make
|
||||
the database implementation raise an error as soon as it detects an internal
|
||||
corruption. Depending on which portion of the database has been corrupted, the
|
||||
error may be raised when the database is opened, or later by another database
|
||||
operation. By default, paranoid checking is off so that the database can be used
|
||||
even if parts of its persistent storage have been corrupted.
|
||||
|
||||
If a database is corrupted (perhaps it cannot be opened when paranoid checking
|
||||
is turned on), the `leveldb::RepairDB` function may be used to recover as much
|
||||
of the data as possible.
|
||||
|
||||
## Approximate Sizes
|
||||
|
||||
The `GetApproximateSizes` method can be used to get the approximate number of bytes
|
||||
of file system space used by one or more key ranges.
|
||||
|
||||
```c++
|
||||
leveldb::Range ranges[2];
|
||||
ranges[0] = leveldb::Range("a", "c");
|
||||
ranges[1] = leveldb::Range("x", "z");
|
||||
uint64_t sizes[2];
|
||||
leveldb::Status s = db->GetApproximateSizes(ranges, 2, sizes);
|
||||
```
|
||||
|
||||
The preceding call will set `sizes[0]` to the approximate number of bytes of
|
||||
file system space used by the key range `[a..c)` and `sizes[1]` to the
|
||||
approximate number of bytes used by the key range `[x..z)`.
|
||||
|
||||
## Environment
|
||||
|
||||
All file operations (and other operating system calls) issued by the leveldb
|
||||
implementation are routed through a `leveldb::Env` object. Sophisticated clients
|
||||
may wish to provide their own Env implementation to get better control.
|
||||
For example, an application may introduce artificial delays in the file IO
|
||||
paths to limit the impact of leveldb on other activities in the system.
|
||||
|
||||
```c++
|
||||
class SlowEnv : public leveldb::Env {
|
||||
... implementation of the Env interface ...
|
||||
};
|
||||
|
||||
SlowEnv env;
|
||||
leveldb::Options options;
|
||||
options.env = &env;
|
||||
Status s = leveldb::DB::Open(options, ...);
|
||||
```
|
||||
|
||||
## Porting
|
||||
|
||||
leveldb may be ported to a new platform by providing platform specific
|
||||
implementations of the types/methods/functions exported by
|
||||
`leveldb/port/port.h`. See `leveldb/port/port_example.h` for more details.
|
||||
|
||||
In addition, the new platform may need a new default `leveldb::Env`
|
||||
implementation. See `leveldb/util/env_posix.h` for an example.
|
||||
|
||||
## Other Information
|
||||
|
||||
Details about the leveldb implementation may be found in the following
|
||||
documents:
|
||||
|
||||
1. [Implementation notes](impl.md)
|
||||
2. [Format of an immutable Table file](table_format.md)
|
||||
3. [Format of a log file](log_format.md)
|
75
doc/log_format.md
Normal file
75
doc/log_format.md
Normal file
@ -0,0 +1,75 @@
|
||||
leveldb Log format
|
||||
==================
|
||||
The log file contents are a sequence of 32KB blocks. The only exception is that
|
||||
the tail of the file may contain a partial block.
|
||||
|
||||
Each block consists of a sequence of records:
|
||||
|
||||
block := record* trailer?
|
||||
record :=
|
||||
checksum: uint32 // crc32c of type and data[] ; little-endian
|
||||
length: uint16 // little-endian
|
||||
type: uint8 // One of FULL, FIRST, MIDDLE, LAST
|
||||
data: uint8[length]
|
||||
|
||||
A record never starts within the last six bytes of a block (since it won't fit).
|
||||
Any leftover bytes here form the trailer, which must consist entirely of zero
|
||||
bytes and must be skipped by readers.
|
||||
|
||||
Aside: if exactly seven bytes are left in the current block, and a new non-zero
|
||||
length record is added, the writer must emit a FIRST record (which contains zero
|
||||
bytes of user data) to fill up the trailing seven bytes of the block and then
|
||||
emit all of the user data in subsequent blocks.
|
||||
|
||||
More types may be added in the future. Some Readers may skip record types they
|
||||
do not understand, others may report that some data was skipped.
|
||||
|
||||
FULL == 1
|
||||
FIRST == 2
|
||||
MIDDLE == 3
|
||||
LAST == 4
|
||||
|
||||
The FULL record contains the contents of an entire user record.
|
||||
|
||||
FIRST, MIDDLE, LAST are types used for user records that have been split into
|
||||
multiple fragments (typically because of block boundaries). FIRST is the type
|
||||
of the first fragment of a user record, LAST is the type of the last fragment of
|
||||
a user record, and MIDDLE is the type of all interior fragments of a user
|
||||
record.
|
||||
|
||||
Example: consider a sequence of user records:
|
||||
|
||||
A: length 1000
|
||||
B: length 97270
|
||||
C: length 8000
|
||||
|
||||
**A** will be stored as a FULL record in the first block.
|
||||
|
||||
**B** will be split into three fragments: first fragment occupies the rest of
|
||||
the first block, second fragment occupies the entirety of the second block, and
|
||||
the third fragment occupies a prefix of the third block. This will leave six
|
||||
bytes free in the third block, which will be left empty as the trailer.
|
||||
|
||||
**C** will be stored as a FULL record in the fourth block.
|
||||
|
||||
----
|
||||
|
||||
## Some benefits over the recordio format:
|
||||
|
||||
1. We do not need any heuristics for resyncing - just go to next block boundary
|
||||
and scan. If there is a corruption, skip to the next block. As a
|
||||
side-benefit, we do not get confused when part of the contents of one log
|
||||
file are embedded as a record inside another log file.
|
||||
|
||||
2. Splitting at approximate boundaries (e.g., for mapreduce) is simple: find the
|
||||
next block boundary and skip records until we hit a FULL or FIRST record.
|
||||
|
||||
3. We do not need extra buffering for large records.
|
||||
|
||||
## Some downsides compared to recordio format:
|
||||
|
||||
1. No packing of tiny records. This could be fixed by adding a new record type,
|
||||
so it is a shortcoming of the current implementation, not necessarily the
|
||||
format.
|
||||
|
||||
2. No compression. Again, this could be fixed by adding new record types.
|
@ -1,75 +0,0 @@
|
||||
The log file contents are a sequence of 32KB blocks. The only
|
||||
exception is that the tail of the file may contain a partial block.
|
||||
|
||||
Each block consists of a sequence of records:
|
||||
block := record* trailer?
|
||||
record :=
|
||||
checksum: uint32 // crc32c of type and data[] ; little-endian
|
||||
length: uint16 // little-endian
|
||||
type: uint8 // One of FULL, FIRST, MIDDLE, LAST
|
||||
data: uint8[length]
|
||||
|
||||
A record never starts within the last six bytes of a block (since it
|
||||
won't fit). Any leftover bytes here form the trailer, which must
|
||||
consist entirely of zero bytes and must be skipped by readers.
|
||||
|
||||
Aside: if exactly seven bytes are left in the current block, and a new
|
||||
non-zero length record is added, the writer must emit a FIRST record
|
||||
(which contains zero bytes of user data) to fill up the trailing seven
|
||||
bytes of the block and then emit all of the user data in subsequent
|
||||
blocks.
|
||||
|
||||
More types may be added in the future. Some Readers may skip record
|
||||
types they do not understand, others may report that some data was
|
||||
skipped.
|
||||
|
||||
FULL == 1
|
||||
FIRST == 2
|
||||
MIDDLE == 3
|
||||
LAST == 4
|
||||
|
||||
The FULL record contains the contents of an entire user record.
|
||||
|
||||
FIRST, MIDDLE, LAST are types used for user records that have been
|
||||
split into multiple fragments (typically because of block boundaries).
|
||||
FIRST is the type of the first fragment of a user record, LAST is the
|
||||
type of the last fragment of a user record, and MIDDLE is the type of
|
||||
all interior fragments of a user record.
|
||||
|
||||
Example: consider a sequence of user records:
|
||||
A: length 1000
|
||||
B: length 97270
|
||||
C: length 8000
|
||||
A will be stored as a FULL record in the first block.
|
||||
|
||||
B will be split into three fragments: first fragment occupies the rest
|
||||
of the first block, second fragment occupies the entirety of the
|
||||
second block, and the third fragment occupies a prefix of the third
|
||||
block. This will leave six bytes free in the third block, which will
|
||||
be left empty as the trailer.
|
||||
|
||||
C will be stored as a FULL record in the fourth block.
|
||||
|
||||
===================
|
||||
|
||||
Some benefits over the recordio format:
|
||||
|
||||
(1) We do not need any heuristics for resyncing - just go to next
|
||||
block boundary and scan. If there is a corruption, skip to the next
|
||||
block. As a side-benefit, we do not get confused when part of the
|
||||
contents of one log file are embedded as a record inside another log
|
||||
file.
|
||||
|
||||
(2) Splitting at approximate boundaries (e.g., for mapreduce) is
|
||||
simple: find the next block boundary and skip records until we
|
||||
hit a FULL or FIRST record.
|
||||
|
||||
(3) We do not need extra buffering for large records.
|
||||
|
||||
Some downsides compared to recordio format:
|
||||
|
||||
(1) No packing of tiny records. This could be fixed by adding a new
|
||||
record type, so it is a shortcoming of the current implementation,
|
||||
not necessarily the format.
|
||||
|
||||
(2) No compression. Again, this could be fixed by adding new record types.
|
107
doc/table_format.md
Normal file
107
doc/table_format.md
Normal file
@ -0,0 +1,107 @@
|
||||
leveldb File format
|
||||
===================
|
||||
|
||||
<beginning_of_file>
|
||||
[data block 1]
|
||||
[data block 2]
|
||||
...
|
||||
[data block N]
|
||||
[meta block 1]
|
||||
...
|
||||
[meta block K]
|
||||
[metaindex block]
|
||||
[index block]
|
||||
[Footer] (fixed size; starts at file_size - sizeof(Footer))
|
||||
<end_of_file>
|
||||
|
||||
The file contains internal pointers. Each such pointer is called
|
||||
a BlockHandle and contains the following information:
|
||||
|
||||
offset: varint64
|
||||
size: varint64
|
||||
|
||||
See [varints](https://developers.google.com/protocol-buffers/docs/encoding#varints)
|
||||
for an explanation of varint64 format.
|
||||
|
||||
1. The key/value pairs in the file are stored in sorted
|
||||
order and partitioned into a sequence of data blocks. These blocks
|
||||
come one after another at the beginning of the file. Each data block
|
||||
is formatted according to the code in `block_builder.cc`, and then
|
||||
optionally compressed.
|
||||
|
||||
2. After the data blocks we store a bunch of meta blocks. The
|
||||
supported meta block types are described below. More meta block types
|
||||
may be added in the future. Each meta block is again formatted using
|
||||
`block_builder.cc` and then optionally compressed.
|
||||
|
||||
3. A "metaindex" block. It contains one entry for every other meta
|
||||
block where the key is the name of the meta block and the value is a
|
||||
BlockHandle pointing to that meta block.
|
||||
|
||||
4. An "index" block. This block contains one entry per data block,
|
||||
where the key is a string >= last key in that data block and before
|
||||
the first key in the successive data block. The value is the
|
||||
BlockHandle for the data block.
|
||||
|
||||
5. At the very end of the file is a fixed length footer that contains
|
||||
the BlockHandle of the metaindex and index blocks as well as a magic number.
|
||||
|
||||
metaindex_handle: char[p]; // Block handle for metaindex
|
||||
index_handle: char[q]; // Block handle for index
|
||||
padding: char[40-p-q];// zeroed bytes to make fixed length
|
||||
// (40==2*BlockHandle::kMaxEncodedLength)
|
||||
magic: fixed64; // == 0xdb4775248b80fb57 (little-endian)
|
||||
|
||||
## "filter" Meta Block
|
||||
|
||||
If a `FilterPolicy` was specified when the database was opened, a
|
||||
filter block is stored in each table. The "metaindex" block contains
|
||||
an entry that maps from `filter.<N>` to the BlockHandle for the filter
|
||||
block where `<N>` is the string returned by the filter policy's
|
||||
`Name()` method.
|
||||
|
||||
The filter block stores a sequence of filters, where filter i contains
|
||||
the output of `FilterPolicy::CreateFilter()` on all keys that are stored
|
||||
in a block whose file offset falls within the range
|
||||
|
||||
[ i*base ... (i+1)*base-1 ]
|
||||
|
||||
Currently, "base" is 2KB. So for example, if blocks X and Y start in
|
||||
the range `[ 0KB .. 2KB-1 ]`, all of the keys in X and Y will be
|
||||
converted to a filter by calling `FilterPolicy::CreateFilter()`, and the
|
||||
resulting filter will be stored as the first filter in the filter
|
||||
block.
|
||||
|
||||
The filter block is formatted as follows:
|
||||
|
||||
[filter 0]
|
||||
[filter 1]
|
||||
[filter 2]
|
||||
...
|
||||
[filter N-1]
|
||||
|
||||
[offset of filter 0] : 4 bytes
|
||||
[offset of filter 1] : 4 bytes
|
||||
[offset of filter 2] : 4 bytes
|
||||
...
|
||||
[offset of filter N-1] : 4 bytes
|
||||
|
||||
[offset of beginning of offset array] : 4 bytes
|
||||
lg(base) : 1 byte
|
||||
|
||||
The offset array at the end of the filter block allows efficient
|
||||
mapping from a data block offset to the corresponding filter.
|
||||
|
||||
## "stats" Meta Block
|
||||
|
||||
This meta block contains a bunch of stats. The key is the name
|
||||
of the statistic. The value contains the statistic.
|
||||
|
||||
TODO(postrelease): record following stats.
|
||||
|
||||
data size
|
||||
index size
|
||||
key size (uncompressed)
|
||||
value size (uncompressed)
|
||||
number of entries
|
||||
number of data blocks
|
@ -1,104 +0,0 @@
|
||||
File format
|
||||
===========
|
||||
|
||||
<beginning_of_file>
|
||||
[data block 1]
|
||||
[data block 2]
|
||||
...
|
||||
[data block N]
|
||||
[meta block 1]
|
||||
...
|
||||
[meta block K]
|
||||
[metaindex block]
|
||||
[index block]
|
||||
[Footer] (fixed size; starts at file_size - sizeof(Footer))
|
||||
<end_of_file>
|
||||
|
||||
The file contains internal pointers. Each such pointer is called
|
||||
a BlockHandle and contains the following information:
|
||||
offset: varint64
|
||||
size: varint64
|
||||
See https://developers.google.com/protocol-buffers/docs/encoding#varints
|
||||
for an explanation of varint64 format.
|
||||
|
||||
(1) The sequence of key/value pairs in the file are stored in sorted
|
||||
order and partitioned into a sequence of data blocks. These blocks
|
||||
come one after another at the beginning of the file. Each data block
|
||||
is formatted according to the code in block_builder.cc, and then
|
||||
optionally compressed.
|
||||
|
||||
(2) After the data blocks we store a bunch of meta blocks. The
|
||||
supported meta block types are described below. More meta block types
|
||||
may be added in the future. Each meta block is again formatted using
|
||||
block_builder.cc and then optionally compressed.
|
||||
|
||||
(3) A "metaindex" block. It contains one entry for every other meta
|
||||
block where the key is the name of the meta block and the value is a
|
||||
BlockHandle pointing to that meta block.
|
||||
|
||||
(4) An "index" block. This block contains one entry per data block,
|
||||
where the key is a string >= last key in that data block and before
|
||||
the first key in the successive data block. The value is the
|
||||
BlockHandle for the data block.
|
||||
|
||||
(6) At the very end of the file is a fixed length footer that contains
|
||||
the BlockHandle of the metaindex and index blocks as well as a magic number.
|
||||
metaindex_handle: char[p]; // Block handle for metaindex
|
||||
index_handle: char[q]; // Block handle for index
|
||||
padding: char[40-p-q]; // zeroed bytes to make fixed length
|
||||
// (40==2*BlockHandle::kMaxEncodedLength)
|
||||
magic: fixed64; // == 0xdb4775248b80fb57 (little-endian)
|
||||
|
||||
"filter" Meta Block
|
||||
-------------------
|
||||
|
||||
If a "FilterPolicy" was specified when the database was opened, a
|
||||
filter block is stored in each table. The "metaindex" block contains
|
||||
an entry that maps from "filter.<N>" to the BlockHandle for the filter
|
||||
block where "<N>" is the string returned by the filter policy's
|
||||
"Name()" method.
|
||||
|
||||
The filter block stores a sequence of filters, where filter i contains
|
||||
the output of FilterPolicy::CreateFilter() on all keys that are stored
|
||||
in a block whose file offset falls within the range
|
||||
|
||||
[ i*base ... (i+1)*base-1 ]
|
||||
|
||||
Currently, "base" is 2KB. So for example, if blocks X and Y start in
|
||||
the range [ 0KB .. 2KB-1 ], all of the keys in X and Y will be
|
||||
converted to a filter by calling FilterPolicy::CreateFilter(), and the
|
||||
resulting filter will be stored as the first filter in the filter
|
||||
block.
|
||||
|
||||
The filter block is formatted as follows:
|
||||
|
||||
[filter 0]
|
||||
[filter 1]
|
||||
[filter 2]
|
||||
...
|
||||
[filter N-1]
|
||||
|
||||
[offset of filter 0] : 4 bytes
|
||||
[offset of filter 1] : 4 bytes
|
||||
[offset of filter 2] : 4 bytes
|
||||
...
|
||||
[offset of filter N-1] : 4 bytes
|
||||
|
||||
[offset of beginning of offset array] : 4 bytes
|
||||
lg(base) : 1 byte
|
||||
|
||||
The offset array at the end of the filter block allows efficient
|
||||
mapping from a data block offset to the corresponding filter.
|
||||
|
||||
"stats" Meta Block
|
||||
------------------
|
||||
|
||||
This meta block contains a bunch of stats. The key is the name
|
||||
of the statistic. The value contains the statistic.
|
||||
TODO(postrelease): record following stats.
|
||||
data size
|
||||
index size
|
||||
key size (uncompressed)
|
||||
value size (uncompressed)
|
||||
number of entries
|
||||
number of data blocks
|
@ -4,14 +4,18 @@
|
||||
|
||||
#include "helpers/memenv/memenv.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include <limits>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "leveldb/env.h"
|
||||
#include "leveldb/status.h"
|
||||
#include "port/port.h"
|
||||
#include "port/thread_annotations.h"
|
||||
#include "util/mutexlock.h"
|
||||
#include <map>
|
||||
#include <string.h>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace leveldb {
|
||||
|
||||
@ -23,6 +27,10 @@ class FileState {
|
||||
// and the caller must call Ref() at least once.
|
||||
FileState() : refs_(0), size_(0) {}
|
||||
|
||||
// No copying allowed.
|
||||
FileState(const FileState&) = delete;
|
||||
FileState& operator=(const FileState&) = delete;
|
||||
|
||||
// Increase the reference count.
|
||||
void Ref() {
|
||||
MutexLock lock(&refs_mutex_);
|
||||
@ -47,9 +55,22 @@ class FileState {
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t Size() const { return size_; }
|
||||
uint64_t Size() const {
|
||||
MutexLock lock(&blocks_mutex_);
|
||||
return size_;
|
||||
}
|
||||
|
||||
void Truncate() {
|
||||
MutexLock lock(&blocks_mutex_);
|
||||
for (char*& block : blocks_) {
|
||||
delete[] block;
|
||||
}
|
||||
blocks_.clear();
|
||||
size_ = 0;
|
||||
}
|
||||
|
||||
Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const {
|
||||
MutexLock lock(&blocks_mutex_);
|
||||
if (offset > size_) {
|
||||
return Status::IOError("Offset greater than file size.");
|
||||
}
|
||||
@ -62,16 +83,9 @@ class FileState {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
assert(offset / kBlockSize <= SIZE_MAX);
|
||||
assert(offset / kBlockSize <= std::numeric_limits<size_t>::max());
|
||||
size_t block = static_cast<size_t>(offset / kBlockSize);
|
||||
size_t block_offset = offset % kBlockSize;
|
||||
|
||||
if (n <= kBlockSize - block_offset) {
|
||||
// The requested bytes are all in the first block.
|
||||
*result = Slice(blocks_[block] + block_offset, n);
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
size_t bytes_to_copy = n;
|
||||
char* dst = scratch;
|
||||
|
||||
@ -96,6 +110,7 @@ class FileState {
|
||||
const char* src = data.data();
|
||||
size_t src_len = data.size();
|
||||
|
||||
MutexLock lock(&blocks_mutex_);
|
||||
while (src_len > 0) {
|
||||
size_t avail;
|
||||
size_t offset = size_ % kBlockSize;
|
||||
@ -122,28 +137,17 @@ class FileState {
|
||||
}
|
||||
|
||||
private:
|
||||
// Private since only Unref() should be used to delete it.
|
||||
~FileState() {
|
||||
for (std::vector<char*>::iterator i = blocks_.begin(); i != blocks_.end();
|
||||
++i) {
|
||||
delete [] *i;
|
||||
}
|
||||
}
|
||||
enum { kBlockSize = 8 * 1024 };
|
||||
|
||||
// No copying allowed.
|
||||
FileState(const FileState&);
|
||||
void operator=(const FileState&);
|
||||
// Private since only Unref() should be used to delete it.
|
||||
~FileState() { Truncate(); }
|
||||
|
||||
port::Mutex refs_mutex_;
|
||||
int refs_; // Protected by refs_mutex_;
|
||||
int refs_ GUARDED_BY(refs_mutex_);
|
||||
|
||||
// The following fields are not protected by any mutex. They are only mutable
|
||||
// while the file is being written, and concurrent access is not allowed
|
||||
// to writable files.
|
||||
std::vector<char*> blocks_;
|
||||
uint64_t size_;
|
||||
|
||||
enum { kBlockSize = 8 * 1024 };
|
||||
mutable port::Mutex blocks_mutex_;
|
||||
std::vector<char*> blocks_ GUARDED_BY(blocks_mutex_);
|
||||
uint64_t size_ GUARDED_BY(blocks_mutex_);
|
||||
};
|
||||
|
||||
class SequentialFileImpl : public SequentialFile {
|
||||
@ -152,11 +156,9 @@ class SequentialFileImpl : public SequentialFile {
|
||||
file_->Ref();
|
||||
}
|
||||
|
||||
~SequentialFileImpl() {
|
||||
file_->Unref();
|
||||
}
|
||||
~SequentialFileImpl() override { file_->Unref(); }
|
||||
|
||||
virtual Status Read(size_t n, Slice* result, char* scratch) {
|
||||
Status Read(size_t n, Slice* result, char* scratch) override {
|
||||
Status s = file_->Read(pos_, n, result, scratch);
|
||||
if (s.ok()) {
|
||||
pos_ += result->size();
|
||||
@ -164,7 +166,7 @@ class SequentialFileImpl : public SequentialFile {
|
||||
return s;
|
||||
}
|
||||
|
||||
virtual Status Skip(uint64_t n) {
|
||||
Status Skip(uint64_t n) override {
|
||||
if (pos_ > file_->Size()) {
|
||||
return Status::IOError("pos_ > file_->Size()");
|
||||
}
|
||||
@ -183,16 +185,12 @@ class SequentialFileImpl : public SequentialFile {
|
||||
|
||||
class RandomAccessFileImpl : public RandomAccessFile {
|
||||
public:
|
||||
explicit RandomAccessFileImpl(FileState* file) : file_(file) {
|
||||
file_->Ref();
|
||||
}
|
||||
explicit RandomAccessFileImpl(FileState* file) : file_(file) { file_->Ref(); }
|
||||
|
||||
~RandomAccessFileImpl() {
|
||||
file_->Unref();
|
||||
}
|
||||
~RandomAccessFileImpl() override { file_->Unref(); }
|
||||
|
||||
virtual Status Read(uint64_t offset, size_t n, Slice* result,
|
||||
char* scratch) const {
|
||||
Status Read(uint64_t offset, size_t n, Slice* result,
|
||||
char* scratch) const override {
|
||||
return file_->Read(offset, n, result, scratch);
|
||||
}
|
||||
|
||||
@ -202,21 +200,15 @@ class RandomAccessFileImpl : public RandomAccessFile {
|
||||
|
||||
class WritableFileImpl : public WritableFile {
|
||||
public:
|
||||
WritableFileImpl(FileState* file) : file_(file) {
|
||||
file_->Ref();
|
||||
}
|
||||
WritableFileImpl(FileState* file) : file_(file) { file_->Ref(); }
|
||||
|
||||
~WritableFileImpl() {
|
||||
file_->Unref();
|
||||
}
|
||||
~WritableFileImpl() override { file_->Unref(); }
|
||||
|
||||
virtual Status Append(const Slice& data) {
|
||||
return file_->Append(data);
|
||||
}
|
||||
Status Append(const Slice& data) override { return file_->Append(data); }
|
||||
|
||||
virtual Status Close() { return Status::OK(); }
|
||||
virtual Status Flush() { return Status::OK(); }
|
||||
virtual Status Sync() { return Status::OK(); }
|
||||
Status Close() override { return Status::OK(); }
|
||||
Status Flush() override { return Status::OK(); }
|
||||
Status Sync() override { return Status::OK(); }
|
||||
|
||||
private:
|
||||
FileState* file_;
|
||||
@ -224,25 +216,25 @@ class WritableFileImpl : public WritableFile {
|
||||
|
||||
class NoOpLogger : public Logger {
|
||||
public:
|
||||
virtual void Logv(const char* format, va_list ap) { }
|
||||
void Logv(const char* format, va_list ap) override {}
|
||||
};
|
||||
|
||||
class InMemoryEnv : public EnvWrapper {
|
||||
public:
|
||||
explicit InMemoryEnv(Env* base_env) : EnvWrapper(base_env) { }
|
||||
explicit InMemoryEnv(Env* base_env) : EnvWrapper(base_env) {}
|
||||
|
||||
virtual ~InMemoryEnv() {
|
||||
for (FileSystem::iterator i = file_map_.begin(); i != file_map_.end(); ++i){
|
||||
i->second->Unref();
|
||||
~InMemoryEnv() override {
|
||||
for (const auto& kvp : file_map_) {
|
||||
kvp.second->Unref();
|
||||
}
|
||||
}
|
||||
|
||||
// Partial implementation of the Env interface.
|
||||
virtual Status NewSequentialFile(const std::string& fname,
|
||||
SequentialFile** result) {
|
||||
Status NewSequentialFile(const std::string& fname,
|
||||
SequentialFile** result) override {
|
||||
MutexLock lock(&mutex_);
|
||||
if (file_map_.find(fname) == file_map_.end()) {
|
||||
*result = NULL;
|
||||
*result = nullptr;
|
||||
return Status::IOError(fname, "File not found");
|
||||
}
|
||||
|
||||
@ -250,11 +242,11 @@ class InMemoryEnv : public EnvWrapper {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
virtual Status NewRandomAccessFile(const std::string& fname,
|
||||
RandomAccessFile** result) {
|
||||
Status NewRandomAccessFile(const std::string& fname,
|
||||
RandomAccessFile** result) override {
|
||||
MutexLock lock(&mutex_);
|
||||
if (file_map_.find(fname) == file_map_.end()) {
|
||||
*result = NULL;
|
||||
*result = nullptr;
|
||||
return Status::IOError(fname, "File not found");
|
||||
}
|
||||
|
||||
@ -262,33 +254,51 @@ class InMemoryEnv : public EnvWrapper {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
virtual Status NewWritableFile(const std::string& fname,
|
||||
WritableFile** result) {
|
||||
Status NewWritableFile(const std::string& fname,
|
||||
WritableFile** result) override {
|
||||
MutexLock lock(&mutex_);
|
||||
if (file_map_.find(fname) != file_map_.end()) {
|
||||
DeleteFileInternal(fname);
|
||||
}
|
||||
FileSystem::iterator it = file_map_.find(fname);
|
||||
|
||||
FileState* file = new FileState();
|
||||
file->Ref();
|
||||
file_map_[fname] = file;
|
||||
FileState* file;
|
||||
if (it == file_map_.end()) {
|
||||
// File is not currently open.
|
||||
file = new FileState();
|
||||
file->Ref();
|
||||
file_map_[fname] = file;
|
||||
} else {
|
||||
file = it->second;
|
||||
file->Truncate();
|
||||
}
|
||||
|
||||
*result = new WritableFileImpl(file);
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
virtual bool FileExists(const std::string& fname) {
|
||||
// Opens fname for appending and returns a writer for it in *result.
// If fname has no entry yet, a new empty FileState is created and registered
// in file_map_ first. Always returns Status::OK().
Status NewAppendableFile(const std::string& fname,
                         WritableFile** result) override {
  MutexLock lock(&mutex_);
  // operator[] inserts a null entry when fname is not in the map yet.
  FileState** sptr = &file_map_[fname];
  FileState* file = *sptr;
  if (file == nullptr) {
    file = new FileState();
    file->Ref();  // Reference held by file_map_.
    // Publish the new state through the map slot. Without this the entry
    // stays null: the file is never visible to later calls, the FileState
    // leaks, and ~InMemoryEnv() dereferences the null entry.
    *sptr = file;
  }
  *result = new WritableFileImpl(file);
  return Status::OK();
}
|
||||
|
||||
bool FileExists(const std::string& fname) override {
|
||||
MutexLock lock(&mutex_);
|
||||
return file_map_.find(fname) != file_map_.end();
|
||||
}
|
||||
|
||||
virtual Status GetChildren(const std::string& dir,
|
||||
std::vector<std::string>* result) {
|
||||
Status GetChildren(const std::string& dir,
|
||||
std::vector<std::string>* result) override {
|
||||
MutexLock lock(&mutex_);
|
||||
result->clear();
|
||||
|
||||
for (FileSystem::iterator i = file_map_.begin(); i != file_map_.end(); ++i){
|
||||
const std::string& filename = i->first;
|
||||
for (const auto& kvp : file_map_) {
|
||||
const std::string& filename = kvp.first;
|
||||
|
||||
if (filename.size() >= dir.size() + 1 && filename[dir.size()] == '/' &&
|
||||
Slice(filename).starts_with(Slice(dir))) {
|
||||
@ -299,7 +309,8 @@ class InMemoryEnv : public EnvWrapper {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
void DeleteFileInternal(const std::string& fname) {
|
||||
void DeleteFileInternal(const std::string& fname)
|
||||
EXCLUSIVE_LOCKS_REQUIRED(mutex_) {
|
||||
if (file_map_.find(fname) == file_map_.end()) {
|
||||
return;
|
||||
}
|
||||
@ -308,7 +319,7 @@ class InMemoryEnv : public EnvWrapper {
|
||||
file_map_.erase(fname);
|
||||
}
|
||||
|
||||
virtual Status DeleteFile(const std::string& fname) {
|
||||
Status DeleteFile(const std::string& fname) override {
|
||||
MutexLock lock(&mutex_);
|
||||
if (file_map_.find(fname) == file_map_.end()) {
|
||||
return Status::IOError(fname, "File not found");
|
||||
@ -318,15 +329,11 @@ class InMemoryEnv : public EnvWrapper {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
virtual Status CreateDir(const std::string& dirname) {
|
||||
return Status::OK();
|
||||
}
|
||||
// No directory state is recorded by this method; it ignores dirname and
// always reports success.
Status CreateDir(const std::string& dirname) override {
  return Status::OK();
}
|
||||
|
||||
virtual Status DeleteDir(const std::string& dirname) {
|
||||
return Status::OK();
|
||||
}
|
||||
// Nothing is removed by this method; it ignores dirname and always reports
// success.
Status DeleteDir(const std::string& dirname) override {
  return Status::OK();
}
|
||||
|
||||
virtual Status GetFileSize(const std::string& fname, uint64_t* file_size) {
|
||||
Status GetFileSize(const std::string& fname, uint64_t* file_size) override {
|
||||
MutexLock lock(&mutex_);
|
||||
if (file_map_.find(fname) == file_map_.end()) {
|
||||
return Status::IOError(fname, "File not found");
|
||||
@ -336,8 +343,8 @@ class InMemoryEnv : public EnvWrapper {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
virtual Status RenameFile(const std::string& src,
|
||||
const std::string& target) {
|
||||
Status RenameFile(const std::string& src,
|
||||
const std::string& target) override {
|
||||
MutexLock lock(&mutex_);
|
||||
if (file_map_.find(src) == file_map_.end()) {
|
||||
return Status::IOError(src, "File not found");
|
||||
@ -349,22 +356,22 @@ class InMemoryEnv : public EnvWrapper {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
virtual Status LockFile(const std::string& fname, FileLock** lock) {
|
||||
// Hands back a freshly allocated FileLock in *lock. fname is not consulted,
// so the call always succeeds. Release the lock with UnlockFile().
Status LockFile(const std::string& fname, FileLock** lock) override {
  FileLock* handle = new FileLock;
  *lock = handle;
  return Status::OK();
}
|
||||
|
||||
virtual Status UnlockFile(FileLock* lock) {
|
||||
// Destroys a lock object previously returned through LockFile(). Always
// returns Status::OK().
Status UnlockFile(FileLock* lock) override {
  delete lock;  // Frees the FileLock allocated in LockFile().
  return Status::OK();
}
|
||||
|
||||
virtual Status GetTestDirectory(std::string* path) {
|
||||
// Stores the fixed path "/test" in *path and reports success.
Status GetTestDirectory(std::string* path) override {
  path->assign("/test");
  return Status::OK();
}
|
||||
|
||||
virtual Status NewLogger(const std::string& fname, Logger** result) {
|
||||
// Returns a newly allocated NoOpLogger in *result. fname is ignored, so the
// call always succeeds.
Status NewLogger(const std::string& fname, Logger** result) override {
  Logger* sink = new NoOpLogger;
  *result = sink;
  return Status::OK();
}
|
||||
@ -372,14 +379,13 @@ class InMemoryEnv : public EnvWrapper {
|
||||
private:
|
||||
// Map from filenames to FileState objects, representing a simple file system.
|
||||
typedef std::map<std::string, FileState*> FileSystem;
|
||||
|
||||
port::Mutex mutex_;
|
||||
FileSystem file_map_; // Protected by mutex_.
|
||||
FileSystem file_map_ GUARDED_BY(mutex_);
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
Env* NewMemEnv(Env* base_env) {
|
||||
return new InMemoryEnv(base_env);
|
||||
}
|
||||
// Builds a new InMemoryEnv layered on top of base_env and returns it as an
// Env*. The result is heap-allocated.
Env* NewMemEnv(Env* base_env) {
  Env* mem_env = new InMemoryEnv(base_env);
  return mem_env;
}
|
||||
|
||||
} // namespace leveldb
|
||||
|
@ -5,6 +5,8 @@
|
||||
#ifndef STORAGE_LEVELDB_HELPERS_MEMENV_MEMENV_H_
|
||||
#define STORAGE_LEVELDB_HELPERS_MEMENV_MEMENV_H_
|
||||
|
||||
#include "leveldb/export.h"
|
||||
|
||||
namespace leveldb {
|
||||
|
||||
class Env;
|
||||
@ -13,7 +15,7 @@ class Env;
|
||||
// all non-file-storage tasks to base_env. The caller must delete the result
|
||||
// when it is no longer needed.
|
||||
// *base_env must remain live while the result is in use.
|
||||
Env* NewMemEnv(Env* base_env);
|
||||
LEVELDB_EXPORT Env* NewMemEnv(Env* base_env);
|
||||
|
||||
} // namespace leveldb
|
||||
|
||||
|
@ -4,25 +4,22 @@
|
||||
|
||||
#include "helpers/memenv/memenv.h"
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "db/db_impl.h"
|
||||
#include "leveldb/db.h"
|
||||
#include "leveldb/env.h"
|
||||
#include "util/testharness.h"
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace leveldb {
|
||||
|
||||
class MemEnvTest {
|
||||
public:
|
||||
Env* env_;
|
||||
MemEnvTest() : env_(NewMemEnv(Env::Default())) {}
|
||||
~MemEnvTest() { delete env_; }
|
||||
|
||||
MemEnvTest()
|
||||
: env_(NewMemEnv(Env::Default())) {
|
||||
}
|
||||
~MemEnvTest() {
|
||||
delete env_;
|
||||
}
|
||||
Env* env_;
|
||||
};
|
||||
|
||||
TEST(MemEnvTest, Basics) {
|
||||
@ -40,6 +37,8 @@ TEST(MemEnvTest, Basics) {
|
||||
|
||||
// Create a file.
|
||||
ASSERT_OK(env_->NewWritableFile("/dir/f", &writable_file));
|
||||
ASSERT_OK(env_->GetFileSize("/dir/f", &file_size));
|
||||
ASSERT_EQ(0, file_size);
|
||||
delete writable_file;
|
||||
|
||||
// Check that the file exists.
|
||||
@ -55,9 +54,16 @@ TEST(MemEnvTest, Basics) {
|
||||
ASSERT_OK(writable_file->Append("abc"));
|
||||
delete writable_file;
|
||||
|
||||
// Check for expected size.
|
||||
// Check that append works.
|
||||
ASSERT_OK(env_->NewAppendableFile("/dir/f", &writable_file));
|
||||
ASSERT_OK(env_->GetFileSize("/dir/f", &file_size));
|
||||
ASSERT_EQ(3, file_size);
|
||||
ASSERT_OK(writable_file->Append("hello"));
|
||||
delete writable_file;
|
||||
|
||||
// Check for expected size.
|
||||
ASSERT_OK(env_->GetFileSize("/dir/f", &file_size));
|
||||
ASSERT_EQ(8, file_size);
|
||||
|
||||
// Check that renaming works.
|
||||
ASSERT_TRUE(!env_->RenameFile("/dir/non_existent", "/dir/g").ok());
|
||||
@ -65,7 +71,7 @@ TEST(MemEnvTest, Basics) {
|
||||
ASSERT_TRUE(!env_->FileExists("/dir/f"));
|
||||
ASSERT_TRUE(env_->FileExists("/dir/g"));
|
||||
ASSERT_OK(env_->GetFileSize("/dir/g", &file_size));
|
||||
ASSERT_EQ(3, file_size);
|
||||
ASSERT_EQ(8, file_size);
|
||||
|
||||
// Check that opening non-existent file fails.
|
||||
SequentialFile* seq_file;
|
||||
@ -100,25 +106,25 @@ TEST(MemEnvTest, ReadWrite) {
|
||||
|
||||
// Read sequentially.
|
||||
ASSERT_OK(env_->NewSequentialFile("/dir/f", &seq_file));
|
||||
ASSERT_OK(seq_file->Read(5, &result, scratch)); // Read "hello".
|
||||
ASSERT_OK(seq_file->Read(5, &result, scratch)); // Read "hello".
|
||||
ASSERT_EQ(0, result.compare("hello"));
|
||||
ASSERT_OK(seq_file->Skip(1));
|
||||
ASSERT_OK(seq_file->Read(1000, &result, scratch)); // Read "world".
|
||||
ASSERT_OK(seq_file->Read(1000, &result, scratch)); // Read "world".
|
||||
ASSERT_EQ(0, result.compare("world"));
|
||||
ASSERT_OK(seq_file->Read(1000, &result, scratch)); // Try reading past EOF.
|
||||
ASSERT_OK(seq_file->Read(1000, &result, scratch)); // Try reading past EOF.
|
||||
ASSERT_EQ(0, result.size());
|
||||
ASSERT_OK(seq_file->Skip(100)); // Try to skip past end of file.
|
||||
ASSERT_OK(seq_file->Skip(100)); // Try to skip past end of file.
|
||||
ASSERT_OK(seq_file->Read(1000, &result, scratch));
|
||||
ASSERT_EQ(0, result.size());
|
||||
delete seq_file;
|
||||
|
||||
// Random reads.
|
||||
ASSERT_OK(env_->NewRandomAccessFile("/dir/f", &rand_file));
|
||||
ASSERT_OK(rand_file->Read(6, 5, &result, scratch)); // Read "world".
|
||||
ASSERT_OK(rand_file->Read(6, 5, &result, scratch)); // Read "world".
|
||||
ASSERT_EQ(0, result.compare("world"));
|
||||
ASSERT_OK(rand_file->Read(0, 5, &result, scratch)); // Read "hello".
|
||||
ASSERT_OK(rand_file->Read(0, 5, &result, scratch)); // Read "hello".
|
||||
ASSERT_EQ(0, result.compare("hello"));
|
||||
ASSERT_OK(rand_file->Read(10, 100, &result, scratch)); // Read "d".
|
||||
ASSERT_OK(rand_file->Read(10, 100, &result, scratch)); // Read "d".
|
||||
ASSERT_EQ(0, result.compare("d"));
|
||||
|
||||
// Too high offset.
|
||||
@ -167,7 +173,7 @@ TEST(MemEnvTest, LargeWrite) {
|
||||
SequentialFile* seq_file;
|
||||
Slice result;
|
||||
ASSERT_OK(env_->NewSequentialFile("/dir/f", &seq_file));
|
||||
ASSERT_OK(seq_file->Read(3, &result, scratch)); // Read "foo".
|
||||
ASSERT_OK(seq_file->Read(3, &result, scratch)); // Read "foo".
|
||||
ASSERT_EQ(0, result.compare("foo"));
|
||||
|
||||
size_t read = 0;
|
||||
@ -179,7 +185,30 @@ TEST(MemEnvTest, LargeWrite) {
|
||||
}
|
||||
ASSERT_TRUE(write_data == read_data);
|
||||
delete seq_file;
|
||||
delete [] scratch;
|
||||
delete[] scratch;
|
||||
}
|
||||
|
||||
TEST(MemEnvTest, OverwriteOpenFile) {
  // Both payloads have the same length, so one buffer size fits either.
  const char kWrite1Data[] = "Write #1 data";
  const char kWrite2Data[] = "Write #2 data";
  const size_t kFileDataLen = sizeof(kWrite1Data) - 1;  // Drop trailing NUL.
  const std::string kTestFileName = test::TmpDir() + "/leveldb-TestFile.dat";

  // Create the file with the first payload and keep a reader open on it.
  ASSERT_OK(WriteStringToFile(env_, kWrite1Data, kTestFileName));
  RandomAccessFile* reader;
  ASSERT_OK(env_->NewRandomAccessFile(kTestFileName, &reader));

  // Replace the contents while the reader is still open.
  ASSERT_OK(WriteStringToFile(env_, kWrite2Data, kTestFileName));

  // A handle opened before the overwrite must observe the new contents.
  char scratch[kFileDataLen];
  Slice contents;
  ASSERT_OK(reader->Read(0, kFileDataLen, &contents, scratch));
  ASSERT_EQ(0, contents.compare(kWrite2Data));

  delete reader;
}
|
||||
|
||||
TEST(MemEnvTest, DBTest) {
|
||||
@ -227,6 +256,4 @@ TEST(MemEnvTest, DBTest) {
|
||||
|
||||
} // namespace leveldb
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
return leveldb::test::RunAllTests();
|
||||
}
|
||||
// Test binary entry point: the process exit status is whatever
// leveldb::test::RunAllTests() returns. Command-line arguments are unused.
int main(int argc, char** argv) {
  const int exit_code = leveldb::test::RunAllTests();
  return exit_code;
}
|
||||
|
@ -48,225 +48,205 @@ extern "C" {
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "leveldb/export.h"
|
||||
|
||||
/* Exported types */
|
||||
|
||||
typedef struct leveldb_t leveldb_t;
|
||||
typedef struct leveldb_cache_t leveldb_cache_t;
|
||||
typedef struct leveldb_comparator_t leveldb_comparator_t;
|
||||
typedef struct leveldb_env_t leveldb_env_t;
|
||||
typedef struct leveldb_filelock_t leveldb_filelock_t;
|
||||
typedef struct leveldb_filterpolicy_t leveldb_filterpolicy_t;
|
||||
typedef struct leveldb_iterator_t leveldb_iterator_t;
|
||||
typedef struct leveldb_logger_t leveldb_logger_t;
|
||||
typedef struct leveldb_options_t leveldb_options_t;
|
||||
typedef struct leveldb_randomfile_t leveldb_randomfile_t;
|
||||
typedef struct leveldb_readoptions_t leveldb_readoptions_t;
|
||||
typedef struct leveldb_seqfile_t leveldb_seqfile_t;
|
||||
typedef struct leveldb_snapshot_t leveldb_snapshot_t;
|
||||
typedef struct leveldb_writablefile_t leveldb_writablefile_t;
|
||||
typedef struct leveldb_writebatch_t leveldb_writebatch_t;
|
||||
typedef struct leveldb_writeoptions_t leveldb_writeoptions_t;
|
||||
typedef struct leveldb_t leveldb_t;
|
||||
typedef struct leveldb_cache_t leveldb_cache_t;
|
||||
typedef struct leveldb_comparator_t leveldb_comparator_t;
|
||||
typedef struct leveldb_env_t leveldb_env_t;
|
||||
typedef struct leveldb_filelock_t leveldb_filelock_t;
|
||||
typedef struct leveldb_filterpolicy_t leveldb_filterpolicy_t;
|
||||
typedef struct leveldb_iterator_t leveldb_iterator_t;
|
||||
typedef struct leveldb_logger_t leveldb_logger_t;
|
||||
typedef struct leveldb_options_t leveldb_options_t;
|
||||
typedef struct leveldb_randomfile_t leveldb_randomfile_t;
|
||||
typedef struct leveldb_readoptions_t leveldb_readoptions_t;
|
||||
typedef struct leveldb_seqfile_t leveldb_seqfile_t;
|
||||
typedef struct leveldb_snapshot_t leveldb_snapshot_t;
|
||||
typedef struct leveldb_writablefile_t leveldb_writablefile_t;
|
||||
typedef struct leveldb_writebatch_t leveldb_writebatch_t;
|
||||
typedef struct leveldb_writeoptions_t leveldb_writeoptions_t;
|
||||
|
||||
/* DB operations */
|
||||
|
||||
extern leveldb_t* leveldb_open(
|
||||
const leveldb_options_t* options,
|
||||
const char* name,
|
||||
char** errptr);
|
||||
LEVELDB_EXPORT leveldb_t* leveldb_open(const leveldb_options_t* options,
|
||||
const char* name, char** errptr);
|
||||
|
||||
extern void leveldb_close(leveldb_t* db);
|
||||
LEVELDB_EXPORT void leveldb_close(leveldb_t* db);
|
||||
|
||||
extern void leveldb_put(
|
||||
leveldb_t* db,
|
||||
const leveldb_writeoptions_t* options,
|
||||
const char* key, size_t keylen,
|
||||
const char* val, size_t vallen,
|
||||
char** errptr);
|
||||
LEVELDB_EXPORT void leveldb_put(leveldb_t* db,
|
||||
const leveldb_writeoptions_t* options,
|
||||
const char* key, size_t keylen, const char* val,
|
||||
size_t vallen, char** errptr);
|
||||
|
||||
extern void leveldb_delete(
|
||||
leveldb_t* db,
|
||||
const leveldb_writeoptions_t* options,
|
||||
const char* key, size_t keylen,
|
||||
char** errptr);
|
||||
LEVELDB_EXPORT void leveldb_delete(leveldb_t* db,
|
||||
const leveldb_writeoptions_t* options,
|
||||
const char* key, size_t keylen,
|
||||
char** errptr);
|
||||
|
||||
extern void leveldb_write(
|
||||
leveldb_t* db,
|
||||
const leveldb_writeoptions_t* options,
|
||||
leveldb_writebatch_t* batch,
|
||||
char** errptr);
|
||||
LEVELDB_EXPORT void leveldb_write(leveldb_t* db,
|
||||
const leveldb_writeoptions_t* options,
|
||||
leveldb_writebatch_t* batch, char** errptr);
|
||||
|
||||
/* Returns NULL if not found. A malloc()ed array otherwise.
|
||||
Stores the length of the array in *vallen. */
|
||||
extern char* leveldb_get(
|
||||
leveldb_t* db,
|
||||
const leveldb_readoptions_t* options,
|
||||
const char* key, size_t keylen,
|
||||
size_t* vallen,
|
||||
char** errptr);
|
||||
LEVELDB_EXPORT char* leveldb_get(leveldb_t* db,
|
||||
const leveldb_readoptions_t* options,
|
||||
const char* key, size_t keylen, size_t* vallen,
|
||||
char** errptr);
|
||||
|
||||
extern leveldb_iterator_t* leveldb_create_iterator(
|
||||
leveldb_t* db,
|
||||
const leveldb_readoptions_t* options);
|
||||
LEVELDB_EXPORT leveldb_iterator_t* leveldb_create_iterator(
|
||||
leveldb_t* db, const leveldb_readoptions_t* options);
|
||||
|
||||
extern const leveldb_snapshot_t* leveldb_create_snapshot(
|
||||
leveldb_t* db);
|
||||
LEVELDB_EXPORT const leveldb_snapshot_t* leveldb_create_snapshot(leveldb_t* db);
|
||||
|
||||
extern void leveldb_release_snapshot(
|
||||
leveldb_t* db,
|
||||
const leveldb_snapshot_t* snapshot);
|
||||
LEVELDB_EXPORT void leveldb_release_snapshot(
|
||||
leveldb_t* db, const leveldb_snapshot_t* snapshot);
|
||||
|
||||
/* Returns NULL if property name is unknown.
|
||||
Else returns a pointer to a malloc()-ed null-terminated value. */
|
||||
extern char* leveldb_property_value(
|
||||
leveldb_t* db,
|
||||
const char* propname);
|
||||
LEVELDB_EXPORT char* leveldb_property_value(leveldb_t* db,
|
||||
const char* propname);
|
||||
|
||||
extern void leveldb_approximate_sizes(
|
||||
leveldb_t* db,
|
||||
int num_ranges,
|
||||
const char* const* range_start_key, const size_t* range_start_key_len,
|
||||
const char* const* range_limit_key, const size_t* range_limit_key_len,
|
||||
uint64_t* sizes);
|
||||
LEVELDB_EXPORT void leveldb_approximate_sizes(
|
||||
leveldb_t* db, int num_ranges, const char* const* range_start_key,
|
||||
const size_t* range_start_key_len, const char* const* range_limit_key,
|
||||
const size_t* range_limit_key_len, uint64_t* sizes);
|
||||
|
||||
extern void leveldb_compact_range(
|
||||
leveldb_t* db,
|
||||
const char* start_key, size_t start_key_len,
|
||||
const char* limit_key, size_t limit_key_len);
|
||||
LEVELDB_EXPORT void leveldb_compact_range(leveldb_t* db, const char* start_key,
|
||||
size_t start_key_len,
|
||||
const char* limit_key,
|
||||
size_t limit_key_len);
|
||||
|
||||
/* Management operations */
|
||||
|
||||
extern void leveldb_destroy_db(
|
||||
const leveldb_options_t* options,
|
||||
const char* name,
|
||||
char** errptr);
|
||||
LEVELDB_EXPORT void leveldb_destroy_db(const leveldb_options_t* options,
|
||||
const char* name, char** errptr);
|
||||
|
||||
extern void leveldb_repair_db(
|
||||
const leveldb_options_t* options,
|
||||
const char* name,
|
||||
char** errptr);
|
||||
LEVELDB_EXPORT void leveldb_repair_db(const leveldb_options_t* options,
|
||||
const char* name, char** errptr);
|
||||
|
||||
/* Iterator */
|
||||
|
||||
extern void leveldb_iter_destroy(leveldb_iterator_t*);
|
||||
extern unsigned char leveldb_iter_valid(const leveldb_iterator_t*);
|
||||
extern void leveldb_iter_seek_to_first(leveldb_iterator_t*);
|
||||
extern void leveldb_iter_seek_to_last(leveldb_iterator_t*);
|
||||
extern void leveldb_iter_seek(leveldb_iterator_t*, const char* k, size_t klen);
|
||||
extern void leveldb_iter_next(leveldb_iterator_t*);
|
||||
extern void leveldb_iter_prev(leveldb_iterator_t*);
|
||||
extern const char* leveldb_iter_key(const leveldb_iterator_t*, size_t* klen);
|
||||
extern const char* leveldb_iter_value(const leveldb_iterator_t*, size_t* vlen);
|
||||
extern void leveldb_iter_get_error(const leveldb_iterator_t*, char** errptr);
|
||||
LEVELDB_EXPORT void leveldb_iter_destroy(leveldb_iterator_t*);
|
||||
LEVELDB_EXPORT unsigned char leveldb_iter_valid(const leveldb_iterator_t*);
|
||||
LEVELDB_EXPORT void leveldb_iter_seek_to_first(leveldb_iterator_t*);
|
||||
LEVELDB_EXPORT void leveldb_iter_seek_to_last(leveldb_iterator_t*);
|
||||
LEVELDB_EXPORT void leveldb_iter_seek(leveldb_iterator_t*, const char* k,
|
||||
size_t klen);
|
||||
LEVELDB_EXPORT void leveldb_iter_next(leveldb_iterator_t*);
|
||||
LEVELDB_EXPORT void leveldb_iter_prev(leveldb_iterator_t*);
|
||||
LEVELDB_EXPORT const char* leveldb_iter_key(const leveldb_iterator_t*,
|
||||
size_t* klen);
|
||||
LEVELDB_EXPORT const char* leveldb_iter_value(const leveldb_iterator_t*,
|
||||
size_t* vlen);
|
||||
LEVELDB_EXPORT void leveldb_iter_get_error(const leveldb_iterator_t*,
|
||||
char** errptr);
|
||||
|
||||
/* Write batch */
|
||||
|
||||
extern leveldb_writebatch_t* leveldb_writebatch_create();
|
||||
extern void leveldb_writebatch_destroy(leveldb_writebatch_t*);
|
||||
extern void leveldb_writebatch_clear(leveldb_writebatch_t*);
|
||||
extern void leveldb_writebatch_put(
|
||||
leveldb_writebatch_t*,
|
||||
const char* key, size_t klen,
|
||||
const char* val, size_t vlen);
|
||||
extern void leveldb_writebatch_delete(
|
||||
leveldb_writebatch_t*,
|
||||
const char* key, size_t klen);
|
||||
extern void leveldb_writebatch_iterate(
|
||||
leveldb_writebatch_t*,
|
||||
void* state,
|
||||
LEVELDB_EXPORT leveldb_writebatch_t* leveldb_writebatch_create(void);
|
||||
LEVELDB_EXPORT void leveldb_writebatch_destroy(leveldb_writebatch_t*);
|
||||
LEVELDB_EXPORT void leveldb_writebatch_clear(leveldb_writebatch_t*);
|
||||
LEVELDB_EXPORT void leveldb_writebatch_put(leveldb_writebatch_t*,
|
||||
const char* key, size_t klen,
|
||||
const char* val, size_t vlen);
|
||||
LEVELDB_EXPORT void leveldb_writebatch_delete(leveldb_writebatch_t*,
|
||||
const char* key, size_t klen);
|
||||
LEVELDB_EXPORT void leveldb_writebatch_iterate(
|
||||
const leveldb_writebatch_t*, void* state,
|
||||
void (*put)(void*, const char* k, size_t klen, const char* v, size_t vlen),
|
||||
void (*deleted)(void*, const char* k, size_t klen));
|
||||
LEVELDB_EXPORT void leveldb_writebatch_append(
|
||||
leveldb_writebatch_t* destination, const leveldb_writebatch_t* source);
|
||||
|
||||
/* Options */
|
||||
|
||||
extern leveldb_options_t* leveldb_options_create();
|
||||
extern void leveldb_options_destroy(leveldb_options_t*);
|
||||
extern void leveldb_options_set_comparator(
|
||||
leveldb_options_t*,
|
||||
leveldb_comparator_t*);
|
||||
extern void leveldb_options_set_filter_policy(
|
||||
leveldb_options_t*,
|
||||
leveldb_filterpolicy_t*);
|
||||
extern void leveldb_options_set_create_if_missing(
|
||||
leveldb_options_t*, unsigned char);
|
||||
extern void leveldb_options_set_error_if_exists(
|
||||
leveldb_options_t*, unsigned char);
|
||||
extern void leveldb_options_set_paranoid_checks(
|
||||
leveldb_options_t*, unsigned char);
|
||||
extern void leveldb_options_set_env(leveldb_options_t*, leveldb_env_t*);
|
||||
extern void leveldb_options_set_info_log(leveldb_options_t*, leveldb_logger_t*);
|
||||
extern void leveldb_options_set_write_buffer_size(leveldb_options_t*, size_t);
|
||||
extern void leveldb_options_set_max_open_files(leveldb_options_t*, int);
|
||||
extern void leveldb_options_set_cache(leveldb_options_t*, leveldb_cache_t*);
|
||||
extern void leveldb_options_set_block_size(leveldb_options_t*, size_t);
|
||||
extern void leveldb_options_set_block_restart_interval(leveldb_options_t*, int);
|
||||
LEVELDB_EXPORT leveldb_options_t* leveldb_options_create(void);
|
||||
LEVELDB_EXPORT void leveldb_options_destroy(leveldb_options_t*);
|
||||
LEVELDB_EXPORT void leveldb_options_set_comparator(leveldb_options_t*,
|
||||
leveldb_comparator_t*);
|
||||
LEVELDB_EXPORT void leveldb_options_set_filter_policy(leveldb_options_t*,
|
||||
leveldb_filterpolicy_t*);
|
||||
LEVELDB_EXPORT void leveldb_options_set_create_if_missing(leveldb_options_t*,
|
||||
unsigned char);
|
||||
LEVELDB_EXPORT void leveldb_options_set_error_if_exists(leveldb_options_t*,
|
||||
unsigned char);
|
||||
LEVELDB_EXPORT void leveldb_options_set_paranoid_checks(leveldb_options_t*,
|
||||
unsigned char);
|
||||
LEVELDB_EXPORT void leveldb_options_set_env(leveldb_options_t*, leveldb_env_t*);
|
||||
LEVELDB_EXPORT void leveldb_options_set_info_log(leveldb_options_t*,
|
||||
leveldb_logger_t*);
|
||||
LEVELDB_EXPORT void leveldb_options_set_write_buffer_size(leveldb_options_t*,
|
||||
size_t);
|
||||
LEVELDB_EXPORT void leveldb_options_set_max_open_files(leveldb_options_t*, int);
|
||||
LEVELDB_EXPORT void leveldb_options_set_cache(leveldb_options_t*,
|
||||
leveldb_cache_t*);
|
||||
LEVELDB_EXPORT void leveldb_options_set_block_size(leveldb_options_t*, size_t);
|
||||
LEVELDB_EXPORT void leveldb_options_set_block_restart_interval(
|
||||
leveldb_options_t*, int);
|
||||
LEVELDB_EXPORT void leveldb_options_set_max_file_size(leveldb_options_t*,
|
||||
size_t);
|
||||
|
||||
enum {
|
||||
leveldb_no_compression = 0,
|
||||
leveldb_snappy_compression = 1
|
||||
};
|
||||
extern void leveldb_options_set_compression(leveldb_options_t*, int);
|
||||
enum { leveldb_no_compression = 0, leveldb_snappy_compression = 1 };
|
||||
LEVELDB_EXPORT void leveldb_options_set_compression(leveldb_options_t*, int);
|
||||
|
||||
/* Comparator */
|
||||
|
||||
extern leveldb_comparator_t* leveldb_comparator_create(
|
||||
void* state,
|
||||
void (*destructor)(void*),
|
||||
int (*compare)(
|
||||
void*,
|
||||
const char* a, size_t alen,
|
||||
const char* b, size_t blen),
|
||||
LEVELDB_EXPORT leveldb_comparator_t* leveldb_comparator_create(
|
||||
void* state, void (*destructor)(void*),
|
||||
int (*compare)(void*, const char* a, size_t alen, const char* b,
|
||||
size_t blen),
|
||||
const char* (*name)(void*));
|
||||
extern void leveldb_comparator_destroy(leveldb_comparator_t*);
|
||||
LEVELDB_EXPORT void leveldb_comparator_destroy(leveldb_comparator_t*);
|
||||
|
||||
/* Filter policy */
|
||||
|
||||
extern leveldb_filterpolicy_t* leveldb_filterpolicy_create(
|
||||
void* state,
|
||||
void (*destructor)(void*),
|
||||
char* (*create_filter)(
|
||||
void*,
|
||||
const char* const* key_array, const size_t* key_length_array,
|
||||
int num_keys,
|
||||
size_t* filter_length),
|
||||
unsigned char (*key_may_match)(
|
||||
void*,
|
||||
const char* key, size_t length,
|
||||
const char* filter, size_t filter_length),
|
||||
LEVELDB_EXPORT leveldb_filterpolicy_t* leveldb_filterpolicy_create(
|
||||
void* state, void (*destructor)(void*),
|
||||
char* (*create_filter)(void*, const char* const* key_array,
|
||||
const size_t* key_length_array, int num_keys,
|
||||
size_t* filter_length),
|
||||
unsigned char (*key_may_match)(void*, const char* key, size_t length,
|
||||
const char* filter, size_t filter_length),
|
||||
const char* (*name)(void*));
|
||||
extern void leveldb_filterpolicy_destroy(leveldb_filterpolicy_t*);
|
||||
LEVELDB_EXPORT void leveldb_filterpolicy_destroy(leveldb_filterpolicy_t*);
|
||||
|
||||
extern leveldb_filterpolicy_t* leveldb_filterpolicy_create_bloom(
|
||||
LEVELDB_EXPORT leveldb_filterpolicy_t* leveldb_filterpolicy_create_bloom(
|
||||
int bits_per_key);
|
||||
|
||||
/* Read options */
|
||||
|
||||
extern leveldb_readoptions_t* leveldb_readoptions_create();
|
||||
extern void leveldb_readoptions_destroy(leveldb_readoptions_t*);
|
||||
extern void leveldb_readoptions_set_verify_checksums(
|
||||
leveldb_readoptions_t*,
|
||||
unsigned char);
|
||||
extern void leveldb_readoptions_set_fill_cache(
|
||||
LEVELDB_EXPORT leveldb_readoptions_t* leveldb_readoptions_create(void);
|
||||
LEVELDB_EXPORT void leveldb_readoptions_destroy(leveldb_readoptions_t*);
|
||||
LEVELDB_EXPORT void leveldb_readoptions_set_verify_checksums(
|
||||
leveldb_readoptions_t*, unsigned char);
|
||||
extern void leveldb_readoptions_set_snapshot(
|
||||
leveldb_readoptions_t*,
|
||||
const leveldb_snapshot_t*);
|
||||
LEVELDB_EXPORT void leveldb_readoptions_set_fill_cache(leveldb_readoptions_t*,
|
||||
unsigned char);
|
||||
LEVELDB_EXPORT void leveldb_readoptions_set_snapshot(leveldb_readoptions_t*,
|
||||
const leveldb_snapshot_t*);
|
||||
|
||||
/* Write options */
|
||||
|
||||
extern leveldb_writeoptions_t* leveldb_writeoptions_create();
|
||||
extern void leveldb_writeoptions_destroy(leveldb_writeoptions_t*);
|
||||
extern void leveldb_writeoptions_set_sync(
|
||||
leveldb_writeoptions_t*, unsigned char);
|
||||
LEVELDB_EXPORT leveldb_writeoptions_t* leveldb_writeoptions_create(void);
|
||||
LEVELDB_EXPORT void leveldb_writeoptions_destroy(leveldb_writeoptions_t*);
|
||||
LEVELDB_EXPORT void leveldb_writeoptions_set_sync(leveldb_writeoptions_t*,
|
||||
unsigned char);
|
||||
|
||||
/* Cache */
|
||||
|
||||
extern leveldb_cache_t* leveldb_cache_create_lru(size_t capacity);
|
||||
extern void leveldb_cache_destroy(leveldb_cache_t* cache);
|
||||
LEVELDB_EXPORT leveldb_cache_t* leveldb_cache_create_lru(size_t capacity);
|
||||
LEVELDB_EXPORT void leveldb_cache_destroy(leveldb_cache_t* cache);
|
||||
|
||||
/* Env */
|
||||
|
||||
extern leveldb_env_t* leveldb_create_default_env();
|
||||
extern void leveldb_env_destroy(leveldb_env_t*);
|
||||
LEVELDB_EXPORT leveldb_env_t* leveldb_create_default_env(void);
|
||||
LEVELDB_EXPORT void leveldb_env_destroy(leveldb_env_t*);
|
||||
|
||||
/* If not NULL, the returned buffer must be released using leveldb_free(). */
|
||||
LEVELDB_EXPORT char* leveldb_env_get_test_directory(leveldb_env_t*);
|
||||
|
||||
/* Utility */
|
||||
|
||||
@ -275,16 +255,16 @@ extern void leveldb_env_destroy(leveldb_env_t*);
|
||||
in this file. Note that in certain cases (typically on Windows), you
|
||||
may need to call this routine instead of free(ptr) to dispose of
|
||||
malloc()-ed memory returned by this library. */
|
||||
extern void leveldb_free(void* ptr);
|
||||
LEVELDB_EXPORT void leveldb_free(void* ptr);
|
||||
|
||||
/* Return the major version number for this release. */
|
||||
extern int leveldb_major_version();
|
||||
LEVELDB_EXPORT int leveldb_major_version(void);
|
||||
|
||||
/* Return the minor version number for this release. */
|
||||
extern int leveldb_minor_version();
|
||||
LEVELDB_EXPORT int leveldb_minor_version(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* end extern "C" */
|
||||
} /* end extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* STORAGE_LEVELDB_INCLUDE_C_H_ */
|
||||
#endif /* STORAGE_LEVELDB_INCLUDE_C_H_ */
|
||||
|
@ -19,26 +19,31 @@
|
||||
#define STORAGE_LEVELDB_INCLUDE_CACHE_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "leveldb/export.h"
|
||||
#include "leveldb/slice.h"
|
||||
|
||||
namespace leveldb {
|
||||
|
||||
class Cache;
|
||||
class LEVELDB_EXPORT Cache;
|
||||
|
||||
// Create a new cache with a fixed size capacity. This implementation
|
||||
// of Cache uses a least-recently-used eviction policy.
|
||||
extern Cache* NewLRUCache(size_t capacity);
|
||||
LEVELDB_EXPORT Cache* NewLRUCache(size_t capacity);
|
||||
|
||||
class Cache {
|
||||
class LEVELDB_EXPORT Cache {
|
||||
public:
|
||||
Cache() { }
|
||||
Cache() = default;
|
||||
|
||||
Cache(const Cache&) = delete;
|
||||
Cache& operator=(const Cache&) = delete;
|
||||
|
||||
// Destroys all existing entries by calling the "deleter"
|
||||
// function that was passed to the constructor.
|
||||
virtual ~Cache();
|
||||
|
||||
// Opaque handle to an entry stored in the cache.
|
||||
struct Handle { };
|
||||
struct Handle {};
|
||||
|
||||
// Insert a mapping from key->value into the cache and assign it
|
||||
// the specified charge against the total cache capacity.
|
||||
@ -52,7 +57,7 @@ class Cache {
|
||||
virtual Handle* Insert(const Slice& key, void* value, size_t charge,
|
||||
void (*deleter)(const Slice& key, void* value)) = 0;
|
||||
|
||||
// If the cache has no mapping for "key", returns NULL.
|
||||
// If the cache has no mapping for "key", returns nullptr.
|
||||
//
|
||||
// Else return a handle that corresponds to the mapping. The caller
|
||||
// must call this->Release(handle) when the returned mapping is no
|
||||
@ -81,6 +86,17 @@ class Cache {
|
||||
// its cache keys.
|
||||
virtual uint64_t NewId() = 0;
|
||||
|
||||
// Remove all cache entries that are not actively in use. Memory-constrained
|
||||
// applications may wish to call this method to reduce memory usage.
|
||||
// Default implementation of Prune() does nothing. Subclasses are strongly
|
||||
// encouraged to override the default implementation. A future release of
|
||||
// leveldb may change Prune() to a pure abstract method.
|
||||
virtual void Prune() {}
|
||||
|
||||
// Return an estimate of the combined charges of all elements stored in the
|
||||
// cache.
|
||||
virtual size_t TotalCharge() const = 0;
|
||||
|
||||
private:
|
||||
void LRU_Remove(Handle* e);
|
||||
void LRU_Append(Handle* e);
|
||||
@ -88,10 +104,6 @@ class Cache {
|
||||
|
||||
struct Rep;
|
||||
Rep* rep_;
|
||||
|
||||
// No copying allowed
|
||||
Cache(const Cache&);
|
||||
void operator=(const Cache&);
|
||||
};
|
||||
|
||||
} // namespace leveldb
|
||||
|
@ -7,6 +7,8 @@
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "leveldb/export.h"
|
||||
|
||||
namespace leveldb {
|
||||
|
||||
class Slice;
|
||||
@ -15,7 +17,7 @@ class Slice;
|
||||
// used as keys in an sstable or a database. A Comparator implementation
|
||||
// must be thread-safe since leveldb may invoke its methods concurrently
|
||||
// from multiple threads.
|
||||
class Comparator {
|
||||
class LEVELDB_EXPORT Comparator {
|
||||
public:
|
||||
virtual ~Comparator();
|
||||
|
||||
@ -43,9 +45,8 @@ class Comparator {
|
||||
// If *start < limit, changes *start to a short string in [start,limit).
|
||||
// Simple comparator implementations may return with *start unchanged,
|
||||
// i.e., an implementation of this method that does nothing is correct.
|
||||
virtual void FindShortestSeparator(
|
||||
std::string* start,
|
||||
const Slice& limit) const = 0;
|
||||
virtual void FindShortestSeparator(std::string* start,
|
||||
const Slice& limit) const = 0;
|
||||
|
||||
// Changes *key to a short string >= *key.
|
||||
// Simple comparator implementations may return with *key unchanged,
|
||||
@ -56,7 +57,7 @@ class Comparator {
|
||||
// Return a builtin comparator that uses lexicographic byte-wise
|
||||
// ordering. The result remains the property of this module and
|
||||
// must not be deleted.
|
||||
extern const Comparator* BytewiseComparator();
|
||||
LEVELDB_EXPORT const Comparator* BytewiseComparator();
|
||||
|
||||
} // namespace leveldb
|
||||
|
||||
|
@ -7,14 +7,16 @@
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "leveldb/export.h"
|
||||
#include "leveldb/iterator.h"
|
||||
#include "leveldb/options.h"
|
||||
|
||||
namespace leveldb {
|
||||
|
||||
// Update Makefile if you change these
|
||||
// Update CMakeLists.txt if you change these
|
||||
static const int kMajorVersion = 1;
|
||||
static const int kMinorVersion = 18;
|
||||
static const int kMinorVersion = 22;
|
||||
|
||||
struct Options;
|
||||
struct ReadOptions;
|
||||
@ -24,42 +26,44 @@ class WriteBatch;
|
||||
// Abstract handle to particular state of a DB.
|
||||
// A Snapshot is an immutable object and can therefore be safely
|
||||
// accessed from multiple threads without any external synchronization.
|
||||
class Snapshot {
|
||||
class LEVELDB_EXPORT Snapshot {
|
||||
protected:
|
||||
virtual ~Snapshot();
|
||||
};
|
||||
|
||||
// A range of keys
|
||||
struct Range {
|
||||
Slice start; // Included in the range
|
||||
Slice limit; // Not included in the range
|
||||
struct LEVELDB_EXPORT Range {
|
||||
Range() = default;
|
||||
Range(const Slice& s, const Slice& l) : start(s), limit(l) {}
|
||||
|
||||
Range() { }
|
||||
Range(const Slice& s, const Slice& l) : start(s), limit(l) { }
|
||||
Slice start; // Included in the range
|
||||
Slice limit; // Not included in the range
|
||||
};
|
||||
|
||||
// A DB is a persistent ordered map from keys to values.
|
||||
// A DB is safe for concurrent access from multiple threads without
|
||||
// any external synchronization.
|
||||
class DB {
|
||||
class LEVELDB_EXPORT DB {
|
||||
public:
|
||||
// Open the database with the specified "name".
|
||||
// Stores a pointer to a heap-allocated database in *dbptr and returns
|
||||
// OK on success.
|
||||
// Stores NULL in *dbptr and returns a non-OK status on error.
|
||||
// Stores nullptr in *dbptr and returns a non-OK status on error.
|
||||
// Caller should delete *dbptr when it is no longer needed.
|
||||
static Status Open(const Options& options,
|
||||
const std::string& name,
|
||||
static Status Open(const Options& options, const std::string& name,
|
||||
DB** dbptr);
|
||||
|
||||
DB() { }
|
||||
DB() = default;
|
||||
|
||||
DB(const DB&) = delete;
|
||||
DB& operator=(const DB&) = delete;
|
||||
|
||||
virtual ~DB();
|
||||
|
||||
// Set the database entry for "key" to "value". Returns OK on success,
|
||||
// and a non-OK status on error.
|
||||
// Note: consider setting options.sync = true.
|
||||
virtual Status Put(const WriteOptions& options,
|
||||
const Slice& key,
|
||||
virtual Status Put(const WriteOptions& options, const Slice& key,
|
||||
const Slice& value) = 0;
|
||||
|
||||
// Remove the database entry (if any) for "key". Returns OK on
|
||||
@ -80,8 +84,8 @@ class DB {
|
||||
// a status for which Status::IsNotFound() returns true.
|
||||
//
|
||||
// May return some other Status on an error.
|
||||
virtual Status Get(const ReadOptions& options,
|
||||
const Slice& key, std::string* value) = 0;
|
||||
virtual Status Get(const ReadOptions& options, const Slice& key,
|
||||
std::string* value) = 0;
|
||||
|
||||
// Return a heap-allocated iterator over the contents of the database.
|
||||
// The result of NewIterator() is initially invalid (caller must
|
||||
@ -115,6 +119,8 @@ class DB {
|
||||
// about the internal operation of the DB.
|
||||
// "leveldb.sstables" - returns a multi-line string that describes all
|
||||
// of the sstables that make up the db contents.
|
||||
// "leveldb.approximate-memory-usage" - returns the approximate number of
|
||||
// bytes of memory in use by the DB.
|
||||
virtual bool GetProperty(const Slice& property, std::string* value) = 0;
|
||||
|
||||
// For each i in [0,n-1], store in "sizes[i]", the approximate
|
||||
@ -134,27 +140,27 @@ class DB {
|
||||
// needed to access the data. This operation should typically only
|
||||
// be invoked by users who understand the underlying implementation.
|
||||
//
|
||||
// begin==NULL is treated as a key before all keys in the database.
|
||||
// end==NULL is treated as a key after all keys in the database.
|
||||
// begin==nullptr is treated as a key before all keys in the database.
|
||||
// end==nullptr is treated as a key after all keys in the database.
|
||||
// Therefore the following call will compact the entire database:
|
||||
// db->CompactRange(NULL, NULL);
|
||||
// db->CompactRange(nullptr, nullptr);
|
||||
virtual void CompactRange(const Slice* begin, const Slice* end) = 0;
|
||||
|
||||
private:
|
||||
// No copying allowed
|
||||
DB(const DB&);
|
||||
void operator=(const DB&);
|
||||
};
|
||||
|
||||
// Destroy the contents of the specified database.
|
||||
// Be very careful using this method.
|
||||
Status DestroyDB(const std::string& name, const Options& options);
|
||||
//
|
||||
// Note: For backwards compatibility, if DestroyDB is unable to list the
|
||||
// database files, Status::OK() will still be returned masking this failure.
|
||||
LEVELDB_EXPORT Status DestroyDB(const std::string& name,
|
||||
const Options& options);
|
||||
|
||||
// If a DB cannot be opened, you may attempt to call this method to
|
||||
// resurrect as much of the contents of the database as possible.
|
||||
// Some data may be lost, so be careful when calling this function
|
||||
// on a database that contains important information.
|
||||
Status RepairDB(const std::string& dbname, const Options& options);
|
||||
LEVELDB_EXPORT Status RepairDB(const std::string& dbname,
|
||||
const Options& options);
|
||||
|
||||
} // namespace leveldb
|
||||
|
||||
|
@ -6,7 +6,9 @@
|
||||
#define STORAGE_LEVELDB_INCLUDE_DUMPFILE_H_
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "leveldb/env.h"
|
||||
#include "leveldb/export.h"
|
||||
#include "leveldb/status.h"
|
||||
|
||||
namespace leveldb {
|
||||
@ -18,7 +20,8 @@ namespace leveldb {
|
||||
//
|
||||
// Returns a non-OK result if fname does not name a leveldb storage
|
||||
// file, or if the file cannot be read.
|
||||
Status DumpFile(Env* env, const std::string& fname, WritableFile* dst);
|
||||
LEVELDB_EXPORT Status DumpFile(Env* env, const std::string& fname,
|
||||
WritableFile* dst);
|
||||
|
||||
} // namespace leveldb
|
||||
|
||||
|
@ -13,12 +13,36 @@
|
||||
#ifndef STORAGE_LEVELDB_INCLUDE_ENV_H_
|
||||
#define STORAGE_LEVELDB_INCLUDE_ENV_H_
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <stdarg.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "leveldb/export.h"
|
||||
#include "leveldb/status.h"
|
||||
|
||||
#if defined(_WIN32)
|
||||
// The leveldb::Env class below contains a DeleteFile method.
|
||||
// At the same time, <windows.h>, a fairly popular header
|
||||
// file for Windows applications, defines a DeleteFile macro.
|
||||
//
|
||||
// Without any intervention on our part, the result of this
|
||||
// unfortunate coincidence is that the name of the
|
||||
// leveldb::Env::DeleteFile method seen by the compiler depends on
|
||||
// whether <windows.h> was included before or after the LevelDB
|
||||
// headers.
|
||||
//
|
||||
// To avoid headaches, we undefine DeleteFile (if defined) and
|
||||
// redefine it at the bottom of this file. This way <windows.h>
|
||||
// can be included before this file (or not at all) and the
|
||||
// exported method will always be leveldb::Env::DeleteFile.
|
||||
#if defined(DeleteFile)
|
||||
#undef DeleteFile
|
||||
#define LEVELDB_DELETEFILE_UNDEFINED
|
||||
#endif // defined(DeleteFile)
|
||||
#endif // defined(_WIN32)
|
||||
|
||||
namespace leveldb {
|
||||
|
||||
class FileLock;
|
||||
@ -28,9 +52,13 @@ class SequentialFile;
|
||||
class Slice;
|
||||
class WritableFile;
|
||||
|
||||
class Env {
|
||||
class LEVELDB_EXPORT Env {
|
||||
public:
|
||||
Env() { }
|
||||
Env() = default;
|
||||
|
||||
Env(const Env&) = delete;
|
||||
Env& operator=(const Env&) = delete;
|
||||
|
||||
virtual ~Env();
|
||||
|
||||
// Return a default environment suitable for the current operating
|
||||
@ -40,20 +68,22 @@ class Env {
|
||||
// The result of Default() belongs to leveldb and must never be deleted.
|
||||
static Env* Default();
|
||||
|
||||
// Create a brand new sequentially-readable file with the specified name.
|
||||
// Create an object that sequentially reads the file with the specified name.
|
||||
// On success, stores a pointer to the new file in *result and returns OK.
|
||||
// On failure stores NULL in *result and returns non-OK. If the file does
|
||||
// not exist, returns a non-OK status.
|
||||
// On failure stores nullptr in *result and returns non-OK. If the file does
|
||||
// not exist, returns a non-OK status. Implementations should return a
|
||||
// NotFound status when the file does not exist.
|
||||
//
|
||||
// The returned file will only be accessed by one thread at a time.
|
||||
virtual Status NewSequentialFile(const std::string& fname,
|
||||
SequentialFile** result) = 0;
|
||||
|
||||
// Create a brand new random access read-only file with the
|
||||
// Create an object supporting random-access reads from the file with the
|
||||
// specified name. On success, stores a pointer to the new file in
|
||||
// *result and returns OK. On failure stores NULL in *result and
|
||||
// *result and returns OK. On failure stores nullptr in *result and
|
||||
// returns non-OK. If the file does not exist, returns a non-OK
|
||||
// status.
|
||||
// status. Implementations should return a NotFound status when the file does
|
||||
// not exist.
|
||||
//
|
||||
// The returned file may be concurrently accessed by multiple threads.
|
||||
virtual Status NewRandomAccessFile(const std::string& fname,
|
||||
@ -62,13 +92,28 @@ class Env {
|
||||
// Create an object that writes to a new file with the specified
|
||||
// name. Deletes any existing file with the same name and creates a
|
||||
// new file. On success, stores a pointer to the new file in
|
||||
// *result and returns OK. On failure stores NULL in *result and
|
||||
// *result and returns OK. On failure stores nullptr in *result and
|
||||
// returns non-OK.
|
||||
//
|
||||
// The returned file will only be accessed by one thread at a time.
|
||||
virtual Status NewWritableFile(const std::string& fname,
|
||||
WritableFile** result) = 0;
|
||||
|
||||
// Create an object that either appends to an existing file, or
|
||||
// writes to a new file (if the file does not exist to begin with).
|
||||
// On success, stores a pointer to the new file in *result and
|
||||
// returns OK. On failure stores nullptr in *result and returns
|
||||
// non-OK.
|
||||
//
|
||||
// The returned file will only be accessed by one thread at a time.
|
||||
//
|
||||
// May return an IsNotSupportedError error if this Env does
|
||||
// not allow appending to an existing file. Users of Env (including
|
||||
// the leveldb implementation) must be prepared to deal with
|
||||
// an Env that does not support appending.
|
||||
virtual Status NewAppendableFile(const std::string& fname,
|
||||
WritableFile** result);
|
||||
|
||||
// Returns true iff the named file exists.
|
||||
virtual bool FileExists(const std::string& fname) = 0;
|
||||
|
||||
@ -95,7 +140,7 @@ class Env {
|
||||
const std::string& target) = 0;
|
||||
|
||||
// Lock the specified file. Used to prevent concurrent access to
|
||||
// the same db by multiple processes. On failure, stores NULL in
|
||||
// the same db by multiple processes. On failure, stores nullptr in
|
||||
// *lock and returns non-OK.
|
||||
//
|
||||
// On success, stores a pointer to the object that represents the
|
||||
@ -121,16 +166,14 @@ class Env {
|
||||
// added to the same Env may run concurrently in different threads.
|
||||
// I.e., the caller may not assume that background work items are
|
||||
// serialized.
|
||||
virtual void Schedule(
|
||||
void (*function)(void* arg),
|
||||
void* arg) = 0;
|
||||
virtual void Schedule(void (*function)(void* arg), void* arg) = 0;
|
||||
|
||||
// Start a new thread, invoking "function(arg)" within the new thread.
|
||||
// When "function(arg)" returns, the thread will be destroyed.
|
||||
virtual void StartThread(void (*function)(void* arg), void* arg) = 0;
|
||||
|
||||
// *path is set to a temporary directory that can be used for testing. It may
|
||||
// or many not have just been created. The directory may or may not differ
|
||||
// or may not have just been created. The directory may or may not differ
|
||||
// between runs of the same process, but subsequent calls will return the
|
||||
// same directory.
|
||||
virtual Status GetTestDirectory(std::string* path) = 0;
|
||||
@ -144,17 +187,16 @@ class Env {
|
||||
|
||||
// Sleep/delay the thread for the prescribed number of micro-seconds.
|
||||
virtual void SleepForMicroseconds(int micros) = 0;
|
||||
|
||||
private:
|
||||
// No copying allowed
|
||||
Env(const Env&);
|
||||
void operator=(const Env&);
|
||||
};
|
||||
|
||||
// A file abstraction for reading sequentially through a file
|
||||
class SequentialFile {
|
||||
class LEVELDB_EXPORT SequentialFile {
|
||||
public:
|
||||
SequentialFile() { }
|
||||
SequentialFile() = default;
|
||||
|
||||
SequentialFile(const SequentialFile&) = delete;
|
||||
SequentialFile& operator=(const SequentialFile&) = delete;
|
||||
|
||||
virtual ~SequentialFile();
|
||||
|
||||
// Read up to "n" bytes from the file. "scratch[0..n-1]" may be
|
||||
@ -175,17 +217,16 @@ class SequentialFile {
|
||||
//
|
||||
// REQUIRES: External synchronization
|
||||
virtual Status Skip(uint64_t n) = 0;
|
||||
|
||||
private:
|
||||
// No copying allowed
|
||||
SequentialFile(const SequentialFile&);
|
||||
void operator=(const SequentialFile&);
|
||||
};
|
||||
|
||||
// A file abstraction for randomly reading the contents of a file.
|
||||
class RandomAccessFile {
|
||||
class LEVELDB_EXPORT RandomAccessFile {
|
||||
public:
|
||||
RandomAccessFile() { }
|
||||
RandomAccessFile() = default;
|
||||
|
||||
RandomAccessFile(const RandomAccessFile&) = delete;
|
||||
RandomAccessFile& operator=(const RandomAccessFile&) = delete;
|
||||
|
||||
virtual ~RandomAccessFile();
|
||||
|
||||
// Read up to "n" bytes from the file starting at "offset".
|
||||
@ -199,135 +240,148 @@ class RandomAccessFile {
|
||||
// Safe for concurrent use by multiple threads.
|
||||
virtual Status Read(uint64_t offset, size_t n, Slice* result,
|
||||
char* scratch) const = 0;
|
||||
|
||||
private:
|
||||
// No copying allowed
|
||||
RandomAccessFile(const RandomAccessFile&);
|
||||
void operator=(const RandomAccessFile&);
|
||||
};
|
||||
|
||||
// A file abstraction for sequential writing. The implementation
|
||||
// must provide buffering since callers may append small fragments
|
||||
// at a time to the file.
|
||||
class WritableFile {
|
||||
class LEVELDB_EXPORT WritableFile {
|
||||
public:
|
||||
WritableFile() { }
|
||||
WritableFile() = default;
|
||||
|
||||
WritableFile(const WritableFile&) = delete;
|
||||
WritableFile& operator=(const WritableFile&) = delete;
|
||||
|
||||
virtual ~WritableFile();
|
||||
|
||||
virtual Status Append(const Slice& data) = 0;
|
||||
virtual Status Close() = 0;
|
||||
virtual Status Flush() = 0;
|
||||
virtual Status Sync() = 0;
|
||||
|
||||
private:
|
||||
// No copying allowed
|
||||
WritableFile(const WritableFile&);
|
||||
void operator=(const WritableFile&);
|
||||
};
|
||||
|
||||
// An interface for writing log messages.
|
||||
class Logger {
|
||||
class LEVELDB_EXPORT Logger {
|
||||
public:
|
||||
Logger() { }
|
||||
Logger() = default;
|
||||
|
||||
Logger(const Logger&) = delete;
|
||||
Logger& operator=(const Logger&) = delete;
|
||||
|
||||
virtual ~Logger();
|
||||
|
||||
// Write an entry to the log file with the specified format.
|
||||
virtual void Logv(const char* format, va_list ap) = 0;
|
||||
|
||||
private:
|
||||
// No copying allowed
|
||||
Logger(const Logger&);
|
||||
void operator=(const Logger&);
|
||||
};
|
||||
|
||||
|
||||
// Identifies a locked file.
|
||||
class FileLock {
|
||||
class LEVELDB_EXPORT FileLock {
|
||||
public:
|
||||
FileLock() { }
|
||||
FileLock() = default;
|
||||
|
||||
FileLock(const FileLock&) = delete;
|
||||
FileLock& operator=(const FileLock&) = delete;
|
||||
|
||||
virtual ~FileLock();
|
||||
private:
|
||||
// No copying allowed
|
||||
FileLock(const FileLock&);
|
||||
void operator=(const FileLock&);
|
||||
};
|
||||
|
||||
// Log the specified data to *info_log if info_log is non-NULL.
|
||||
extern void Log(Logger* info_log, const char* format, ...)
|
||||
# if defined(__GNUC__) || defined(__clang__)
|
||||
__attribute__((__format__ (__printf__, 2, 3)))
|
||||
# endif
|
||||
// Log the specified data to *info_log if info_log is non-null.
|
||||
void Log(Logger* info_log, const char* format, ...)
|
||||
#if defined(__GNUC__) || defined(__clang__)
|
||||
__attribute__((__format__(__printf__, 2, 3)))
|
||||
#endif
|
||||
;
|
||||
|
||||
// A utility routine: write "data" to the named file.
|
||||
extern Status WriteStringToFile(Env* env, const Slice& data,
|
||||
const std::string& fname);
|
||||
LEVELDB_EXPORT Status WriteStringToFile(Env* env, const Slice& data,
|
||||
const std::string& fname);
|
||||
|
||||
// A utility routine: read contents of named file into *data
|
||||
extern Status ReadFileToString(Env* env, const std::string& fname,
|
||||
std::string* data);
|
||||
LEVELDB_EXPORT Status ReadFileToString(Env* env, const std::string& fname,
|
||||
std::string* data);
|
||||
|
||||
// An implementation of Env that forwards all calls to another Env.
|
||||
// May be useful to clients who wish to override just part of the
|
||||
// functionality of another Env.
|
||||
class EnvWrapper : public Env {
|
||||
class LEVELDB_EXPORT EnvWrapper : public Env {
|
||||
public:
|
||||
// Initialize an EnvWrapper that delegates all calls to *t
|
||||
explicit EnvWrapper(Env* t) : target_(t) { }
|
||||
// Initialize an EnvWrapper that delegates all calls to *t.
|
||||
explicit EnvWrapper(Env* t) : target_(t) {}
|
||||
virtual ~EnvWrapper();
|
||||
|
||||
// Return the target to which this Env forwards all calls
|
||||
// Return the target to which this Env forwards all calls.
|
||||
Env* target() const { return target_; }
|
||||
|
||||
// The following text is boilerplate that forwards all methods to target()
|
||||
Status NewSequentialFile(const std::string& f, SequentialFile** r) {
|
||||
// The following text is boilerplate that forwards all methods to target().
|
||||
Status NewSequentialFile(const std::string& f, SequentialFile** r) override {
|
||||
return target_->NewSequentialFile(f, r);
|
||||
}
|
||||
Status NewRandomAccessFile(const std::string& f, RandomAccessFile** r) {
|
||||
Status NewRandomAccessFile(const std::string& f,
|
||||
RandomAccessFile** r) override {
|
||||
return target_->NewRandomAccessFile(f, r);
|
||||
}
|
||||
Status NewWritableFile(const std::string& f, WritableFile** r) {
|
||||
Status NewWritableFile(const std::string& f, WritableFile** r) override {
|
||||
return target_->NewWritableFile(f, r);
|
||||
}
|
||||
bool FileExists(const std::string& f) { return target_->FileExists(f); }
|
||||
Status GetChildren(const std::string& dir, std::vector<std::string>* r) {
|
||||
Status NewAppendableFile(const std::string& f, WritableFile** r) override {
|
||||
return target_->NewAppendableFile(f, r);
|
||||
}
|
||||
bool FileExists(const std::string& f) override {
|
||||
return target_->FileExists(f);
|
||||
}
|
||||
Status GetChildren(const std::string& dir,
|
||||
std::vector<std::string>* r) override {
|
||||
return target_->GetChildren(dir, r);
|
||||
}
|
||||
Status DeleteFile(const std::string& f) { return target_->DeleteFile(f); }
|
||||
Status CreateDir(const std::string& d) { return target_->CreateDir(d); }
|
||||
Status DeleteDir(const std::string& d) { return target_->DeleteDir(d); }
|
||||
Status GetFileSize(const std::string& f, uint64_t* s) {
|
||||
Status DeleteFile(const std::string& f) override {
|
||||
return target_->DeleteFile(f);
|
||||
}
|
||||
Status CreateDir(const std::string& d) override {
|
||||
return target_->CreateDir(d);
|
||||
}
|
||||
Status DeleteDir(const std::string& d) override {
|
||||
return target_->DeleteDir(d);
|
||||
}
|
||||
Status GetFileSize(const std::string& f, uint64_t* s) override {
|
||||
return target_->GetFileSize(f, s);
|
||||
}
|
||||
Status RenameFile(const std::string& s, const std::string& t) {
|
||||
Status RenameFile(const std::string& s, const std::string& t) override {
|
||||
return target_->RenameFile(s, t);
|
||||
}
|
||||
Status LockFile(const std::string& f, FileLock** l) {
|
||||
Status LockFile(const std::string& f, FileLock** l) override {
|
||||
return target_->LockFile(f, l);
|
||||
}
|
||||
Status UnlockFile(FileLock* l) { return target_->UnlockFile(l); }
|
||||
void Schedule(void (*f)(void*), void* a) {
|
||||
Status UnlockFile(FileLock* l) override { return target_->UnlockFile(l); }
|
||||
void Schedule(void (*f)(void*), void* a) override {
|
||||
return target_->Schedule(f, a);
|
||||
}
|
||||
void StartThread(void (*f)(void*), void* a) {
|
||||
void StartThread(void (*f)(void*), void* a) override {
|
||||
return target_->StartThread(f, a);
|
||||
}
|
||||
virtual Status GetTestDirectory(std::string* path) {
|
||||
Status GetTestDirectory(std::string* path) override {
|
||||
return target_->GetTestDirectory(path);
|
||||
}
|
||||
virtual Status NewLogger(const std::string& fname, Logger** result) {
|
||||
Status NewLogger(const std::string& fname, Logger** result) override {
|
||||
return target_->NewLogger(fname, result);
|
||||
}
|
||||
uint64_t NowMicros() {
|
||||
return target_->NowMicros();
|
||||
}
|
||||
void SleepForMicroseconds(int micros) {
|
||||
uint64_t NowMicros() override { return target_->NowMicros(); }
|
||||
void SleepForMicroseconds(int micros) override {
|
||||
target_->SleepForMicroseconds(micros);
|
||||
}
|
||||
|
||||
private:
|
||||
Env* target_;
|
||||
};
|
||||
|
||||
} // namespace leveldb
|
||||
|
||||
// Redefine DeleteFile if necessary.
|
||||
#if defined(_WIN32) && defined(LEVELDB_DELETEFILE_UNDEFINED)
|
||||
#if defined(UNICODE)
|
||||
#define DeleteFile DeleteFileW
|
||||
#else
|
||||
#define DeleteFile DeleteFileA
|
||||
#endif // defined(UNICODE)
|
||||
#endif // defined(_WIN32) && defined(LEVELDB_DELETEFILE_UNDEFINED)
|
||||
|
||||
#endif // STORAGE_LEVELDB_INCLUDE_ENV_H_
|
||||
|
33
include/leveldb/export.h
Normal file
33
include/leveldb/export.h
Normal file
@ -0,0 +1,33 @@
|
||||
// Copyright (c) 2017 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#ifndef STORAGE_LEVELDB_INCLUDE_EXPORT_H_
|
||||
#define STORAGE_LEVELDB_INCLUDE_EXPORT_H_
|
||||
|
||||
#if !defined(LEVELDB_EXPORT)
|
||||
|
||||
#if defined(LEVELDB_SHARED_LIBRARY)
|
||||
#if defined(_WIN32)
|
||||
|
||||
#if defined(LEVELDB_COMPILE_LIBRARY)
|
||||
#define LEVELDB_EXPORT __declspec(dllexport)
|
||||
#else
|
||||
#define LEVELDB_EXPORT __declspec(dllimport)
|
||||
#endif // defined(LEVELDB_COMPILE_LIBRARY)
|
||||
|
||||
#else // defined(_WIN32)
|
||||
#if defined(LEVELDB_COMPILE_LIBRARY)
|
||||
#define LEVELDB_EXPORT __attribute__((visibility("default")))
|
||||
#else
|
||||
#define LEVELDB_EXPORT
|
||||
#endif
|
||||
#endif // defined(_WIN32)
|
||||
|
||||
#else // defined(LEVELDB_SHARED_LIBRARY)
|
||||
#define LEVELDB_EXPORT
|
||||
#endif
|
||||
|
||||
#endif // !defined(LEVELDB_EXPORT)
|
||||
|
||||
#endif // STORAGE_LEVELDB_INCLUDE_EXPORT_H_
|
@ -18,11 +18,13 @@
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "leveldb/export.h"
|
||||
|
||||
namespace leveldb {
|
||||
|
||||
class Slice;
|
||||
|
||||
class FilterPolicy {
|
||||
class LEVELDB_EXPORT FilterPolicy {
|
||||
public:
|
||||
virtual ~FilterPolicy();
|
||||
|
||||
@ -38,8 +40,8 @@ class FilterPolicy {
|
||||
//
|
||||
// Warning: do not change the initial contents of *dst. Instead,
|
||||
// append the newly constructed filter to *dst.
|
||||
virtual void CreateFilter(const Slice* keys, int n, std::string* dst)
|
||||
const = 0;
|
||||
virtual void CreateFilter(const Slice* keys, int n,
|
||||
std::string* dst) const = 0;
|
||||
|
||||
// "filter" contains the data appended by a preceding call to
|
||||
// CreateFilter() on this class. This method must return true if
|
||||
@ -63,8 +65,8 @@ class FilterPolicy {
|
||||
// ignores trailing spaces, it would be incorrect to use a
|
||||
// FilterPolicy (like NewBloomFilterPolicy) that does not ignore
|
||||
// trailing spaces in keys.
|
||||
extern const FilterPolicy* NewBloomFilterPolicy(int bits_per_key);
|
||||
LEVELDB_EXPORT const FilterPolicy* NewBloomFilterPolicy(int bits_per_key);
|
||||
|
||||
}
|
||||
} // namespace leveldb
|
||||
|
||||
#endif // STORAGE_LEVELDB_INCLUDE_FILTER_POLICY_H_
|
||||
|
@ -15,14 +15,19 @@
|
||||
#ifndef STORAGE_LEVELDB_INCLUDE_ITERATOR_H_
|
||||
#define STORAGE_LEVELDB_INCLUDE_ITERATOR_H_
|
||||
|
||||
#include "leveldb/export.h"
|
||||
#include "leveldb/slice.h"
|
||||
#include "leveldb/status.h"
|
||||
|
||||
namespace leveldb {
|
||||
|
||||
class Iterator {
|
||||
class LEVELDB_EXPORT Iterator {
|
||||
public:
|
||||
Iterator();
|
||||
|
||||
Iterator(const Iterator&) = delete;
|
||||
Iterator& operator=(const Iterator&) = delete;
|
||||
|
||||
virtual ~Iterator();
|
||||
|
||||
// An iterator is either positioned at a key/value pair, or
|
||||
@ -72,28 +77,35 @@ class Iterator {
|
||||
//
|
||||
// Note that unlike all of the preceding methods, this method is
|
||||
// not abstract and therefore clients should not override it.
|
||||
typedef void (*CleanupFunction)(void* arg1, void* arg2);
|
||||
using CleanupFunction = void (*)(void* arg1, void* arg2);
|
||||
void RegisterCleanup(CleanupFunction function, void* arg1, void* arg2);
|
||||
|
||||
private:
|
||||
struct Cleanup {
|
||||
// Cleanup functions are stored in a singly linked list.
|
||||
// The list's head node is inlined in the iterator.
|
||||
struct CleanupNode {
|
||||
// True if the node is not used. Only head nodes might be unused.
|
||||
bool IsEmpty() const { return function == nullptr; }
|
||||
// Invokes the cleanup function.
|
||||
void Run() {
|
||||
assert(function != nullptr);
|
||||
(*function)(arg1, arg2);
|
||||
}
|
||||
|
||||
// The head node is used if the function pointer is not null.
|
||||
CleanupFunction function;
|
||||
void* arg1;
|
||||
void* arg2;
|
||||
Cleanup* next;
|
||||
CleanupNode* next;
|
||||
};
|
||||
Cleanup cleanup_;
|
||||
|
||||
// No copying allowed
|
||||
Iterator(const Iterator&);
|
||||
void operator=(const Iterator&);
|
||||
CleanupNode cleanup_head_;
|
||||
};
|
||||
|
||||
// Return an empty iterator (yields nothing).
|
||||
extern Iterator* NewEmptyIterator();
|
||||
LEVELDB_EXPORT Iterator* NewEmptyIterator();
|
||||
|
||||
// Return an empty iterator with the specified status.
|
||||
extern Iterator* NewErrorIterator(const Status& status);
|
||||
LEVELDB_EXPORT Iterator* NewErrorIterator(const Status& status);
|
||||
|
||||
} // namespace leveldb
|
||||
|
||||
|
@ -7,6 +7,8 @@
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include "leveldb/export.h"
|
||||
|
||||
namespace leveldb {
|
||||
|
||||
class Cache;
|
||||
@ -23,12 +25,15 @@ class Snapshot;
|
||||
enum CompressionType {
|
||||
// NOTE: do not change the values of existing entries, as these are
|
||||
// part of the persistent format on disk.
|
||||
kNoCompression = 0x0,
|
||||
kNoCompression = 0x0,
|
||||
kSnappyCompression = 0x1
|
||||
};
|
||||
|
||||
// Options to control the behavior of a database (passed to DB::Open)
|
||||
struct Options {
|
||||
struct LEVELDB_EXPORT Options {
|
||||
// Create an Options object with default values for all fields.
|
||||
Options();
|
||||
|
||||
// -------------------
|
||||
// Parameters that affect behavior
|
||||
|
||||
@ -41,20 +46,17 @@ struct Options {
|
||||
const Comparator* comparator;
|
||||
|
||||
// If true, the database will be created if it is missing.
|
||||
// Default: false
|
||||
bool create_if_missing;
|
||||
bool create_if_missing = false;
|
||||
|
||||
// If true, an error is raised if the database already exists.
|
||||
// Default: false
|
||||
bool error_if_exists;
|
||||
bool error_if_exists = false;
|
||||
|
||||
// If true, the implementation will do aggressive checking of the
|
||||
// data it is processing and will stop early if it detects any
|
||||
// errors. This may have unforeseen ramifications: for example, a
|
||||
// corruption of one DB entry may cause a large number of entries to
|
||||
// become unreadable or for the entire DB to become unopenable.
|
||||
// Default: false
|
||||
bool paranoid_checks;
|
||||
bool paranoid_checks = false;
|
||||
|
||||
// Use the specified object to interact with the environment,
|
||||
// e.g. to read/write files, schedule background work, etc.
|
||||
@ -62,10 +64,9 @@ struct Options {
|
||||
Env* env;
|
||||
|
||||
// Any internal progress/error information generated by the db will
|
||||
// be written to info_log if it is non-NULL, or to a file stored
|
||||
// in the same directory as the DB contents if info_log is NULL.
|
||||
// Default: NULL
|
||||
Logger* info_log;
|
||||
// be written to info_log if it is non-null, or to a file stored
|
||||
// in the same directory as the DB contents if info_log is null.
|
||||
Logger* info_log = nullptr;
|
||||
|
||||
// -------------------
|
||||
// Parameters that affect performance
|
||||
@ -78,39 +79,40 @@ struct Options {
|
||||
// so you may wish to adjust this parameter to control memory usage.
|
||||
// Also, a larger write buffer will result in a longer recovery time
|
||||
// the next time the database is opened.
|
||||
//
|
||||
// Default: 4MB
|
||||
size_t write_buffer_size;
|
||||
size_t write_buffer_size = 4 * 1024 * 1024;
|
||||
|
||||
// Number of open files that can be used by the DB. You may need to
|
||||
// increase this if your database has a large working set (budget
|
||||
// one open file per 2MB of working set).
|
||||
//
|
||||
// Default: 1000
|
||||
int max_open_files;
|
||||
int max_open_files = 1000;
|
||||
|
||||
// Control over blocks (user data is stored in a set of blocks, and
|
||||
// a block is the unit of reading from disk).
|
||||
|
||||
// If non-NULL, use the specified cache for blocks.
|
||||
// If NULL, leveldb will automatically create and use an 8MB internal cache.
|
||||
// Default: NULL
|
||||
Cache* block_cache;
|
||||
// If non-null, use the specified cache for blocks.
|
||||
// If null, leveldb will automatically create and use an 8MB internal cache.
|
||||
Cache* block_cache = nullptr;
|
||||
|
||||
// Approximate size of user data packed per block. Note that the
|
||||
// block size specified here corresponds to uncompressed data. The
|
||||
// actual size of the unit read from disk may be smaller if
|
||||
// compression is enabled. This parameter can be changed dynamically.
|
||||
//
|
||||
// Default: 4K
|
||||
size_t block_size;
|
||||
size_t block_size = 4 * 1024;
|
||||
|
||||
// Number of keys between restart points for delta encoding of keys.
|
||||
// This parameter can be changed dynamically. Most clients should
|
||||
// leave this parameter alone.
|
||||
//
|
||||
// Default: 16
|
||||
int block_restart_interval;
|
||||
int block_restart_interval = 16;
|
||||
|
||||
// Leveldb will write up to this amount of bytes to a file before
|
||||
// switching to a new one.
|
||||
// Most clients should leave this parameter alone. However if your
|
||||
// filesystem is more efficient with larger files, you could
|
||||
// consider increasing the value. The downside will be longer
|
||||
// compactions and hence longer latency/performance hiccups.
|
||||
// Another reason to increase this parameter might be when you are
|
||||
// initially populating a large database.
|
||||
size_t max_file_size = 2 * 1024 * 1024;
|
||||
|
||||
// Compress blocks using the specified compression algorithm. This
|
||||
// parameter can be changed dynamically.
|
||||
@ -126,47 +128,43 @@ struct Options {
|
||||
// worth switching to kNoCompression. Even if the input data is
|
||||
// incompressible, the kSnappyCompression implementation will
|
||||
// efficiently detect that and will switch to uncompressed mode.
|
||||
CompressionType compression;
|
||||
CompressionType compression = kSnappyCompression;
|
||||
|
||||
// If non-NULL, use the specified filter policy to reduce disk reads.
|
||||
// EXPERIMENTAL: If true, append to existing MANIFEST and log files
|
||||
// when a database is opened. This can significantly speed up open.
|
||||
//
|
||||
// Default: currently false, but may become true later.
|
||||
bool reuse_logs = false;
|
||||
|
||||
// If non-null, use the specified filter policy to reduce disk reads.
|
||||
// Many applications will benefit from passing the result of
|
||||
// NewBloomFilterPolicy() here.
|
||||
//
|
||||
// Default: NULL
|
||||
const FilterPolicy* filter_policy;
|
||||
|
||||
// Create an Options object with default values for all fields.
|
||||
Options();
|
||||
const FilterPolicy* filter_policy = nullptr;
|
||||
};
|
||||
|
||||
// Options that control read operations
|
||||
struct ReadOptions {
|
||||
struct LEVELDB_EXPORT ReadOptions {
|
||||
ReadOptions() = default;
|
||||
|
||||
// If true, all data read from underlying storage will be
|
||||
// verified against corresponding checksums.
|
||||
// Default: false
|
||||
bool verify_checksums;
|
||||
bool verify_checksums = false;
|
||||
|
||||
// Should the data read for this iteration be cached in memory?
|
||||
// Callers may wish to set this field to false for bulk scans.
|
||||
// Default: true
|
||||
bool fill_cache;
|
||||
bool fill_cache = true;
|
||||
|
||||
// If "snapshot" is non-NULL, read as of the supplied snapshot
|
||||
// If "snapshot" is non-null, read as of the supplied snapshot
|
||||
// (which must belong to the DB that is being read and which must
|
||||
// not have been released). If "snapshot" is NULL, use an implicit
|
||||
// not have been released). If "snapshot" is null, use an implicit
|
||||
// snapshot of the state at the beginning of this read operation.
|
||||
// Default: NULL
|
||||
const Snapshot* snapshot;
|
||||
|
||||
ReadOptions()
|
||||
: verify_checksums(false),
|
||||
fill_cache(true),
|
||||
snapshot(NULL) {
|
||||
}
|
||||
const Snapshot* snapshot = nullptr;
|
||||
};
|
||||
|
||||
// Options that control write operations
|
||||
struct WriteOptions {
|
||||
struct LEVELDB_EXPORT WriteOptions {
|
||||
WriteOptions() = default;
|
||||
|
||||
// If true, the write will be flushed from the operating system
|
||||
// buffer cache (by calling WritableFile::Sync()) before the write
|
||||
// is considered complete. If this flag is true, writes will be
|
||||
@ -181,13 +179,7 @@ struct WriteOptions {
|
||||
// crash semantics as the "write()" system call. A DB write
|
||||
// with sync==true has similar crash semantics to a "write()"
|
||||
// system call followed by "fsync()".
|
||||
//
|
||||
// Default: false
|
||||
bool sync;
|
||||
|
||||
WriteOptions()
|
||||
: sync(false) {
|
||||
}
|
||||
bool sync = false;
|
||||
};
|
||||
|
||||
} // namespace leveldb
|
||||
|
@ -18,23 +18,30 @@
|
||||
#include <assert.h>
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "leveldb/export.h"
|
||||
|
||||
namespace leveldb {
|
||||
|
||||
class Slice {
|
||||
class LEVELDB_EXPORT Slice {
|
||||
public:
|
||||
// Create an empty slice.
|
||||
Slice() : data_(""), size_(0) { }
|
||||
Slice() : data_(""), size_(0) {}
|
||||
|
||||
// Create a slice that refers to d[0,n-1].
|
||||
Slice(const char* d, size_t n) : data_(d), size_(n) { }
|
||||
Slice(const char* d, size_t n) : data_(d), size_(n) {}
|
||||
|
||||
// Create a slice that refers to the contents of "s"
|
||||
Slice(const std::string& s) : data_(s.data()), size_(s.size()) { }
|
||||
Slice(const std::string& s) : data_(s.data()), size_(s.size()) {}
|
||||
|
||||
// Create a slice that refers to s[0,strlen(s)-1]
|
||||
Slice(const char* s) : data_(s), size_(strlen(s)) { }
|
||||
Slice(const char* s) : data_(s), size_(strlen(s)) {}
|
||||
|
||||
// Intentionally copyable.
|
||||
Slice(const Slice&) = default;
|
||||
Slice& operator=(const Slice&) = default;
|
||||
|
||||
// Return a pointer to the beginning of the referenced data
|
||||
const char* data() const { return data_; }
|
||||
@ -53,7 +60,10 @@ class Slice {
|
||||
}
|
||||
|
||||
// Change this slice to refer to an empty array
|
||||
void clear() { data_ = ""; size_ = 0; }
|
||||
void clear() {
|
||||
data_ = "";
|
||||
size_ = 0;
|
||||
}
|
||||
|
||||
// Drop the first "n" bytes from this slice.
|
||||
void remove_prefix(size_t n) {
|
||||
@ -73,15 +83,12 @@ class Slice {
|
||||
|
||||
// Return true iff "x" is a prefix of "*this"
|
||||
bool starts_with(const Slice& x) const {
|
||||
return ((size_ >= x.size_) &&
|
||||
(memcmp(data_, x.data_, x.size_) == 0));
|
||||
return ((size_ >= x.size_) && (memcmp(data_, x.data_, x.size_) == 0));
|
||||
}
|
||||
|
||||
private:
|
||||
const char* data_;
|
||||
size_t size_;
|
||||
|
||||
// Intentionally copyable
|
||||
};
|
||||
|
||||
inline bool operator==(const Slice& x, const Slice& y) {
|
||||
@ -89,21 +96,20 @@ inline bool operator==(const Slice& x, const Slice& y) {
|
||||
(memcmp(x.data(), y.data(), x.size()) == 0));
|
||||
}
|
||||
|
||||
inline bool operator!=(const Slice& x, const Slice& y) {
|
||||
return !(x == y);
|
||||
}
|
||||
inline bool operator!=(const Slice& x, const Slice& y) { return !(x == y); }
|
||||
|
||||
inline int Slice::compare(const Slice& b) const {
|
||||
const size_t min_len = (size_ < b.size_) ? size_ : b.size_;
|
||||
int r = memcmp(data_, b.data_, min_len);
|
||||
if (r == 0) {
|
||||
if (size_ < b.size_) r = -1;
|
||||
else if (size_ > b.size_) r = +1;
|
||||
if (size_ < b.size_)
|
||||
r = -1;
|
||||
else if (size_ > b.size_)
|
||||
r = +1;
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
} // namespace leveldb
|
||||
|
||||
|
||||
#endif // STORAGE_LEVELDB_INCLUDE_SLICE_H_
|
||||
|
@ -13,20 +13,25 @@
|
||||
#ifndef STORAGE_LEVELDB_INCLUDE_STATUS_H_
|
||||
#define STORAGE_LEVELDB_INCLUDE_STATUS_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
|
||||
#include "leveldb/export.h"
|
||||
#include "leveldb/slice.h"
|
||||
|
||||
namespace leveldb {
|
||||
|
||||
class Status {
|
||||
class LEVELDB_EXPORT Status {
|
||||
public:
|
||||
// Create a success status.
|
||||
Status() : state_(NULL) { }
|
||||
Status() noexcept : state_(nullptr) {}
|
||||
~Status() { delete[] state_; }
|
||||
|
||||
// Copy the specified status.
|
||||
Status(const Status& s);
|
||||
void operator=(const Status& s);
|
||||
Status(const Status& rhs);
|
||||
Status& operator=(const Status& rhs);
|
||||
|
||||
Status(Status&& rhs) noexcept : state_(rhs.state_) { rhs.state_ = nullptr; }
|
||||
Status& operator=(Status&& rhs) noexcept;
|
||||
|
||||
// Return a success status.
|
||||
static Status OK() { return Status(); }
|
||||
@ -49,7 +54,7 @@ class Status {
|
||||
}
|
||||
|
||||
// Returns true iff the status indicates success.
|
||||
bool ok() const { return (state_ == NULL); }
|
||||
bool ok() const { return (state_ == nullptr); }
|
||||
|
||||
// Returns true iff the status indicates a NotFound error.
|
||||
bool IsNotFound() const { return code() == kNotFound; }
|
||||
@ -60,18 +65,17 @@ class Status {
|
||||
// Returns true iff the status indicates an IOError.
|
||||
bool IsIOError() const { return code() == kIOError; }
|
||||
|
||||
// Returns true iff the status indicates a NotSupportedError.
|
||||
bool IsNotSupportedError() const { return code() == kNotSupported; }
|
||||
|
||||
// Returns true iff the status indicates an InvalidArgument.
|
||||
bool IsInvalidArgument() const { return code() == kInvalidArgument; }
|
||||
|
||||
// Return a string representation of this status suitable for printing.
|
||||
// Returns the string "OK" for success.
|
||||
std::string ToString() const;
|
||||
|
||||
private:
|
||||
// OK status has a NULL state_. Otherwise, state_ is a new[] array
|
||||
// of the following form:
|
||||
// state_[0..3] == length of message
|
||||
// state_[4] == code
|
||||
// state_[5..] == message
|
||||
const char* state_;
|
||||
|
||||
enum Code {
|
||||
kOk = 0,
|
||||
kNotFound = 1,
|
||||
@ -82,23 +86,35 @@ class Status {
|
||||
};
|
||||
|
||||
Code code() const {
|
||||
return (state_ == NULL) ? kOk : static_cast<Code>(state_[4]);
|
||||
return (state_ == nullptr) ? kOk : static_cast<Code>(state_[4]);
|
||||
}
|
||||
|
||||
Status(Code code, const Slice& msg, const Slice& msg2);
|
||||
static const char* CopyState(const char* s);
|
||||
|
||||
// OK status has a null state_. Otherwise, state_ is a new[] array
|
||||
// of the following form:
|
||||
// state_[0..3] == length of message
|
||||
// state_[4] == code
|
||||
// state_[5..] == message
|
||||
const char* state_;
|
||||
};
|
||||
|
||||
inline Status::Status(const Status& s) {
|
||||
state_ = (s.state_ == NULL) ? NULL : CopyState(s.state_);
|
||||
inline Status::Status(const Status& rhs) {
|
||||
state_ = (rhs.state_ == nullptr) ? nullptr : CopyState(rhs.state_);
|
||||
}
|
||||
inline void Status::operator=(const Status& s) {
|
||||
// The following condition catches both aliasing (when this == &s),
|
||||
// and the common case where both s and *this are ok.
|
||||
if (state_ != s.state_) {
|
||||
inline Status& Status::operator=(const Status& rhs) {
|
||||
// The following condition catches both aliasing (when this == &rhs),
|
||||
// and the common case where both rhs and *this are ok.
|
||||
if (state_ != rhs.state_) {
|
||||
delete[] state_;
|
||||
state_ = (s.state_ == NULL) ? NULL : CopyState(s.state_);
|
||||
state_ = (rhs.state_ == nullptr) ? nullptr : CopyState(rhs.state_);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
inline Status& Status::operator=(Status&& rhs) noexcept {
|
||||
std::swap(state_, rhs.state_);
|
||||
return *this;
|
||||
}
|
||||
|
||||
} // namespace leveldb
|
||||
|
@ -6,6 +6,8 @@
|
||||
#define STORAGE_LEVELDB_INCLUDE_TABLE_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "leveldb/export.h"
|
||||
#include "leveldb/iterator.h"
|
||||
|
||||
namespace leveldb {
|
||||
@ -21,7 +23,7 @@ class TableCache;
|
||||
// A Table is a sorted map from strings to strings. Tables are
|
||||
// immutable and persistent. A Table may be safely accessed from
|
||||
// multiple threads without external synchronization.
|
||||
class Table {
|
||||
class LEVELDB_EXPORT Table {
|
||||
public:
|
||||
// Attempt to open the table that is stored in bytes [0..file_size)
|
||||
// of "file", and read the metadata entries necessary to allow
|
||||
@ -30,15 +32,16 @@ class Table {
|
||||
// If successful, returns ok and sets "*table" to the newly opened
|
||||
// table. The client should delete "*table" when no longer needed.
|
||||
// If there was an error while initializing the table, sets "*table"
|
||||
// to NULL and returns a non-ok status. Does not take ownership of
|
||||
// to nullptr and returns a non-ok status. Does not take ownership of
|
||||
// "*source", but the client must ensure that "source" remains live
|
||||
// for the duration of the returned table's lifetime.
|
||||
//
|
||||
// *file must remain live while this Table is in use.
|
||||
static Status Open(const Options& options,
|
||||
RandomAccessFile* file,
|
||||
uint64_t file_size,
|
||||
Table** table);
|
||||
static Status Open(const Options& options, RandomAccessFile* file,
|
||||
uint64_t file_size, Table** table);
|
||||
|
||||
Table(const Table&) = delete;
|
||||
Table& operator=(const Table&) = delete;
|
||||
|
||||
~Table();
|
||||
|
||||
@ -56,28 +59,24 @@ class Table {
|
||||
uint64_t ApproximateOffsetOf(const Slice& key) const;
|
||||
|
||||
private:
|
||||
friend class TableCache;
|
||||
struct Rep;
|
||||
Rep* rep_;
|
||||
|
||||
explicit Table(Rep* rep) { rep_ = rep; }
|
||||
static Iterator* BlockReader(void*, const ReadOptions&, const Slice&);
|
||||
|
||||
explicit Table(Rep* rep) : rep_(rep) {}
|
||||
|
||||
// Calls (*handle_result)(arg, ...) with the entry found after a call
|
||||
// to Seek(key). May not make such a call if filter policy says
|
||||
// that key is not present.
|
||||
friend class TableCache;
|
||||
Status InternalGet(
|
||||
const ReadOptions&, const Slice& key,
|
||||
void* arg,
|
||||
void (*handle_result)(void* arg, const Slice& k, const Slice& v));
|
||||
|
||||
Status InternalGet(const ReadOptions&, const Slice& key, void* arg,
|
||||
void (*handle_result)(void* arg, const Slice& k,
|
||||
const Slice& v));
|
||||
|
||||
void ReadMeta(const Footer& footer);
|
||||
void ReadFilter(const Slice& filter_handle_value);
|
||||
|
||||
// No copying allowed
|
||||
Table(const Table&);
|
||||
void operator=(const Table&);
|
||||
Rep* const rep_;
|
||||
};
|
||||
|
||||
} // namespace leveldb
|
||||
|
@ -14,6 +14,8 @@
|
||||
#define STORAGE_LEVELDB_INCLUDE_TABLE_BUILDER_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "leveldb/export.h"
|
||||
#include "leveldb/options.h"
|
||||
#include "leveldb/status.h"
|
||||
|
||||
@ -23,13 +25,16 @@ class BlockBuilder;
|
||||
class BlockHandle;
|
||||
class WritableFile;
|
||||
|
||||
class TableBuilder {
|
||||
class LEVELDB_EXPORT TableBuilder {
|
||||
public:
|
||||
// Create a builder that will store the contents of the table it is
|
||||
// building in *file. Does not close the file. It is up to the
|
||||
// caller to close the file after calling Finish().
|
||||
TableBuilder(const Options& options, WritableFile* file);
|
||||
|
||||
TableBuilder(const TableBuilder&) = delete;
|
||||
TableBuilder& operator=(const TableBuilder&) = delete;
|
||||
|
||||
// REQUIRES: Either Finish() or Abandon() has been called.
|
||||
~TableBuilder();
|
||||
|
||||
@ -81,10 +86,6 @@ class TableBuilder {
|
||||
|
||||
struct Rep;
|
||||
Rep* rep_;
|
||||
|
||||
// No copying allowed
|
||||
TableBuilder(const TableBuilder&);
|
||||
void operator=(const TableBuilder&);
|
||||
};
|
||||
|
||||
} // namespace leveldb
|
||||
|
@ -22,15 +22,29 @@
|
||||
#define STORAGE_LEVELDB_INCLUDE_WRITE_BATCH_H_
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "leveldb/export.h"
|
||||
#include "leveldb/status.h"
|
||||
|
||||
namespace leveldb {
|
||||
|
||||
class Slice;
|
||||
|
||||
class WriteBatch {
|
||||
class LEVELDB_EXPORT WriteBatch {
|
||||
public:
|
||||
class LEVELDB_EXPORT Handler {
|
||||
public:
|
||||
virtual ~Handler();
|
||||
virtual void Put(const Slice& key, const Slice& value) = 0;
|
||||
virtual void Delete(const Slice& key) = 0;
|
||||
};
|
||||
|
||||
WriteBatch();
|
||||
|
||||
// Intentionally copyable.
|
||||
WriteBatch(const WriteBatch&) = default;
|
||||
WriteBatch& operator=(const WriteBatch&) = default;
|
||||
|
||||
~WriteBatch();
|
||||
|
||||
// Store the mapping "key->value" in the database.
|
||||
@ -42,21 +56,26 @@ class WriteBatch {
|
||||
// Clear all updates buffered in this batch.
|
||||
void Clear();
|
||||
|
||||
// The size of the database changes caused by this batch.
|
||||
//
|
||||
// This number is tied to implementation details, and may change across
|
||||
// releases. It is intended for LevelDB usage metrics.
|
||||
size_t ApproximateSize() const;
|
||||
|
||||
// Copies the operations in "source" to this batch.
|
||||
//
|
||||
// This runs in O(source size) time. However, the constant factor is better
|
||||
// than calling Iterate() over the source batch with a Handler that replicates
|
||||
// the operations into this batch.
|
||||
void Append(const WriteBatch& source);
|
||||
|
||||
// Support for iterating over the contents of a batch.
|
||||
class Handler {
|
||||
public:
|
||||
virtual ~Handler();
|
||||
virtual void Put(const Slice& key, const Slice& value) = 0;
|
||||
virtual void Delete(const Slice& key) = 0;
|
||||
};
|
||||
Status Iterate(Handler* handler) const;
|
||||
|
||||
private:
|
||||
friend class WriteBatchInternal;
|
||||
|
||||
std::string rep_; // See comment in write_batch.cc for the format of rep_
|
||||
|
||||
// Intentionally copyable
|
||||
};
|
||||
|
||||
} // namespace leveldb
|
||||
|
@ -3,9 +3,9 @@
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
// Test for issue 178: a manual compaction causes deleted data to reappear.
|
||||
#include <cstdlib>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <cstdlib>
|
||||
|
||||
#include "leveldb/db.h"
|
||||
#include "leveldb/write_batch.h"
|
||||
@ -21,11 +21,9 @@ std::string Key1(int i) {
|
||||
return buf;
|
||||
}
|
||||
|
||||
std::string Key2(int i) {
|
||||
return Key1(i) + "_xxx";
|
||||
}
|
||||
std::string Key2(int i) { return Key1(i) + "_xxx"; }
|
||||
|
||||
class Issue178 { };
|
||||
class Issue178 {};
|
||||
|
||||
TEST(Issue178, Test) {
|
||||
// Get rid of any state from an old run.
|
||||
@ -87,6 +85,4 @@ TEST(Issue178, Test) {
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
return leveldb::test::RunAllTests();
|
||||
}
|
||||
int main(int argc, char** argv) { return leveldb::test::RunAllTests(); }
|
||||
|
@ -11,14 +11,14 @@
|
||||
|
||||
namespace leveldb {
|
||||
|
||||
class Issue200 { };
|
||||
class Issue200 {};
|
||||
|
||||
TEST(Issue200, Test) {
|
||||
// Get rid of any state from an old run.
|
||||
std::string dbpath = test::TmpDir() + "/leveldb_issue200_test";
|
||||
DestroyDB(dbpath, Options());
|
||||
|
||||
DB *db;
|
||||
DB* db;
|
||||
Options options;
|
||||
options.create_if_missing = true;
|
||||
ASSERT_OK(DB::Open(options, dbpath, &db));
|
||||
@ -31,7 +31,7 @@ TEST(Issue200, Test) {
|
||||
ASSERT_OK(db->Put(write_options, "5", "f"));
|
||||
|
||||
ReadOptions read_options;
|
||||
Iterator *iter = db->NewIterator(read_options);
|
||||
Iterator* iter = db->NewIterator(read_options);
|
||||
|
||||
// Add an element that should not be reflected in the iterator.
|
||||
ASSERT_OK(db->Put(write_options, "25", "cd"));
|
||||
@ -54,6 +54,4 @@ TEST(Issue200, Test) {
|
||||
|
||||
} // namespace leveldb
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
return leveldb::test::RunAllTests();
|
||||
}
|
||||
int main(int argc, char** argv) { return leveldb::test::RunAllTests(); }
|
||||
|
128
issues/issue320_test.cc
Normal file
128
issues/issue320_test.cc
Normal file
@ -0,0 +1,128 @@
|
||||
// Copyright (c) 2019 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "leveldb/db.h"
|
||||
#include "leveldb/write_batch.h"
|
||||
#include "util/testharness.h"
|
||||
|
||||
namespace leveldb {
|
||||
|
||||
namespace {
|
||||
|
||||
// Creates a random number in the range of [0, max).
|
||||
int GenerateRandomNumber(int max) {
  // Reduce the raw std::rand() sample modulo max so that, for a positive
  // max, the result lies in [0, max).
  const int sample = std::rand();
  return sample % max;
}
|
||||
|
||||
std::string CreateRandomString(int32_t index) {
|
||||
static const size_t len = 1024;
|
||||
char bytes[len];
|
||||
size_t i = 0;
|
||||
while (i < 8) {
|
||||
bytes[i] = 'a' + ((index >> (4 * i)) & 0xf);
|
||||
++i;
|
||||
}
|
||||
while (i < sizeof(bytes)) {
|
||||
bytes[i] = 'a' + GenerateRandomNumber(26);
|
||||
++i;
|
||||
}
|
||||
return std::string(bytes, sizeof(bytes));
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Empty class whose name is supplied as the first argument to
// TEST(Issue320, Test).
class Issue320 {};
|
||||
|
||||
// Randomized workload for issue 320.
//
// Keeps an in-memory model (test_map) of up to 10000 key/value pairs and, for
// 200000 iterations, picks a random slot and either inserts a new pair,
// verifies the stored value via Get() and then overwrites it, or (once the
// model holds target_size items) deletes it. Each iteration issues exactly one
// WriteBatch. With keep_snapshots enabled, roughly one iteration in ten
// replaces a random entry in a pool of 100 snapshots, so old snapshots stay
// alive while the data underneath them keeps changing.
//
// The test fails if any Get() returns a value different from the model's.
TEST(Issue320, Test) {
  std::srand(0);

  const bool delete_before_put = false;
  const bool keep_snapshots = true;

  std::vector<std::unique_ptr<std::pair<std::string, std::string>>> test_map(
      10000);
  std::vector<Snapshot const*> snapshots(100, nullptr);

  DB* db;
  Options options;
  options.create_if_missing = true;

  std::string dbpath = test::TmpDir() + "/leveldb_issue320_test";

  // Get rid of any state from an old run (e.g. one that aborted on a failed
  // assertion before reaching the DestroyDB() call at the end).
  DestroyDB(dbpath, options);

  ASSERT_OK(DB::Open(options, dbpath, &db));

  const uint32_t target_size = 10000;
  uint32_t num_items = 0;
  uint32_t count = 0;
  std::string old_value;

  WriteOptions writeOptions;
  ReadOptions readOptions;
  while (count < 200000) {
    if ((++count % 1000) == 0) {
      std::cout << "count: " << count << std::endl;
    }

    const int index =
        GenerateRandomNumber(static_cast<int>(test_map.size()));
    std::unique_ptr<std::pair<std::string, std::string>>& entry =
        test_map[index];
    WriteBatch batch;

    if (entry == nullptr) {
      // Empty slot: create a fresh pair. The key is generated before the
      // value in separate statements so the order in which random bytes are
      // consumed does not depend on the compiler's argument evaluation order.
      num_items++;
      entry.reset(new std::pair<std::string, std::string>());
      entry->first = CreateRandomString(index);
      entry->second = CreateRandomString(index);
      batch.Put(entry->first, entry->second);
    } else {
      // Occupied slot: the database must still return the modelled value.
      ASSERT_OK(db->Get(readOptions, entry->first, &old_value));
      if (old_value != entry->second) {
        std::cout << "ERROR incorrect value returned by Get" << std::endl;
        std::cout << " count=" << count << std::endl;
        std::cout << " old value=" << old_value << std::endl;
        std::cout << " test_map[index]->second=" << entry->second
                  << std::endl;
        std::cout << " test_map[index]->first=" << entry->first
                  << std::endl;
        std::cout << " index=" << index << std::endl;
        ASSERT_EQ(old_value, entry->second);
      }

      if (num_items >= target_size && GenerateRandomNumber(100) > 30) {
        // Model is full: delete this entry.
        batch.Delete(entry->first);
        entry = nullptr;
        --num_items;
      } else {
        // Overwrite the entry with a new random value.
        entry->second = CreateRandomString(index);
        if (delete_before_put) batch.Delete(entry->first);
        batch.Put(entry->first, entry->second);
      }
    }

    ASSERT_OK(db->Write(writeOptions, &batch));

    if (keep_snapshots && GenerateRandomNumber(10) == 0) {
      // Replace a random snapshot, releasing the one it displaces.
      const int slot =
          GenerateRandomNumber(static_cast<int>(snapshots.size()));
      if (snapshots[slot] != nullptr) {
        db->ReleaseSnapshot(snapshots[slot]);
      }
      snapshots[slot] = db->GetSnapshot();
    }
  }

  // Release every snapshot still held before deleting the DB object.
  for (Snapshot const* snapshot : snapshots) {
    if (snapshot) {
      db->ReleaseSnapshot(snapshot);
    }
  }

  delete db;
  DestroyDB(dbpath, options);
}
|
||||
|
||||
} // namespace leveldb
|
||||
|
||||
// Calls leveldb::test::RunAllTests() and uses its return value as the process
// exit status. The command-line arguments are not used.
int main(int argc, char** argv) {
  const int exit_status = leveldb::test::RunAllTests();
  return exit_status;
}
|
@ -5,6 +5,6 @@ Code in the rest of the package includes "port.h" from this directory.
|
||||
"port.h" in turn includes a platform specific "port_<platform>.h" file
|
||||
that provides the platform specific implementation.
|
||||
|
||||
See port_posix.h for an example of what must be provided in a platform
|
||||
See port_stdcxx.h for an example of what must be provided in a platform
|
||||
specific header file.
|
||||
|
@ -1,233 +0,0 @@
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
// AtomicPointer provides storage for a lock-free pointer.
|
||||
// Platform-dependent implementation of AtomicPointer:
|
||||
// - If the platform provides a cheap barrier, we use it with raw pointers
|
||||
// - If <atomic> is present (on newer versions of gcc, it is), we use
|
||||
// a <atomic>-based AtomicPointer. However we prefer the memory
|
||||
// barrier based version, because at least on a gcc 4.4 32-bit build
|
||||
// on linux, we have encountered a buggy <atomic> implementation.
|
||||
// Also, some <atomic> implementations are much slower than a memory-barrier
|
||||
// based implementation (~16ns for <atomic> based acquire-load vs. ~1ns for
|
||||
// a barrier based acquire-load).
|
||||
// This code is based on atomicops-internals-* in Google's perftools:
|
||||
// http://code.google.com/p/google-perftools/source/browse/#svn%2Ftrunk%2Fsrc%2Fbase
|
||||
|
||||
#ifndef PORT_ATOMIC_POINTER_H_
|
||||
#define PORT_ATOMIC_POINTER_H_
|
||||
|
||||
#include <stdint.h>
|
||||
#ifdef LEVELDB_ATOMIC_PRESENT
|
||||
#include <atomic>
|
||||
#endif
|
||||
#ifdef OS_WIN
|
||||
#include <windows.h>
|
||||
#endif
|
||||
#ifdef OS_MACOSX
|
||||
#include <libkern/OSAtomic.h>
|
||||
#endif
|
||||
|
||||
#if defined(_M_X64) || defined(__x86_64__)
|
||||
#define ARCH_CPU_X86_FAMILY 1
|
||||
#elif defined(_M_IX86) || defined(__i386__) || defined(__i386)
|
||||
#define ARCH_CPU_X86_FAMILY 1
|
||||
#elif defined(__ARMEL__)
|
||||
#define ARCH_CPU_ARM_FAMILY 1
|
||||
#elif defined(__aarch64__)
|
||||
#define ARCH_CPU_ARM64_FAMILY 1
|
||||
#elif defined(__ppc__) || defined(__powerpc__) || defined(__powerpc64__)
|
||||
#define ARCH_CPU_PPC_FAMILY 1
|
||||
#endif
|
||||
|
||||
namespace leveldb {
|
||||
namespace port {
|
||||
|
||||
// Define MemoryBarrier() if available
|
||||
// Windows on x86
|
||||
#if defined(OS_WIN) && defined(COMPILER_MSVC) && defined(ARCH_CPU_X86_FAMILY)
|
||||
// windows.h already provides a MemoryBarrier(void) macro
|
||||
// http://msdn.microsoft.com/en-us/library/ms684208(v=vs.85).aspx
|
||||
#define LEVELDB_HAVE_MEMORY_BARRIER
|
||||
|
||||
// Mac OS
|
||||
#elif defined(OS_MACOSX)
|
||||
inline void MemoryBarrier() {
|
||||
OSMemoryBarrier();
|
||||
}
|
||||
#define LEVELDB_HAVE_MEMORY_BARRIER
|
||||
|
||||
// Gcc on x86
|
||||
#elif defined(ARCH_CPU_X86_FAMILY) && defined(__GNUC__)
|
||||
inline void MemoryBarrier() {
|
||||
// See http://gcc.gnu.org/ml/gcc/2003-04/msg01180.html for a discussion on
|
||||
// this idiom. Also see http://en.wikipedia.org/wiki/Memory_ordering.
|
||||
__asm__ __volatile__("" : : : "memory");
|
||||
}
|
||||
#define LEVELDB_HAVE_MEMORY_BARRIER
|
||||
|
||||
// Sun Studio
|
||||
#elif defined(ARCH_CPU_X86_FAMILY) && defined(__SUNPRO_CC)
|
||||
inline void MemoryBarrier() {
|
||||
// See http://gcc.gnu.org/ml/gcc/2003-04/msg01180.html for a discussion on
|
||||
// this idiom. Also see http://en.wikipedia.org/wiki/Memory_ordering.
|
||||
asm volatile("" : : : "memory");
|
||||
}
|
||||
#define LEVELDB_HAVE_MEMORY_BARRIER
|
||||
|
||||
// ARM Linux
|
||||
#elif defined(ARCH_CPU_ARM_FAMILY) && defined(__linux__)
|
||||
typedef void (*LinuxKernelMemoryBarrierFunc)(void);
|
||||
// The Linux ARM kernel provides a highly optimized device-specific memory
|
||||
// barrier function at a fixed memory address that is mapped in every
|
||||
// user-level process.
|
||||
//
|
||||
// This beats using CPU-specific instructions which are, on single-core
|
||||
// devices, un-necessary and very costly (e.g. ARMv7-A "dmb" takes more
|
||||
// than 180ns on a Cortex-A8 like the one on a Nexus One). Benchmarking
|
||||
// shows that the extra function call cost is completely negligible on
|
||||
// multi-core devices.
|
||||
//
|
||||
inline void MemoryBarrier() {
|
||||
(*(LinuxKernelMemoryBarrierFunc)0xffff0fa0)();
|
||||
}
|
||||
#define LEVELDB_HAVE_MEMORY_BARRIER
|
||||
|
||||
// ARM64
|
||||
#elif defined(ARCH_CPU_ARM64_FAMILY)
|
||||
inline void MemoryBarrier() {
|
||||
asm volatile("dmb sy" : : : "memory");
|
||||
}
|
||||
#define LEVELDB_HAVE_MEMORY_BARRIER
|
||||
|
||||
// PPC
|
||||
#elif defined(ARCH_CPU_PPC_FAMILY) && defined(__GNUC__)
|
||||
inline void MemoryBarrier() {
|
||||
// TODO for some powerpc expert: is there a cheaper suitable variant?
|
||||
// Perhaps by having separate barriers for acquire and release ops.
|
||||
asm volatile("sync" : : : "memory");
|
||||
}
|
||||
#define LEVELDB_HAVE_MEMORY_BARRIER
|
||||
|
||||
#endif
|
||||
|
||||
// AtomicPointer built using platform-specific MemoryBarrier()
|
||||
#if defined(LEVELDB_HAVE_MEMORY_BARRIER)
|
||||
class AtomicPointer {
|
||||
private:
|
||||
void* rep_;
|
||||
public:
|
||||
AtomicPointer() { }
|
||||
explicit AtomicPointer(void* p) : rep_(p) {}
|
||||
inline void* NoBarrier_Load() const { return rep_; }
|
||||
inline void NoBarrier_Store(void* v) { rep_ = v; }
|
||||
inline void* Acquire_Load() const {
|
||||
void* result = rep_;
|
||||
MemoryBarrier();
|
||||
return result;
|
||||
}
|
||||
inline void Release_Store(void* v) {
|
||||
MemoryBarrier();
|
||||
rep_ = v;
|
||||
}
|
||||
};
|
||||
|
||||
// AtomicPointer based on <cstdatomic>
|
||||
#elif defined(LEVELDB_ATOMIC_PRESENT)
|
||||
class AtomicPointer {
|
||||
private:
|
||||
std::atomic<void*> rep_;
|
||||
public:
|
||||
AtomicPointer() { }
|
||||
explicit AtomicPointer(void* v) : rep_(v) { }
|
||||
inline void* Acquire_Load() const {
|
||||
return rep_.load(std::memory_order_acquire);
|
||||
}
|
||||
inline void Release_Store(void* v) {
|
||||
rep_.store(v, std::memory_order_release);
|
||||
}
|
||||
inline void* NoBarrier_Load() const {
|
||||
return rep_.load(std::memory_order_relaxed);
|
||||
}
|
||||
inline void NoBarrier_Store(void* v) {
|
||||
rep_.store(v, std::memory_order_relaxed);
|
||||
}
|
||||
};
|
||||
|
||||
// Atomic pointer based on sparc memory barriers
|
||||
#elif defined(__sparcv9) && defined(__GNUC__)
|
||||
class AtomicPointer {
|
||||
private:
|
||||
void* rep_;
|
||||
public:
|
||||
AtomicPointer() { }
|
||||
explicit AtomicPointer(void* v) : rep_(v) { }
|
||||
inline void* Acquire_Load() const {
|
||||
void* val;
|
||||
__asm__ __volatile__ (
|
||||
"ldx [%[rep_]], %[val] \n\t"
|
||||
"membar #LoadLoad|#LoadStore \n\t"
|
||||
: [val] "=r" (val)
|
||||
: [rep_] "r" (&rep_)
|
||||
: "memory");
|
||||
return val;
|
||||
}
|
||||
inline void Release_Store(void* v) {
|
||||
__asm__ __volatile__ (
|
||||
"membar #LoadStore|#StoreStore \n\t"
|
||||
"stx %[v], [%[rep_]] \n\t"
|
||||
:
|
||||
: [rep_] "r" (&rep_), [v] "r" (v)
|
||||
: "memory");
|
||||
}
|
||||
inline void* NoBarrier_Load() const { return rep_; }
|
||||
inline void NoBarrier_Store(void* v) { rep_ = v; }
|
||||
};
|
||||
|
||||
// Atomic pointer based on ia64 acq/rel
|
||||
#elif defined(__ia64) && defined(__GNUC__)
|
||||
// AtomicPointer variant whose ordered accessors are written in GCC inline
// assembly using the ld8.acq and st8.rel instructions.
class AtomicPointer {
|
||||
private:
|
||||
// The stored pointer. The NoBarrier_* accessors read and write it directly.
void* rep_;
|
||||
public:
|
||||
// Leaves rep_ uninitialized.
AtomicPointer() { }
|
||||
// Starts out holding v.
explicit AtomicPointer(void* v) : rep_(v) { }
|
||||
// Loads rep_ with a single ld8.acq instruction and returns the value.
// The "memory" clobber tells the compiler that the asm statement may touch
// memory, so it does not cache or move memory accesses across it.
inline void* Acquire_Load() const {
|
||||
void* val ;
|
||||
__asm__ __volatile__ (
|
||||
"ld8.acq %[val] = [%[rep_]] \n\t"
|
||||
: [val] "=r" (val)
|
||||
: [rep_] "r" (&rep_)
|
||||
: "memory"
|
||||
);
|
||||
return val;
|
||||
}
|
||||
// Writes v to rep_ with a single st8.rel instruction. There are no output
// operands; the "memory" clobber again acts as a compiler-level barrier.
inline void Release_Store(void* v) {
|
||||
__asm__ __volatile__ (
|
||||
"st8.rel [%[rep_]] = %[v] \n\t"
|
||||
:
|
||||
: [rep_] "r" (&rep_), [v] "r" (v)
|
||||
: "memory"
|
||||
);
|
||||
}
|
||||
// Plain read of rep_ with no ordering instruction.
inline void* NoBarrier_Load() const { return rep_; }
|
||||
// Plain write of rep_ with no ordering instruction.
inline void NoBarrier_Store(void* v) { rep_ = v; }
|
||||
};
|
||||
|
||||
// We have neither MemoryBarrier(), nor <atomic>
|
||||
#else
|
||||
#error Please implement AtomicPointer for this platform.
|
||||
|
||||
#endif
|
||||
|
||||
#undef LEVELDB_HAVE_MEMORY_BARRIER
|
||||
#undef ARCH_CPU_X86_FAMILY
|
||||
#undef ARCH_CPU_ARM_FAMILY
|
||||
#undef ARCH_CPU_ARM64_FAMILY
|
||||
#undef ARCH_CPU_PPC_FAMILY
|
||||
|
||||
} // namespace port
|
||||
} // namespace leveldb
|
||||
|
||||
#endif // PORT_ATOMIC_POINTER_H_
|
@ -10,10 +10,10 @@
|
||||
// Include the appropriate platform specific file below. If you are
|
||||
// porting to a new platform, see "port_example.h" for documentation
|
||||
// of what the new port_<platform>.h file must provide.
|
||||
#if defined(LEVELDB_PLATFORM_POSIX)
|
||||
# include "port/port_posix.h"
|
||||
#if defined(LEVELDB_PLATFORM_POSIX) || defined(LEVELDB_PLATFORM_WINDOWS)
|
||||
#include "port/port_stdcxx.h"
|
||||
#elif defined(LEVELDB_PLATFORM_CHROMIUM)
|
||||
# include "port/port_chromium.h"
|
||||
#include "port/port_chromium.h"
|
||||
#endif
|
||||
|
||||
#endif // STORAGE_LEVELDB_PORT_PORT_H_
|
||||
|
39
port/port_config.h.in
Normal file
39
port/port_config.h.in
Normal file
@ -0,0 +1,39 @@
|
||||
// Copyright 2017 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#ifndef STORAGE_LEVELDB_PORT_PORT_CONFIG_H_
|
||||
#define STORAGE_LEVELDB_PORT_PORT_CONFIG_H_
|
||||
|
||||
// Define to 1 if you have a definition for fdatasync() in <unistd.h>.
|
||||
#if !defined(HAVE_FDATASYNC)
|
||||
#cmakedefine01 HAVE_FDATASYNC
|
||||
#endif // !defined(HAVE_FDATASYNC)
|
||||
|
||||
// Define to 1 if you have a definition for F_FULLFSYNC in <fcntl.h>.
|
||||
#if !defined(HAVE_FULLFSYNC)
|
||||
#cmakedefine01 HAVE_FULLFSYNC
|
||||
#endif // !defined(HAVE_FULLFSYNC)
|
||||
|
||||
// Define to 1 if you have a definition for O_CLOEXEC in <fcntl.h>.
|
||||
#if !defined(HAVE_O_CLOEXEC)
|
||||
#cmakedefine01 HAVE_O_CLOEXEC
|
||||
#endif // !defined(HAVE_O_CLOEXEC)
|
||||
|
||||
// Define to 1 if you have Google CRC32C.
|
||||
#if !defined(HAVE_CRC32C)
|
||||
#cmakedefine01 HAVE_CRC32C
|
||||
#endif // !defined(HAVE_CRC32C)
|
||||
|
||||
// Define to 1 if you have Google Snappy.
|
||||
#if !defined(HAVE_SNAPPY)
|
||||
#cmakedefine01 HAVE_SNAPPY
|
||||
#endif // !defined(HAVE_SNAPPY)
|
||||
|
||||
// Define to 1 if your processor stores words with the most significant byte
|
||||
// first (like Motorola and SPARC, unlike Intel and VAX).
|
||||
#if !defined(LEVELDB_IS_BIG_ENDIAN)
|
||||
#cmakedefine01 LEVELDB_IS_BIG_ENDIAN
|
||||
#endif // !defined(LEVELDB_IS_BIG_ENDIAN)
|
||||
|
||||
#endif // STORAGE_LEVELDB_PORT_PORT_CONFIG_H_
|
@ -10,6 +10,8 @@
|
||||
#ifndef STORAGE_LEVELDB_PORT_PORT_EXAMPLE_H_
|
||||
#define STORAGE_LEVELDB_PORT_PORT_EXAMPLE_H_
|
||||
|
||||
#include "port/thread_annotations.h"
|
||||
|
||||
namespace leveldb {
|
||||
namespace port {
|
||||
|
||||
@ -23,23 +25,23 @@ static const bool kLittleEndian = true /* or some other expression */;
|
||||
// ------------------ Threading -------------------
|
||||
|
||||
// A Mutex represents an exclusive lock.
|
||||
class Mutex {
|
||||
class LOCKABLE Mutex {
|
||||
public:
|
||||
Mutex();
|
||||
~Mutex();
|
||||
|
||||
// Lock the mutex. Waits until other lockers have exited.
|
||||
// Will deadlock if the mutex is already locked by this thread.
|
||||
void Lock();
|
||||
void Lock() EXCLUSIVE_LOCK_FUNCTION();
|
||||
|
||||
// Unlock the mutex.
|
||||
// REQUIRES: This mutex was locked by this thread.
|
||||
void Unlock();
|
||||
void Unlock() UNLOCK_FUNCTION();
|
||||
|
||||
// Optionally crash if this thread does not hold this mutex.
|
||||
// The implementation must be fast, especially if NDEBUG is
|
||||
// defined. The implementation is allowed to skip all checks.
|
||||
void AssertHeld();
|
||||
void AssertHeld() ASSERT_EXCLUSIVE_LOCK();
|
||||
};
|
||||
|
||||
class CondVar {
|
||||
@ -60,57 +62,18 @@ class CondVar {
|
||||
void SignallAll();
|
||||
};
|
||||
|
||||
// Thread-safe initialization.
|
||||
// Used as follows:
|
||||
// static port::OnceType init_control = LEVELDB_ONCE_INIT;
|
||||
// static void Initializer() { ... do something ...; }
|
||||
// ...
|
||||
// port::InitOnce(&init_control, &Initializer);
|
||||
typedef intptr_t OnceType;
|
||||
#define LEVELDB_ONCE_INIT 0
|
||||
extern void InitOnce(port::OnceType*, void (*initializer)());
|
||||
|
||||
// A type that holds a pointer that can be read or written atomically
// (i.e., without word-tearing.)
class AtomicPointer {
 private:
  // The pointer is kept as an integer, so every conversion to or from void*
  // has to be spelled out with an explicit cast.
  intptr_t rep_;

 public:
  // Initialize to arbitrary value
  AtomicPointer();

  // Initialize to hold v. The cast is required: a void* does not convert
  // implicitly to intptr_t.
  explicit AtomicPointer(void* v) : rep_(reinterpret_cast<intptr_t>(v)) { }

  // Read and return the stored pointer with the guarantee that no
  // later memory access (read or write) by this thread can be
  // reordered ahead of this read.
  void* Acquire_Load() const;

  // Set v as the stored pointer with the guarantee that no earlier
  // memory access (read or write) by this thread can be reordered
  // after this store.
  void Release_Store(void* v);

  // Read the stored pointer with no ordering guarantees.
  void* NoBarrier_Load() const;

  // Set v as the stored pointer with no ordering guarantees.
  void NoBarrier_Store(void* v);
};
|
||||
|
||||
// ------------------ Compression -------------------
|
||||
|
||||
// Store the snappy compression of "input[0,input_length-1]" in *output.
|
||||
// Returns false if snappy is not supported by this port.
|
||||
extern bool Snappy_Compress(const char* input, size_t input_length,
|
||||
std::string* output);
|
||||
bool Snappy_Compress(const char* input, size_t input_length,
|
||||
std::string* output);
|
||||
|
||||
// If input[0,input_length-1] looks like a valid snappy compressed
|
||||
// buffer, store the size of the uncompressed data in *result and
|
||||
// return true. Else return false.
|
||||
extern bool Snappy_GetUncompressedLength(const char* input, size_t length,
|
||||
size_t* result);
|
||||
bool Snappy_GetUncompressedLength(const char* input, size_t length,
|
||||
size_t* result);
|
||||
|
||||
// Attempt to snappy uncompress input[0,input_length-1] into *output.
|
||||
// Returns true if successful, false if the input is invalid lightweight
|
||||
@ -119,15 +82,21 @@ extern bool Snappy_GetUncompressedLength(const char* input, size_t length,
|
||||
// REQUIRES: at least the first "n" bytes of output[] must be writable
|
||||
// where "n" is the result of a successful call to
|
||||
// Snappy_GetUncompressedLength.
|
||||
extern bool Snappy_Uncompress(const char* input_data, size_t input_length,
|
||||
char* output);
|
||||
bool Snappy_Uncompress(const char* input_data, size_t input_length,
|
||||
char* output);
|
||||
|
||||
// ------------------ Miscellaneous -------------------
|
||||
|
||||
// If heap profiling is not supported, returns false.
|
||||
// Else repeatedly calls (*func)(arg, data, n) and then returns true.
|
||||
// The concatenation of all "data[0,n-1]" fragments is the heap profile.
|
||||
extern bool GetHeapProfile(void (*func)(void*, const char*, int), void* arg);
|
||||
bool GetHeapProfile(void (*func)(void*, const char*, int), void* arg);
|
||||
|
||||
// Extend the CRC to include the first n bytes of buf.
|
||||
//
|
||||
// Returns zero if the CRC cannot be extended using acceleration, else returns
|
||||
// the newly extended CRC value (which may also be zero).
|
||||
uint32_t AcceleratedCRC32C(uint32_t crc, const char* buf, size_t size);
|
||||
|
||||
} // namespace port
|
||||
} // namespace leveldb
|
||||
|
@ -1,54 +0,0 @@
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#include "port/port_posix.h"
|
||||
|
||||
#include <cstdlib>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "util/logging.h"
|
||||
|
||||
namespace leveldb {
|
||||
namespace port {
|
||||
|
||||
// Checks the return code of a pthread call. A zero result is success and the
// function simply returns. Any other result is reported on stderr, together
// with the caller-supplied label and the strerror() text, and the process is
// aborted.
static void PthreadCall(const char* label, int result) {
  if (result == 0) {
    return;  // Success: nothing to report.
  }
  fprintf(stderr, "pthread %s: %s\n", label, strerror(result));
  abort();
}
|
||||
|
||||
// Initializes mu_ with default attributes; aborts via PthreadCall on failure.
Mutex::Mutex() {
  const int rc = pthread_mutex_init(&mu_, NULL);
  PthreadCall("init mutex", rc);
}
|
||||
|
||||
// Destroys mu_; aborts via PthreadCall on failure.
Mutex::~Mutex() {
  const int rc = pthread_mutex_destroy(&mu_);
  PthreadCall("destroy mutex", rc);
}
|
||||
|
||||
// Locks mu_; aborts via PthreadCall on failure.
void Mutex::Lock() {
  const int rc = pthread_mutex_lock(&mu_);
  PthreadCall("lock", rc);
}
|
||||
|
||||
// Unlocks mu_; aborts via PthreadCall on failure.
void Mutex::Unlock() {
  const int rc = pthread_mutex_unlock(&mu_);
  PthreadCall("unlock", rc);
}
|
||||
|
||||
// Remembers the mutex that Wait() will use and initializes cv_ with default
// attributes; aborts via PthreadCall on failure.
CondVar::CondVar(Mutex* mu) : mu_(mu) {
  const int rc = pthread_cond_init(&cv_, NULL);
  PthreadCall("init cv", rc);
}
|
||||
|
||||
// Destroys cv_; aborts via PthreadCall on failure.
CondVar::~CondVar() {
  const int rc = pthread_cond_destroy(&cv_);
  PthreadCall("destroy cv", rc);
}
|
||||
|
||||
// Waits on cv_ using the pthread mutex inside the associated Mutex; aborts
// via PthreadCall on failure.
void CondVar::Wait() {
  const int rc = pthread_cond_wait(&cv_, &mu_->mu_);
  PthreadCall("wait", rc);
}
|
||||
|
||||
// Calls pthread_cond_signal on cv_; aborts via PthreadCall on failure.
void CondVar::Signal() {
  const int rc = pthread_cond_signal(&cv_);
  PthreadCall("signal", rc);
}
|
||||
|
||||
// Calls pthread_cond_broadcast on cv_; aborts via PthreadCall on failure.
void CondVar::SignalAll() {
  const int rc = pthread_cond_broadcast(&cv_);
  PthreadCall("broadcast", rc);
}
|
||||
|
||||
// Forwards to pthread_once with the given control object and initializer;
// aborts via PthreadCall on failure.
void InitOnce(OnceType* once, void (*initializer)()) {
  const int rc = pthread_once(once, initializer);
  PthreadCall("once", rc);
}
|
||||
|
||||
} // namespace port
|
||||
} // namespace leveldb
|
@ -1,154 +0,0 @@
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
//
|
||||
// See port_example.h for documentation for the following types/functions.
|
||||
|
||||
#ifndef STORAGE_LEVELDB_PORT_PORT_POSIX_H_
|
||||
#define STORAGE_LEVELDB_PORT_PORT_POSIX_H_
|
||||
|
||||
#undef PLATFORM_IS_LITTLE_ENDIAN
|
||||
#if defined(OS_MACOSX)
|
||||
#include <machine/endian.h>
|
||||
#if defined(__DARWIN_LITTLE_ENDIAN) && defined(__DARWIN_BYTE_ORDER)
|
||||
#define PLATFORM_IS_LITTLE_ENDIAN \
|
||||
(__DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN)
|
||||
#endif
|
||||
#elif defined(OS_SOLARIS)
|
||||
#include <sys/isa_defs.h>
|
||||
#ifdef _LITTLE_ENDIAN
|
||||
#define PLATFORM_IS_LITTLE_ENDIAN true
|
||||
#else
|
||||
#define PLATFORM_IS_LITTLE_ENDIAN false
|
||||
#endif
|
||||
#elif defined(OS_FREEBSD) || defined(OS_OPENBSD) ||\
|
||||
defined(OS_NETBSD) || defined(OS_DRAGONFLYBSD)
|
||||
#include <sys/types.h>
|
||||
#include <sys/endian.h>
|
||||
#define PLATFORM_IS_LITTLE_ENDIAN (_BYTE_ORDER == _LITTLE_ENDIAN)
|
||||
#elif defined(OS_HPUX)
|
||||
#define PLATFORM_IS_LITTLE_ENDIAN false
|
||||
#elif defined(OS_ANDROID)
|
||||
// Due to a bug in the NDK x86 <sys/endian.h> definition,
|
||||
// _BYTE_ORDER must be used instead of __BYTE_ORDER on Android.
|
||||
// See http://code.google.com/p/android/issues/detail?id=39824
|
||||
#include <endian.h>
|
||||
#define PLATFORM_IS_LITTLE_ENDIAN (_BYTE_ORDER == _LITTLE_ENDIAN)
|
||||
#else
|
||||
#include <endian.h>
|
||||
#endif
|
||||
|
||||
#include <pthread.h>
|
||||
#ifdef SNAPPY
|
||||
#include <snappy.h>
|
||||
#endif
|
||||
#include <stdint.h>
|
||||
#include <string>
|
||||
#include "port/atomic_pointer.h"
|
||||
|
||||
#ifndef PLATFORM_IS_LITTLE_ENDIAN
|
||||
#define PLATFORM_IS_LITTLE_ENDIAN (__BYTE_ORDER == __LITTLE_ENDIAN)
|
||||
#endif
|
||||
|
||||
#if defined(OS_MACOSX) || defined(OS_SOLARIS) || defined(OS_FREEBSD) ||\
|
||||
defined(OS_NETBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLYBSD) ||\
|
||||
defined(OS_ANDROID) || defined(OS_HPUX) || defined(CYGWIN)
|
||||
// Use fread/fwrite/fflush on platforms without _unlocked variants
|
||||
#define fread_unlocked fread
|
||||
#define fwrite_unlocked fwrite
|
||||
#define fflush_unlocked fflush
|
||||
#endif
|
||||
|
||||
#if defined(OS_MACOSX) || defined(OS_FREEBSD) ||\
|
||||
defined(OS_OPENBSD) || defined(OS_DRAGONFLYBSD)
|
||||
// Use fsync() on platforms without fdatasync()
|
||||
#define fdatasync fsync
|
||||
#endif
|
||||
|
||||
#if defined(OS_ANDROID) && __ANDROID_API__ < 9
|
||||
// fdatasync() was only introduced in API level 9 on Android. Use fsync()
|
||||
// when targeting older platforms.
|
||||
#define fdatasync fsync
|
||||
#endif
|
||||
|
||||
namespace leveldb {
|
||||
namespace port {
|
||||
|
||||
static const bool kLittleEndian = PLATFORM_IS_LITTLE_ENDIAN;
|
||||
#undef PLATFORM_IS_LITTLE_ENDIAN
|
||||
|
||||
class CondVar;
|
||||
|
||||
// Wrapper around a pthread_mutex_t. The constructor, destructor, Lock() and
// Unlock() are only declared here; AssertHeld() is an inline no-op.
class Mutex {
 public:
  Mutex();
  ~Mutex();

  void Lock();
  void Unlock();

  // Performs no check in this port.
  void AssertHeld() { }

 private:
  // No copying: declared private and never defined.
  Mutex(const Mutex&);
  void operator=(const Mutex&);

  // CondVar reaches into mu_ when waiting.
  friend class CondVar;
  pthread_mutex_t mu_;
};
|
||||
|
||||
class CondVar {
|
||||
public:
|
||||
explicit CondVar(Mutex* mu);
|
||||
~CondVar();
|
||||
void Wait();
|
||||
void Signal();
|
||||
void SignalAll();
|
||||
private:
|
||||
pthread_cond_t cv_;
|
||||
Mutex* mu_;
|
||||
};
|
||||
|
||||
typedef pthread_once_t OnceType;
|
||||
#define LEVELDB_ONCE_INIT PTHREAD_ONCE_INIT
|
||||
extern void InitOnce(OnceType* once, void (*initializer)());
|
||||
|
||||
// Compresses input[0,length-1] with snappy into *output and returns true.
// When the build does not define SNAPPY, *output is left untouched and the
// function returns false.
inline bool Snappy_Compress(const char* input, size_t length,
                            ::std::string* output) {
#ifdef SNAPPY
  // Size the buffer for the worst case, compress into it, then shrink it to
  // the number of bytes actually produced.
  size_t compressed_size;
  output->resize(snappy::MaxCompressedLength(length));
  char* const dest = &(*output)[0];
  snappy::RawCompress(input, length, dest, &compressed_size);
  output->resize(compressed_size);
  return true;
#else
  return false;
#endif
}
|
||||
|
||||
// Forwards to snappy::GetUncompressedLength when SNAPPY is defined.
// Otherwise returns false without touching *result.
inline bool Snappy_GetUncompressedLength(const char* input, size_t length,
                                         size_t* result) {
#ifndef SNAPPY
  return false;
#else
  return snappy::GetUncompressedLength(input, length, result);
#endif
}
|
||||
|
||||
// Forwards to snappy::RawUncompress when SNAPPY is defined.
// Otherwise returns false without writing to output.
inline bool Snappy_Uncompress(const char* input, size_t length,
                              char* output) {
#ifndef SNAPPY
  return false;
#else
  return snappy::RawUncompress(input, length, output);
#endif
}
|
||||
|
||||
// This port never produces a heap profile: func is not called and the result
// is always false.
inline bool GetHeapProfile(void (*func)(void*, const char*, int), void* arg) {
  (void)func;
  (void)arg;
  return false;
}
|
||||
|
||||
} // namespace port
|
||||
} // namespace leveldb
|
||||
|
||||
#endif // STORAGE_LEVELDB_PORT_PORT_POSIX_H_
|
153
port/port_stdcxx.h
Normal file
153
port/port_stdcxx.h
Normal file
@ -0,0 +1,153 @@
|
||||
// Copyright (c) 2018 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#ifndef STORAGE_LEVELDB_PORT_PORT_STDCXX_H_
|
||||
#define STORAGE_LEVELDB_PORT_PORT_STDCXX_H_
|
||||
|
||||
// port/port_config.h availability is automatically detected via __has_include
|
||||
// in newer compilers. If LEVELDB_HAS_PORT_CONFIG_H is defined, it overrides the
|
||||
// configuration detection.
|
||||
#if defined(LEVELDB_HAS_PORT_CONFIG_H)
|
||||
|
||||
#if LEVELDB_HAS_PORT_CONFIG_H
|
||||
#include "port/port_config.h"
|
||||
#endif // LEVELDB_HAS_PORT_CONFIG_H
|
||||
|
||||
#elif defined(__has_include)
|
||||
|
||||
#if __has_include("port/port_config.h")
|
||||
#include "port/port_config.h"
|
||||
#endif // __has_include("port/port_config.h")
|
||||
|
||||
#endif // defined(LEVELDB_HAS_PORT_CONFIG_H)
|
||||
|
||||
#if HAVE_CRC32C
|
||||
#include <crc32c/crc32c.h>
|
||||
#endif // HAVE_CRC32C
|
||||
#if HAVE_SNAPPY
|
||||
#include <snappy.h>
|
||||
#endif // HAVE_SNAPPY
|
||||
|
||||
#include <cassert>
|
||||
#include <condition_variable> // NOLINT
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <mutex> // NOLINT
|
||||
#include <string>
|
||||
|
||||
#include "port/thread_annotations.h"
|
||||
|
||||
namespace leveldb {
|
||||
namespace port {
|
||||
|
||||
static const bool kLittleEndian = !LEVELDB_IS_BIG_ENDIAN;
|
||||
|
||||
class CondVar;
|
||||
|
||||
// Thinly wraps std::mutex.
|
||||
class LOCKABLE Mutex {
|
||||
public:
|
||||
Mutex() = default;
|
||||
~Mutex() = default;
|
||||
|
||||
Mutex(const Mutex&) = delete;
|
||||
Mutex& operator=(const Mutex&) = delete;
|
||||
|
||||
void Lock() EXCLUSIVE_LOCK_FUNCTION() { mu_.lock(); }
|
||||
void Unlock() UNLOCK_FUNCTION() { mu_.unlock(); }
|
||||
void AssertHeld() ASSERT_EXCLUSIVE_LOCK() {}
|
||||
|
||||
private:
|
||||
friend class CondVar;
|
||||
std::mutex mu_;
|
||||
};
|
||||
|
||||
// Thinly wraps std::condition_variable.
|
||||
class CondVar {
|
||||
public:
|
||||
explicit CondVar(Mutex* mu) : mu_(mu) { assert(mu != nullptr); }
|
||||
~CondVar() = default;
|
||||
|
||||
CondVar(const CondVar&) = delete;
|
||||
CondVar& operator=(const CondVar&) = delete;
|
||||
|
||||
void Wait() {
|
||||
std::unique_lock<std::mutex> lock(mu_->mu_, std::adopt_lock);
|
||||
cv_.wait(lock);
|
||||
lock.release();
|
||||
}
|
||||
void Signal() { cv_.notify_one(); }
|
||||
void SignalAll() { cv_.notify_all(); }
|
||||
|
||||
private:
|
||||
std::condition_variable cv_;
|
||||
Mutex* const mu_;
|
||||
};
|
||||
|
||||
// Compresses input[0,length-1] with snappy into *output and returns true.
// When HAVE_SNAPPY is zero or undefined, nothing is written and the function
// returns false.
inline bool Snappy_Compress(const char* input, size_t length,
                            std::string* output) {
#if HAVE_SNAPPY
  // Size the buffer for the worst case, compress into it, then shrink it to
  // the number of bytes actually produced.
  size_t compressed_size;
  output->resize(snappy::MaxCompressedLength(length));
  snappy::RawCompress(input, length, &(*output)[0], &compressed_size);
  output->resize(compressed_size);
  return true;
#else
  // Silence compiler warnings about unused arguments.
  (void)input;
  (void)length;
  (void)output;
  return false;
#endif  // HAVE_SNAPPY
}
|
||||
|
||||
// Forwards to snappy::GetUncompressedLength when HAVE_SNAPPY is non-zero.
// Otherwise returns false without touching *result.
inline bool Snappy_GetUncompressedLength(const char* input, size_t length,
                                         size_t* result) {
#if !HAVE_SNAPPY
  // Silence compiler warnings about unused arguments.
  (void)input;
  (void)length;
  (void)result;
  return false;
#else
  return snappy::GetUncompressedLength(input, length, result);
#endif  // !HAVE_SNAPPY
}
|
||||
|
||||
// Forwards to snappy::RawUncompress when HAVE_SNAPPY is non-zero.
// Otherwise returns false without writing to output.
inline bool Snappy_Uncompress(const char* input, size_t length, char* output) {
#if !HAVE_SNAPPY
  // Silence compiler warnings about unused arguments.
  (void)input;
  (void)length;
  (void)output;
  return false;
#else
  return snappy::RawUncompress(input, length, output);
#endif  // !HAVE_SNAPPY
}
|
||||
|
||||
// This port never produces a heap profile: func is not called and the result
// is always false.
inline bool GetHeapProfile(void (*func)(void*, const char*, int), void* arg) {
  // Both arguments are intentionally unused; the casts keep compilers quiet.
  static_cast<void>(func);
  static_cast<void>(arg);
  return false;
}
|
||||
|
||||
// Extends crc over buf[0,size-1] through ::crc32c::Extend when HAVE_CRC32C is
// non-zero. Otherwise the arguments are ignored and 0 is returned.
inline uint32_t AcceleratedCRC32C(uint32_t crc, const char* buf, size_t size) {
#if !HAVE_CRC32C
  // Silence compiler warnings about unused arguments.
  (void)crc;
  (void)buf;
  (void)size;
  return 0;
#else
  return ::crc32c::Extend(crc, reinterpret_cast<const uint8_t*>(buf), size);
#endif  // !HAVE_CRC32C
}
|
||||
|
||||
} // namespace port
|
||||
} // namespace leveldb
|
||||
|
||||
#endif // STORAGE_LEVELDB_PORT_PORT_STDCXX_H_
|
@ -5,56 +5,104 @@
|
||||
#ifndef STORAGE_LEVELDB_PORT_THREAD_ANNOTATIONS_H_
|
||||
#define STORAGE_LEVELDB_PORT_THREAD_ANNOTATIONS_H_
|
||||
|
||||
// Some environments provide custom macros to aid in static thread-safety
|
||||
// analysis. Provide empty definitions of such macros unless they are already
|
||||
// defined.
|
||||
// Use Clang's thread safety analysis annotations when available. In other
|
||||
// environments, the macros receive empty definitions.
|
||||
// Usage documentation: https://clang.llvm.org/docs/ThreadSafetyAnalysis.html
|
||||
|
||||
#if !defined(THREAD_ANNOTATION_ATTRIBUTE__)
|
||||
|
||||
#if defined(__clang__)
|
||||
|
||||
#define THREAD_ANNOTATION_ATTRIBUTE__(x) __attribute__((x))
|
||||
#else
|
||||
#define THREAD_ANNOTATION_ATTRIBUTE__(x) // no-op
|
||||
#endif
|
||||
|
||||
#endif // !defined(THREAD_ANNOTATION_ATTRIBUTE__)
|
||||
|
||||
#ifndef GUARDED_BY
|
||||
#define GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE__(guarded_by(x))
|
||||
#endif
|
||||
|
||||
#ifndef PT_GUARDED_BY
|
||||
#define PT_GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE__(pt_guarded_by(x))
|
||||
#endif
|
||||
|
||||
#ifndef ACQUIRED_AFTER
|
||||
#define ACQUIRED_AFTER(...) \
|
||||
THREAD_ANNOTATION_ATTRIBUTE__(acquired_after(__VA_ARGS__))
|
||||
#endif
|
||||
|
||||
#ifndef ACQUIRED_BEFORE
|
||||
#define ACQUIRED_BEFORE(...) \
|
||||
THREAD_ANNOTATION_ATTRIBUTE__(acquired_before(__VA_ARGS__))
|
||||
#endif
|
||||
|
||||
#ifndef EXCLUSIVE_LOCKS_REQUIRED
|
||||
#define EXCLUSIVE_LOCKS_REQUIRED(...)
|
||||
#define EXCLUSIVE_LOCKS_REQUIRED(...) \
|
||||
THREAD_ANNOTATION_ATTRIBUTE__(exclusive_locks_required(__VA_ARGS__))
|
||||
#endif
|
||||
|
||||
#ifndef SHARED_LOCKS_REQUIRED
|
||||
#define SHARED_LOCKS_REQUIRED(...)
|
||||
#define SHARED_LOCKS_REQUIRED(...) \
|
||||
THREAD_ANNOTATION_ATTRIBUTE__(shared_locks_required(__VA_ARGS__))
|
||||
#endif
|
||||
|
||||
#ifndef LOCKS_EXCLUDED
|
||||
#define LOCKS_EXCLUDED(...)
|
||||
#define LOCKS_EXCLUDED(...) \
|
||||
THREAD_ANNOTATION_ATTRIBUTE__(locks_excluded(__VA_ARGS__))
|
||||
#endif
|
||||
|
||||
#ifndef LOCK_RETURNED
|
||||
#define LOCK_RETURNED(x)
|
||||
#define LOCK_RETURNED(x) THREAD_ANNOTATION_ATTRIBUTE__(lock_returned(x))
|
||||
#endif
|
||||
|
||||
#ifndef LOCKABLE
|
||||
#define LOCKABLE
|
||||
#define LOCKABLE THREAD_ANNOTATION_ATTRIBUTE__(lockable)
|
||||
#endif
|
||||
|
||||
#ifndef SCOPED_LOCKABLE
|
||||
#define SCOPED_LOCKABLE
|
||||
#define SCOPED_LOCKABLE THREAD_ANNOTATION_ATTRIBUTE__(scoped_lockable)
|
||||
#endif
|
||||
|
||||
#ifndef EXCLUSIVE_LOCK_FUNCTION
|
||||
#define EXCLUSIVE_LOCK_FUNCTION(...)
|
||||
#define EXCLUSIVE_LOCK_FUNCTION(...) \
|
||||
THREAD_ANNOTATION_ATTRIBUTE__(exclusive_lock_function(__VA_ARGS__))
|
||||
#endif
|
||||
|
||||
#ifndef SHARED_LOCK_FUNCTION
|
||||
#define SHARED_LOCK_FUNCTION(...)
|
||||
#define SHARED_LOCK_FUNCTION(...) \
|
||||
THREAD_ANNOTATION_ATTRIBUTE__(shared_lock_function(__VA_ARGS__))
|
||||
#endif
|
||||
|
||||
#ifndef EXCLUSIVE_TRYLOCK_FUNCTION
|
||||
#define EXCLUSIVE_TRYLOCK_FUNCTION(...)
|
||||
#define EXCLUSIVE_TRYLOCK_FUNCTION(...) \
|
||||
THREAD_ANNOTATION_ATTRIBUTE__(exclusive_trylock_function(__VA_ARGS__))
|
||||
#endif
|
||||
|
||||
#ifndef SHARED_TRYLOCK_FUNCTION
|
||||
#define SHARED_TRYLOCK_FUNCTION(...)
|
||||
#define SHARED_TRYLOCK_FUNCTION(...) \
|
||||
THREAD_ANNOTATION_ATTRIBUTE__(shared_trylock_function(__VA_ARGS__))
|
||||
#endif
|
||||
|
||||
#ifndef UNLOCK_FUNCTION
|
||||
#define UNLOCK_FUNCTION(...)
|
||||
#define UNLOCK_FUNCTION(...) \
|
||||
THREAD_ANNOTATION_ATTRIBUTE__(unlock_function(__VA_ARGS__))
|
||||
#endif
|
||||
|
||||
#ifndef NO_THREAD_SAFETY_ANALYSIS
|
||||
#define NO_THREAD_SAFETY_ANALYSIS
|
||||
#define NO_THREAD_SAFETY_ANALYSIS \
|
||||
THREAD_ANNOTATION_ATTRIBUTE__(no_thread_safety_analysis)
|
||||
#endif
|
||||
|
||||
#ifndef ASSERT_EXCLUSIVE_LOCK
|
||||
#define ASSERT_EXCLUSIVE_LOCK(...) \
|
||||
THREAD_ANNOTATION_ATTRIBUTE__(assert_exclusive_lock(__VA_ARGS__))
|
||||
#endif
|
||||
|
||||
#ifndef ASSERT_SHARED_LOCK
|
||||
#define ASSERT_SHARED_LOCK(...) \
|
||||
THREAD_ANNOTATION_ATTRIBUTE__(assert_shared_lock(__VA_ARGS__))
|
||||
#endif
|
||||
|
||||
#endif // STORAGE_LEVELDB_PORT_THREAD_ANNOTATIONS_H_
|
||||
|
@ -1,24 +0,0 @@
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
// MSVC didn't ship with this file until the 2010 version.
|
||||
|
||||
#ifndef STORAGE_LEVELDB_PORT_WIN_STDINT_H_
|
||||
#define STORAGE_LEVELDB_PORT_WIN_STDINT_H_
|
||||
|
||||
#if !defined(_MSC_VER)
|
||||
#error This file should only be included when compiling with MSVC.
|
||||
#endif
|
||||
|
||||
// Define C99 equivalent types.
|
||||
typedef signed char int8_t;
|
||||
typedef signed short int16_t;
|
||||
typedef signed int int32_t;
|
||||
typedef signed long long int64_t;
|
||||
typedef unsigned char uint8_t;
|
||||
typedef unsigned short uint16_t;
|
||||
typedef unsigned int uint32_t;
|
||||
typedef unsigned long long uint64_t;
|
||||
|
||||
#endif // STORAGE_LEVELDB_PORT_WIN_STDINT_H_
|
@ -6,8 +6,9 @@
|
||||
|
||||
#include "table/block.h"
|
||||
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
|
||||
#include "leveldb/comparator.h"
|
||||
#include "table/format.h"
|
||||
#include "util/coding.h"
|
||||
@ -27,7 +28,7 @@ Block::Block(const BlockContents& contents)
|
||||
if (size_ < sizeof(uint32_t)) {
|
||||
size_ = 0; // Error marker
|
||||
} else {
|
||||
size_t max_restarts_allowed = (size_-sizeof(uint32_t)) / sizeof(uint32_t);
|
||||
size_t max_restarts_allowed = (size_ - sizeof(uint32_t)) / sizeof(uint32_t);
|
||||
if (NumRestarts() > max_restarts_allowed) {
|
||||
// The size is too small for NumRestarts()
|
||||
size_ = 0;
|
||||
@ -48,13 +49,12 @@ Block::~Block() {
|
||||
// and the length of the value in "*shared", "*non_shared", and
|
||||
// "*value_length", respectively. Will not dereference past "limit".
|
||||
//
|
||||
// If any errors are detected, returns NULL. Otherwise, returns a
|
||||
// If any errors are detected, returns nullptr. Otherwise, returns a
|
||||
// pointer to the key delta (just past the three decoded values).
|
||||
static inline const char* DecodeEntry(const char* p, const char* limit,
|
||||
uint32_t* shared,
|
||||
uint32_t* non_shared,
|
||||
uint32_t* shared, uint32_t* non_shared,
|
||||
uint32_t* value_length) {
|
||||
if (limit - p < 3) return NULL;
|
||||
if (limit - p < 3) return nullptr;
|
||||
*shared = reinterpret_cast<const unsigned char*>(p)[0];
|
||||
*non_shared = reinterpret_cast<const unsigned char*>(p)[1];
|
||||
*value_length = reinterpret_cast<const unsigned char*>(p)[2];
|
||||
@ -62,13 +62,13 @@ static inline const char* DecodeEntry(const char* p, const char* limit,
|
||||
// Fast path: all three values are encoded in one byte each
|
||||
p += 3;
|
||||
} else {
|
||||
if ((p = GetVarint32Ptr(p, limit, shared)) == NULL) return NULL;
|
||||
if ((p = GetVarint32Ptr(p, limit, non_shared)) == NULL) return NULL;
|
||||
if ((p = GetVarint32Ptr(p, limit, value_length)) == NULL) return NULL;
|
||||
if ((p = GetVarint32Ptr(p, limit, shared)) == nullptr) return nullptr;
|
||||
if ((p = GetVarint32Ptr(p, limit, non_shared)) == nullptr) return nullptr;
|
||||
if ((p = GetVarint32Ptr(p, limit, value_length)) == nullptr) return nullptr;
|
||||
}
|
||||
|
||||
if (static_cast<uint32_t>(limit - p) < (*non_shared + *value_length)) {
|
||||
return NULL;
|
||||
return nullptr;
|
||||
}
|
||||
return p;
|
||||
}
|
||||
@ -76,9 +76,9 @@ static inline const char* DecodeEntry(const char* p, const char* limit,
|
||||
class Block::Iter : public Iterator {
|
||||
private:
|
||||
const Comparator* const comparator_;
|
||||
const char* const data_; // underlying block contents
|
||||
uint32_t const restarts_; // Offset of restart array (list of fixed32)
|
||||
uint32_t const num_restarts_; // Number of uint32_t entries in restart array
|
||||
const char* const data_; // underlying block contents
|
||||
uint32_t const restarts_; // Offset of restart array (list of fixed32)
|
||||
uint32_t const num_restarts_; // Number of uint32_t entries in restart array
|
||||
|
||||
// current_ is offset in data_ of current entry. >= restarts_ if !Valid
|
||||
uint32_t current_;
|
||||
@ -112,9 +112,7 @@ class Block::Iter : public Iterator {
|
||||
}
|
||||
|
||||
public:
|
||||
Iter(const Comparator* comparator,
|
||||
const char* data,
|
||||
uint32_t restarts,
|
||||
Iter(const Comparator* comparator, const char* data, uint32_t restarts,
|
||||
uint32_t num_restarts)
|
||||
: comparator_(comparator),
|
||||
data_(data),
|
||||
@ -125,23 +123,23 @@ class Block::Iter : public Iterator {
|
||||
assert(num_restarts_ > 0);
|
||||
}
|
||||
|
||||
virtual bool Valid() const { return current_ < restarts_; }
|
||||
virtual Status status() const { return status_; }
|
||||
virtual Slice key() const {
|
||||
bool Valid() const override { return current_ < restarts_; }
|
||||
Status status() const override { return status_; }
|
||||
Slice key() const override {
|
||||
assert(Valid());
|
||||
return key_;
|
||||
}
|
||||
virtual Slice value() const {
|
||||
Slice value() const override {
|
||||
assert(Valid());
|
||||
return value_;
|
||||
}
|
||||
|
||||
virtual void Next() {
|
||||
void Next() override {
|
||||
assert(Valid());
|
||||
ParseNextKey();
|
||||
}
|
||||
|
||||
virtual void Prev() {
|
||||
void Prev() override {
|
||||
assert(Valid());
|
||||
|
||||
// Scan backwards to a restart point before current_
|
||||
@ -162,7 +160,7 @@ class Block::Iter : public Iterator {
|
||||
} while (ParseNextKey() && NextEntryOffset() < original);
|
||||
}
|
||||
|
||||
virtual void Seek(const Slice& target) {
|
||||
void Seek(const Slice& target) override {
|
||||
// Binary search in restart array to find the last restart point
|
||||
// with a key < target
|
||||
uint32_t left = 0;
|
||||
@ -171,10 +169,10 @@ class Block::Iter : public Iterator {
|
||||
uint32_t mid = (left + right + 1) / 2;
|
||||
uint32_t region_offset = GetRestartPoint(mid);
|
||||
uint32_t shared, non_shared, value_length;
|
||||
const char* key_ptr = DecodeEntry(data_ + region_offset,
|
||||
data_ + restarts_,
|
||||
&shared, &non_shared, &value_length);
|
||||
if (key_ptr == NULL || (shared != 0)) {
|
||||
const char* key_ptr =
|
||||
DecodeEntry(data_ + region_offset, data_ + restarts_, &shared,
|
||||
&non_shared, &value_length);
|
||||
if (key_ptr == nullptr || (shared != 0)) {
|
||||
CorruptionError();
|
||||
return;
|
||||
}
|
||||
@ -202,12 +200,12 @@ class Block::Iter : public Iterator {
|
||||
}
|
||||
}
|
||||
|
||||
virtual void SeekToFirst() {
|
||||
void SeekToFirst() override {
|
||||
SeekToRestartPoint(0);
|
||||
ParseNextKey();
|
||||
}
|
||||
|
||||
virtual void SeekToLast() {
|
||||
void SeekToLast() override {
|
||||
SeekToRestartPoint(num_restarts_ - 1);
|
||||
while (ParseNextKey() && NextEntryOffset() < restarts_) {
|
||||
// Keep skipping
|
||||
@ -237,7 +235,7 @@ class Block::Iter : public Iterator {
|
||||
// Decode next entry
|
||||
uint32_t shared, non_shared, value_length;
|
||||
p = DecodeEntry(p, limit, &shared, &non_shared, &value_length);
|
||||
if (p == NULL || key_.size() < shared) {
|
||||
if (p == nullptr || key_.size() < shared) {
|
||||
CorruptionError();
|
||||
return false;
|
||||
} else {
|
||||
@ -253,7 +251,7 @@ class Block::Iter : public Iterator {
|
||||
}
|
||||
};
|
||||
|
||||
Iterator* Block::NewIterator(const Comparator* cmp) {
|
||||
Iterator* Block::NewIterator(const Comparator* comparator) {
|
||||
if (size_ < sizeof(uint32_t)) {
|
||||
return NewErrorIterator(Status::Corruption("bad block contents"));
|
||||
}
|
||||
@ -261,7 +259,7 @@ Iterator* Block::NewIterator(const Comparator* cmp) {
|
||||
if (num_restarts == 0) {
|
||||
return NewEmptyIterator();
|
||||
} else {
|
||||
return new Iter(cmp, data_, restart_offset_, num_restarts);
|
||||
return new Iter(comparator, data_, restart_offset_, num_restarts);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -7,6 +7,7 @@
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "leveldb/iterator.h"
|
||||
|
||||
namespace leveldb {
|
||||
@ -19,24 +20,23 @@ class Block {
|
||||
// Initialize the block with the specified contents.
|
||||
explicit Block(const BlockContents& contents);
|
||||
|
||||
Block(const Block&) = delete;
|
||||
Block& operator=(const Block&) = delete;
|
||||
|
||||
~Block();
|
||||
|
||||
size_t size() const { return size_; }
|
||||
Iterator* NewIterator(const Comparator* comparator);
|
||||
|
||||
private:
|
||||
class Iter;
|
||||
|
||||
uint32_t NumRestarts() const;
|
||||
|
||||
const char* data_;
|
||||
size_t size_;
|
||||
uint32_t restart_offset_; // Offset in data_ of restart array
|
||||
bool owned_; // Block owns data_[]
|
||||
|
||||
// No copying allowed
|
||||
Block(const Block&);
|
||||
void operator=(const Block&);
|
||||
|
||||
class Iter;
|
||||
uint32_t restart_offset_; // Offset in data_ of restart array
|
||||
bool owned_; // Block owns data_[]
|
||||
};
|
||||
|
||||
} // namespace leveldb
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user