mirror of
https://github.com/google/leveldb.git
synced 2025-06-11 04:33:22 +08:00
Refactor secondary key handling: rename SKeyReturnVal to SecondaryKeyReturnVal, update related methods and tests, and adjust MemTable structure for clarity.
This commit is contained in:
parent
2262f3ca43
commit
29c05db3f4
2
.gitignore
vendored
2
.gitignore
vendored
@ -9,4 +9,4 @@ build/
|
||||
out/
|
||||
|
||||
# test db
|
||||
test_level_db_idx
|
||||
test_level_db_*
|
@ -76,7 +76,7 @@ void queryWithIndex(leveldb::DB* db, int targetAge, int numRecords,
|
||||
|
||||
auto startWithIndex = high_resolution_clock::now();
|
||||
|
||||
vector<leveldb::SKeyReturnVal> values;
|
||||
vector<leveldb::SecondaryKeyReturnVal> values;
|
||||
leveldb::Status s = db->Get(
|
||||
roptions, leveldb::Slice(std::to_string(targetAge)), &values, numRecords);
|
||||
|
||||
@ -146,7 +146,7 @@ void runComparison(leveldb::DB* db, int targetAge, int numRecords,
|
||||
std::cout << "PERFORMANCE COMPARISON\n";
|
||||
std::cout << "==========================================\n";
|
||||
|
||||
vector<leveldb::SKeyReturnVal> values;
|
||||
vector<leveldb::SecondaryKeyReturnVal> values;
|
||||
auto startWithIndex = high_resolution_clock::now();
|
||||
db->Get(roptions, leveldb::Slice(std::to_string(targetAge)), &values,
|
||||
numRecords);
|
||||
@ -234,7 +234,7 @@ int main(int argc, char* argv[]) {
|
||||
// Setup database
|
||||
leveldb::DB* db;
|
||||
leveldb::Options options;
|
||||
options.filter_policy = leveldb::NewBloomFilterPolicy(10);
|
||||
options.filter_policy = leveldb::NewBloomFilterPolicy(20);
|
||||
options.primary_key = "id";
|
||||
options.secondary_key = "age";
|
||||
options.create_if_missing = true;
|
||||
|
@ -1174,12 +1174,13 @@ Status DBImpl::Get(const ReadOptions& options, const Slice& key,
|
||||
return s;
|
||||
}
|
||||
|
||||
static bool NewestFirst(const SKeyReturnVal& a, const SKeyReturnVal& b) {
|
||||
static bool NewestFirst(const SecondaryKeyReturnVal& a,
|
||||
const SecondaryKeyReturnVal& b) {
|
||||
return a.sequence_number < b.sequence_number ? false : true;
|
||||
}
|
||||
|
||||
Status DBImpl::Get(const ReadOptions& options, const Slice& s_key,
|
||||
std::vector<SKeyReturnVal>* acc, int top_k_outputs) {
|
||||
std::vector<SecondaryKeyReturnVal>* acc, int top_k_outputs) {
|
||||
Status s;
|
||||
MutexLock l(&mutex_);
|
||||
SequenceNumber snapshot;
|
||||
@ -1206,12 +1207,10 @@ Status DBImpl::Get(const ReadOptions& options, const Slice& s_key,
|
||||
LookupKey lkey(s_key, snapshot);
|
||||
|
||||
std::unordered_set<std::string> result_set;
|
||||
mem->Get(s_key, snapshot, acc, &s, this->options_.secondary_key,
|
||||
&result_set, top_k_outputs);
|
||||
mem->Get(s_key, snapshot, acc, &s, &result_set, top_k_outputs);
|
||||
|
||||
if (imm != nullptr && top_k_outputs - acc->size() > 0) {
|
||||
imm->Get(s_key, snapshot, acc, &s, this->options_.secondary_key,
|
||||
&result_set, top_k_outputs);
|
||||
imm->Get(s_key, snapshot, acc, &s, &result_set, top_k_outputs);
|
||||
}
|
||||
|
||||
if (top_k_outputs > (int)(acc->size())) {
|
||||
|
@ -48,7 +48,8 @@ class DBImpl : public DB {
|
||||
Status Get(const ReadOptions& options, const Slice& key,
|
||||
std::string* value) override;
|
||||
Status Get(const ReadOptions& options, const Slice& s_key,
|
||||
std::vector<SKeyReturnVal>* acc, int top_k_outputs) override;
|
||||
std::vector<SecondaryKeyReturnVal>* acc,
|
||||
int top_k_outputs) override;
|
||||
Iterator* NewIterator(const ReadOptions&) override;
|
||||
const Snapshot* GetSnapshot() override;
|
||||
void ReleaseSnapshot(const Snapshot* snapshot) override;
|
||||
|
@ -29,13 +29,13 @@ static Slice GetLengthPrefixedSlice(const char* data) {
|
||||
// Builds an empty memtable ordered by `comparator` and records the name of
// the JSON attribute (`secondary_key`) that the secondary table is keyed on.
// The reference count starts at zero.
MemTable::MemTable(const InternalKeyComparator& comparator,
                   std::string secondary_key)
    : comparator_(comparator),
      refs_(0),
      table_(comparator_, &arena_),
      secondary_attribute_(secondary_key) {}
|
||||
|
||||
MemTable::~MemTable() {
|
||||
assert(refs_ == 0);
|
||||
for (SecMemTable::iterator it = secTable_.begin(); it != secTable_.end();
|
||||
it++) {
|
||||
for (SecMemTable::iterator it = secondary_table_.begin();
|
||||
it != secondary_table_.end(); it++) {
|
||||
std::pair<std::string, std::vector<std::string>*> pr = *it;
|
||||
|
||||
std::vector<std::string>* invertedList = pr.second;
|
||||
@ -131,21 +131,20 @@ void MemTable::Add(SequenceNumber s, ValueType type, const Slice& key,
|
||||
if (type == kTypeDeletion) {
|
||||
return;
|
||||
}
|
||||
std::string secKey;
|
||||
Status st =
|
||||
ExtractKeyFromJSON(value.ToString().c_str(), secAttribute, &secKey);
|
||||
std::string extracted_secondary_key;
|
||||
Status st = ExtractKeyFromJSON(value.ToString().c_str(), secondary_attribute_,
|
||||
&extracted_secondary_key);
|
||||
if (!st.ok()) {
|
||||
return;
|
||||
}
|
||||
SecMemTable::const_iterator lookup = secTable_.find(secKey);
|
||||
if (lookup == secTable_.end()) {
|
||||
SecMemTable::const_iterator lookup =
|
||||
secondary_table_.find(extracted_secondary_key);
|
||||
if (lookup == secondary_table_.end()) {
|
||||
std::vector<std::string>* invertedList = new std::vector<std::string>();
|
||||
invertedList->push_back(key.ToString());
|
||||
|
||||
secTable_.insert(std::make_pair(secKey, invertedList));
|
||||
}
|
||||
|
||||
else {
|
||||
secondary_table_.insert(
|
||||
std::make_pair(extracted_secondary_key, invertedList));
|
||||
} else {
|
||||
std::pair<std::string, std::vector<std::string>*> pr = *lookup;
|
||||
pr.second->push_back(key.ToString());
|
||||
}
|
||||
@ -225,12 +224,11 @@ bool MemTable::Get(const LookupKey& key, std::string* value, Status* s,
|
||||
}
|
||||
|
||||
void MemTable::Get(const Slice& skey, SequenceNumber snapshot,
|
||||
std::vector<SKeyReturnVal>* acc, Status* s,
|
||||
std::string secondary_key,
|
||||
std::vector<SecondaryKeyReturnVal>* acc, Status* s,
|
||||
std::unordered_set<std::string>* result_set,
|
||||
int top_k_output) {
|
||||
auto lookup = secTable_.find(skey.ToString());
|
||||
if (lookup == secTable_.end()) {
|
||||
auto lookup = secondary_table_.find(skey.ToString());
|
||||
if (lookup == secondary_table_.end()) {
|
||||
return;
|
||||
}
|
||||
std::pair<std::string, std::vector<std::string>*> pr = *lookup;
|
||||
@ -246,11 +244,11 @@ void MemTable::Get(const Slice& skey, SequenceNumber snapshot,
|
||||
if (!this->Get(lkey, &svalue, &s, &tag)) return;
|
||||
if (s.IsNotFound()) return;
|
||||
|
||||
Status st = ExtractKeyFromJSON(svalue, secAttribute, &secKeyVal);
|
||||
Status st = ExtractKeyFromJSON(svalue, secondary_attribute_, &secKeyVal);
|
||||
if (!st.ok()) return;
|
||||
if (comparator_.comparator.user_comparator()->Compare(secKeyVal, skey) ==
|
||||
0) {
|
||||
struct SKeyReturnVal newVal;
|
||||
struct SecondaryKeyReturnVal newVal;
|
||||
newVal.key = pr.second->at(i);
|
||||
std::string temp;
|
||||
|
||||
|
@ -72,8 +72,7 @@ class MemTable {
|
||||
// Get methods for Secondary Memtable
|
||||
bool Get(const LookupKey& key, std::string* value, Status* s, uint64_t* tag);
|
||||
void Get(const Slice& s_key, SequenceNumber snapshot,
|
||||
std::vector<SKeyReturnVal>* value, Status* s,
|
||||
std::string secondary_key,
|
||||
std::vector<SecondaryKeyReturnVal>* value, Status* s,
|
||||
std::unordered_set<std::string>* result_set, int top_k_value);
|
||||
|
||||
private:
|
||||
@ -97,8 +96,8 @@ class MemTable {
|
||||
|
||||
// SECONDARY MEMTABLE
|
||||
typedef btree::btree_map<std::string, std::vector<std::string>*> SecMemTable;
|
||||
SecMemTable secTable_;
|
||||
std::string secAttribute;
|
||||
SecMemTable secondary_table_;
|
||||
std::string secondary_attribute_;
|
||||
};
|
||||
|
||||
} // namespace leveldb
|
||||
|
@ -266,7 +266,7 @@ struct SecSaver {
|
||||
SaverState state;
|
||||
const Comparator* ucmp;
|
||||
Slice user_key;
|
||||
std::vector<SKeyReturnVal>* acc;
|
||||
std::vector<SecondaryKeyReturnVal>* acc;
|
||||
std::unordered_set<std::string>* result_set;
|
||||
};
|
||||
} // namespace
|
||||
@ -302,7 +302,7 @@ static bool SecSaveValue(void* arg, const Slice& ikey, const Slice& v,
|
||||
if (s->ucmp->Compare(key, s->user_key) == 0) {
|
||||
s->state = (parsed_key.type == kTypeValue) ? kFound : kDeleted;
|
||||
if (s->state == kFound) {
|
||||
struct SKeyReturnVal new_val;
|
||||
struct SecondaryKeyReturnVal new_val;
|
||||
Slice ukey = ExtractUserKey(ikey);
|
||||
if (s->result_set->find(ukey.ToString()) == s->result_set->end()) {
|
||||
new_val.key = ukey.ToString();
|
||||
@ -340,7 +340,8 @@ static bool NewestFirst(FileMetaData* a, FileMetaData* b) {
|
||||
return a->number > b->number;
|
||||
}
|
||||
|
||||
static bool NewestFirstSequenceNumber(SKeyReturnVal a, SKeyReturnVal b) {
|
||||
static bool NewestFirstSequenceNumber(SecondaryKeyReturnVal a,
|
||||
SecondaryKeyReturnVal b) {
|
||||
return a.sequence_number > b.sequence_number;
|
||||
}
|
||||
|
||||
@ -466,7 +467,7 @@ Status Version::Get(const ReadOptions& options, const LookupKey& k,
|
||||
}
|
||||
|
||||
Status Version::Get(const ReadOptions& options, const LookupKey& k,
|
||||
std::vector<SKeyReturnVal>* acc, GetStats* stats,
|
||||
std::vector<SecondaryKeyReturnVal>* acc, GetStats* stats,
|
||||
std::string secondary_key, int top_k_output,
|
||||
std::unordered_set<std::string>* result_set, DBImpl* db) {
|
||||
Slice ikey = k.internal_key();
|
||||
|
@ -81,7 +81,7 @@ class Version {
|
||||
GetStats* stats);
|
||||
|
||||
Status Get(const ReadOptions&, const LookupKey& key,
|
||||
std::vector<SKeyReturnVal>* value, GetStats* stats,
|
||||
std::vector<SecondaryKeyReturnVal>* value, GetStats* stats,
|
||||
std::string secondary_key, int top_k_outputs,
|
||||
std::unordered_set<std::string>* result_set, DBImpl* db);
|
||||
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include <vector>
|
||||
|
||||
#include "leveldb/db.h"
|
||||
#include "leveldb/filter_policy.h"
|
||||
#include "leveldb/options.h"
|
||||
#include "leveldb/status.h"
|
||||
|
||||
@ -19,29 +20,58 @@ int main(int argc, char* argv[]) {
|
||||
std::string db_path = argv[1];
|
||||
int port = (argc > 2) ? std::stoi(argv[2]) : 8080;
|
||||
|
||||
// Open the database
|
||||
leveldb::DB* db;
|
||||
leveldb::Options options;
|
||||
options.create_if_missing = true;
|
||||
options.primary_key = "id";
|
||||
options.secondary_key = "age";
|
||||
// Create two DB instances
|
||||
leveldb::DB* db_with_bloom;
|
||||
leveldb::DB* db_without_bloom;
|
||||
|
||||
leveldb::Status status = leveldb::DB::Open(options, db_path, &db);
|
||||
if (!status.ok()) {
|
||||
std::cerr << "Unable to open/create database: " << status.ToString()
|
||||
<< std::endl;
|
||||
// Configure options for DB with Bloom filter
|
||||
leveldb::Options options_with_bloom;
|
||||
options_with_bloom.filter_policy = leveldb::NewBloomFilterPolicy(20);
|
||||
options_with_bloom.create_if_missing = true;
|
||||
options_with_bloom.primary_key = "id";
|
||||
options_with_bloom.secondary_key = "age";
|
||||
|
||||
// Configure options for DB without Bloom filter
|
||||
leveldb::Options options_without_bloom;
|
||||
options_without_bloom.filter_policy = nullptr; // No Bloom filter
|
||||
options_without_bloom.create_if_missing = true;
|
||||
options_without_bloom.primary_key = "id";
|
||||
|
||||
// Open both databases
|
||||
leveldb::Status status_with_bloom = leveldb::DB::Open(
|
||||
options_with_bloom, db_path + "with_bloom", &db_with_bloom);
|
||||
if (!status_with_bloom.ok()) {
|
||||
std::cerr << "Unable to open/create database with Bloom filter: "
|
||||
<< status_with_bloom.ToString() << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
leveldb::Status status_without_bloom = leveldb::DB::Open(
|
||||
options_without_bloom, db_path + "without_bloom", &db_without_bloom);
|
||||
if (!status_without_bloom.ok()) {
|
||||
std::cerr << "Unable to open/create database without Bloom filter: "
|
||||
<< status_without_bloom.ToString() << std::endl;
|
||||
delete db_with_bloom;
|
||||
return 1;
|
||||
}
|
||||
|
||||
crow::SimpleApp app;
|
||||
|
||||
// Primary key get - specify which DB to use with ?bloom=true/false parameter
|
||||
CROW_ROUTE(app, "/db/get/<string>")
|
||||
.methods("GET"_method)([db](const crow::request& req,
|
||||
std::string primary_key) {
|
||||
.methods(
|
||||
"GET"_method)([db_with_bloom, db_without_bloom](
|
||||
const crow::request& req, std::string primary_key) {
|
||||
// Determine which DB to use
|
||||
auto use_bloom = req.url_params.get("bloom");
|
||||
leveldb::DB* db_to_use =
|
||||
(use_bloom && std::string(use_bloom) == "false") ? db_without_bloom
|
||||
: db_with_bloom;
|
||||
|
||||
// Get value by primary key
|
||||
std::string value;
|
||||
leveldb::Status s =
|
||||
db->Get(leveldb::ReadOptions(), primary_key, &value);
|
||||
db_to_use->Get(leveldb::ReadOptions(), primary_key, &value);
|
||||
|
||||
if (s.ok()) {
|
||||
// Return the JSON value
|
||||
@ -53,15 +83,27 @@ int main(int argc, char* argv[]) {
|
||||
}
|
||||
});
|
||||
|
||||
// Define route for secondary key operations
|
||||
// Secondary key query - with parameter to select DB
|
||||
CROW_ROUTE(app, "/db/query")
|
||||
.methods("GET"_method)([db](const crow::request& req) {
|
||||
.methods("GET"_method)([db_with_bloom,
|
||||
db_without_bloom](const crow::request& req) {
|
||||
// Get query parameters
|
||||
auto secondary_key = req.url_params.get("key");
|
||||
auto limit_param = req.url_params.get("limit");
|
||||
auto use_secondary = req.url_params.get("use_secondary");
|
||||
int limit = 10;
|
||||
|
||||
// Determine which DB to use
|
||||
|
||||
leveldb::DB* db_to_use = nullptr;
|
||||
if (use_secondary) {
|
||||
std::cout << "Using Db with bloom " << std::endl;
|
||||
db_to_use = db_with_bloom;
|
||||
} else {
|
||||
std::cout << "Using Db without bloom " << std::endl;
|
||||
db_to_use = db_without_bloom;
|
||||
}
|
||||
|
||||
int limit = 10;
|
||||
if (limit_param) {
|
||||
try {
|
||||
limit = std::stoi(limit_param);
|
||||
@ -73,7 +115,7 @@ int main(int argc, char* argv[]) {
|
||||
|
||||
if (!use_secondary) {
|
||||
std::unique_ptr<leveldb::Iterator> it(
|
||||
db->NewIterator(leveldb::ReadOptions()));
|
||||
db_to_use->NewIterator(leveldb::ReadOptions()));
|
||||
std::vector<std::string> results;
|
||||
int target_key_value = std::stoi(secondary_key);
|
||||
|
||||
@ -118,10 +160,10 @@ int main(int argc, char* argv[]) {
|
||||
"{\"error\": \"Missing 'key' parameter\"}");
|
||||
}
|
||||
|
||||
std::vector<leveldb::SKeyReturnVal> results;
|
||||
std::vector<leveldb::SecondaryKeyReturnVal> results;
|
||||
leveldb::Status s =
|
||||
db->Get(leveldb::ReadOptions(), leveldb::Slice(secondary_key),
|
||||
&results, limit);
|
||||
db_to_use->Get(leveldb::ReadOptions(),
|
||||
leveldb::Slice(secondary_key), &results, limit);
|
||||
|
||||
if (s.ok()) {
|
||||
std::string json_results = "[";
|
||||
@ -138,26 +180,245 @@ int main(int argc, char* argv[]) {
|
||||
}
|
||||
});
|
||||
|
||||
// Put to both DBs to keep them in sync
|
||||
CROW_ROUTE(app, "/db/put")
|
||||
.methods("POST"_method)([db](const crow::request& req) {
|
||||
.methods("POST"_method)([db_with_bloom,
|
||||
db_without_bloom](const crow::request& req) {
|
||||
auto body = req.body;
|
||||
if (body.empty()) {
|
||||
return crow::response(400, "{\"error\": \"Empty request body\"}");
|
||||
}
|
||||
|
||||
leveldb::Status s = db->Put(leveldb::WriteOptions(), body);
|
||||
if (s.ok()) {
|
||||
// Write to both databases
|
||||
leveldb::Status s1 = db_with_bloom->Put(leveldb::WriteOptions(), body);
|
||||
leveldb::Status s2 =
|
||||
db_without_bloom->Put(leveldb::WriteOptions(), body);
|
||||
|
||||
if (s1.ok() && s2.ok()) {
|
||||
return crow::response(200, "{\"status\": \"success\"}");
|
||||
} else {
|
||||
return crow::response(500, "{\"error\": \"" + s.ToString() + "\"}");
|
||||
std::string error = "";
|
||||
if (!s1.ok()) error += "With Bloom: " + s1.ToString();
|
||||
if (!s2.ok()) {
|
||||
if (!error.empty()) error += ", ";
|
||||
error += "Without Bloom: " + s2.ToString();
|
||||
}
|
||||
return crow::response(500, "{\"error\": \"" + error + "\"}");
|
||||
}
|
||||
});
|
||||
|
||||
// Add a new endpoint for stats/comparison
|
||||
CROW_ROUTE(app, "/db/stats")
    .methods("GET"_method)(
        [db_with_bloom, db_without_bloom](const crow::request& req) {
          // Placeholder endpoint: reports a fixed "active" status for each
          // database and does not query either instance yet.
          crow::json::wvalue payload;
          payload["bloom_filter_enabled"]["status"] = "active";
          payload["no_bloom_filter"]["status"] = "active";
          return crow::response(200, payload);
        });
|
||||
|
||||
// Bulk insertion endpoint
|
||||
CROW_ROUTE(app, "/db/bulk-insert")
    .methods("POST"_method)([db_with_bloom,
                             db_without_bloom](const crow::request& req) {
      // Generates `numRecords` sample documents and writes them to the
      // selected database(s).
      //
      // JSON body:
      //   numRecords (int, required, >= 0)  number of documents to generate
      //   useBloom   (bool, optional)       write to db_with_bloom (default true)
      //   useNoBloom (bool, optional)       write to db_without_bloom (default true)
      //
      // Responds 400 on a malformed body, otherwise 200 with counters and
      // the elapsed time in milliseconds.
      rapidjson::Document doc;
      // HasMember()/operator[] are only valid on JSON objects, so a body
      // such as `[1]` or `5` must be rejected before they are used.
      if (doc.Parse(req.body.c_str()).HasParseError() || !doc.IsObject()) {
        return crow::response(400, "{\"error\": \"Invalid JSON body\"}");
      }

      if (!doc.HasMember("numRecords") || !doc["numRecords"].IsInt() ||
          doc["numRecords"].GetInt() < 0) {
        return crow::response(
            400,
            "{\"error\": \"Missing or invalid 'numRecords' parameter\"}");
      }
      const int numRecords = doc["numRecords"].GetInt();

      // Target selection: both databases unless the caller says otherwise.
      bool useBloom = true;
      bool useNoBloom = true;
      if (doc.HasMember("useBloom") && doc["useBloom"].IsBool()) {
        useBloom = doc["useBloom"].GetBool();
      }
      if (doc.HasMember("useNoBloom") && doc["useNoBloom"].IsBool()) {
        useNoBloom = doc["useNoBloom"].GetBool();
      }

      const auto startTime = std::chrono::high_resolution_clock::now();
      int successCount = 0;  // records written to every selected database
      int errorCount = 0;    // individual failed Put calls (per database)

      const leveldb::WriteOptions woptions;

      for (int i = 0; i < numRecords; ++i) {
        // Sample document: id = i, age cycles through 10..59.
        std::stringstream ss;
        ss << "{\n \"id\": " << i << ",\n \"age\": " << (i % 50 + 10)
           << ",\n \"name\": \"User" << i << "\"\n}";
        const std::string json_string = ss.str();

        bool success = true;
        if (useBloom && !db_with_bloom->Put(woptions, json_string).ok()) {
          ++errorCount;
          success = false;
        }
        if (useNoBloom && !db_without_bloom->Put(woptions, json_string).ok()) {
          ++errorCount;
          success = false;
        }
        if (success) {
          ++successCount;
        }
      }

      const auto endTime = std::chrono::high_resolution_clock::now();
      const auto duration =
          std::chrono::duration_cast<std::chrono::milliseconds>(endTime -
                                                                startTime);

      crow::json::wvalue result;
      // Only report overall success when no write failed; the counters
      // below give the detailed breakdown.
      result["success"] = (errorCount == 0);
      result["recordsRequested"] = numRecords;
      result["recordsInserted"] = successCount;
      result["recordsFailed"] = errorCount;
      result["timeMs"] = duration.count();
      result["usedBloomFilter"] = useBloom;
      result["usedNoBloomFilter"] = useNoBloom;

      return crow::response(200, result);
    });
|
||||
|
||||
// Also add an endpoint to run performance comparison tests
|
||||
CROW_ROUTE(app, "/db/performance-test")
    .methods("POST"_method)([db_with_bloom,
                             db_without_bloom](const crow::request& req) {
      // Times three ways of finding records whose "age" equals `targetAge`:
      //   1. secondary-key Get on db_with_bloom
      //   2. secondary-key Get on db_without_bloom
      //   3. full iterator scan of db_with_bloom, parsing every value
      //
      // JSON body: { "targetAge": <int> }  (optional, default 30)
      // Responds 400 on a malformed body, otherwise 200 with timings in
      // microseconds, match counts and speed-up ratios.
      //
      // NOTE(review): in this file options_without_bloom is set up without a
      // secondary_key; confirm the secondary Get on that instance is
      // meaningful.
      rapidjson::Document doc;
      // HasMember() is only valid on JSON objects; reject anything else.
      if (doc.Parse(req.body.c_str()).HasParseError() || !doc.IsObject()) {
        return crow::response(400, "{\"error\": \"Invalid JSON body\"}");
      }

      int targetAge = 30;  // default
      if (doc.HasMember("targetAge") && doc["targetAge"].IsInt()) {
        targetAge = doc["targetAge"].GetInt();
      }
      const std::string targetKey = std::to_string(targetAge);

      using Clock = std::chrono::high_resolution_clock;
      const auto elapsedUs = [](Clock::time_point from, Clock::time_point to) {
        return std::chrono::duration_cast<std::chrono::microseconds>(to - from)
            .count();
      };

      crow::json::wvalue results;

      // Runs one secondary-key lookup (at most 1000 results), records its
      // timing and match count under `label`, and returns the elapsed time.
      const auto timeSecondaryLookup = [&](leveldb::DB* db,
                                           const char* label) {
        std::vector<leveldb::SecondaryKeyReturnVal> values;
        const leveldb::ReadOptions roptions;

        const auto start = Clock::now();
        db->Get(roptions, leveldb::Slice(targetKey), &values, 1000);
        const auto us = elapsedUs(start, Clock::now());

        results[label]["timeUs"] = us;
        results[label]["recordsFound"] = values.size();
        return us;
      };

      const auto bloomUs =
          timeSecondaryLookup(db_with_bloom, "withBloomFilter");
      const auto noBloomUs =
          timeSecondaryLookup(db_without_bloom, "withoutBloomFilter");

      // Full scan on the Bloom-filter database, for comparison.
      const auto scanStart = Clock::now();
      int count = 0;
      auto scanUs = elapsedUs(scanStart, scanStart);
      {
        // unique_ptr releases the iterator on every exit path.
        std::unique_ptr<leveldb::Iterator> it(
            db_with_bloom->NewIterator(leveldb::ReadOptions()));
        for (it->SeekToFirst(); it->Valid(); it->Next()) {
          rapidjson::Document record;
          record.Parse<0>(it->value().ToString().c_str());
          // Values that are not JSON objects are skipped rather than
          // queried with HasMember().
          if (!record.HasParseError() && record.IsObject() &&
              record.HasMember("age") && record["age"].IsInt() &&
              record["age"].GetInt() == targetAge) {
            ++count;
          }
        }
        // Stop the clock before the iterator is destroyed.
        scanUs = elapsedUs(scanStart, Clock::now());
      }
      results["fullScan"]["timeUs"] = scanUs;
      results["fullScan"]["recordsFound"] = count;

      // Speed-ups relative to the Bloom-filter lookup. A lookup that finishes
      // in under one microsecond would make the ratio a division by zero
      // (inf/NaN, not representable in JSON); 0 is reported in that case.
      const auto ratio = [](double numerator, double denominator) {
        return denominator > 0.0 ? numerator / denominator : 0.0;
      };
      results["speedups"]["bloomVsNoBloom"] =
          ratio(static_cast<double>(noBloomUs), static_cast<double>(bloomUs));
      results["speedups"]["bloomVsFullScan"] =
          ratio(static_cast<double>(scanUs), static_cast<double>(bloomUs));
      results["targetAge"] = targetAge;

      return crow::response(200, results);
    });
|
||||
|
||||
// Start the server
|
||||
std::cout << "Starting LevelDB HTTP server on port " << port << std::endl;
|
||||
std::cout << "Database path: " << db_path << std::endl;
|
||||
std::cout << "Database paths:" << std::endl;
|
||||
std::cout << " With Bloom filter: " << db_path + "_with_bloom" << std::endl;
|
||||
std::cout << " Without Bloom filter: " << db_path + "_without_bloom"
|
||||
<< std::endl;
|
||||
app.port(port).multithreaded().run();
|
||||
|
||||
delete db;
|
||||
// Clean up both DB instances
|
||||
delete db_with_bloom;
|
||||
delete db_without_bloom;
|
||||
delete options_with_bloom
|
||||
.filter_policy; // Don't forget to free the Bloom filter
|
||||
|
||||
return 0;
|
||||
}
|
@ -44,25 +44,26 @@ struct LEVELDB_EXPORT Range {
|
||||
Slice limit; // Not included in the range
|
||||
};
|
||||
|
||||
struct LEVELDB_EXPORT SKeyReturnVal {
|
||||
struct LEVELDB_EXPORT SecondaryKeyReturnVal {
|
||||
std::string key; // Included in the range
|
||||
std::string value; // Not included in the range
|
||||
uint64_t sequence_number; // presumably sequence_number that leveldb assigns
|
||||
// to each KV pair, we use this for top_k sorting
|
||||
|
||||
// Comparator used by Push/Pop: returns true when `a` has a larger
// sequence_number than `b`. With the standard heap algorithms this keeps
// the entry with the smallest sequence_number at the front of the heap.
//
// The comparison is strict (`>`): returning true for equal sequence numbers
// would violate the strict-weak-ordering requirement of push_heap/pop_heap.
static bool comp(const leveldb::SecondaryKeyReturnVal& a,
                 const leveldb::SecondaryKeyReturnVal& b) {
  return a.sequence_number > b.sequence_number;
}
|
||||
|
||||
// Appends `val` to `heap` and then re-establishes the heap ordering defined
// by comp over the whole vector.
void Push(std::vector<leveldb::SecondaryKeyReturnVal>* heap,
          leveldb::SecondaryKeyReturnVal val) {
  std::vector<leveldb::SecondaryKeyReturnVal>& entries = *heap;
  entries.push_back(val);
  push_heap(entries.begin(), entries.end(), comp);
}
|
||||
|
||||
leveldb::SKeyReturnVal Pop(std::vector<leveldb::SKeyReturnVal>* heap) {
|
||||
leveldb::SKeyReturnVal val = heap->front();
|
||||
leveldb::SecondaryKeyReturnVal Pop(
|
||||
std::vector<leveldb::SecondaryKeyReturnVal>* heap) {
|
||||
leveldb::SecondaryKeyReturnVal val = heap->front();
|
||||
|
||||
// This operation will move the smallest element to the end of the vector
|
||||
pop_heap(heap->begin(), heap->end(), comp);
|
||||
@ -129,7 +130,8 @@ class LEVELDB_EXPORT DB {
|
||||
|
||||
// New Get method for query on secondary Key
|
||||
// Secondary-key lookup. This default implementation performs no lookup and
// always returns a NotSupported status; implementations that support
// secondary-key queries override it.
virtual Status Get(const ReadOptions& options, const Slice& skey,
                   std::vector<SecondaryKeyReturnVal>* value,
                   int top_k_outputs) {
  Status unsupported = Status::NotSupported("Get not implemented in ModelDB");
  return unsupported;
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user