Refactor secondary key handling: rename SKeyReturnVal to SecondaryKeyReturnVal, update related methods and tests, and adjust MemTable structure for clarity.

This commit is contained in:
harish876 2025-04-13 02:55:02 +00:00
parent 2262f3ca43
commit 29c05db3f4
10 changed files with 334 additions and 73 deletions

2
.gitignore vendored
View File

@ -9,4 +9,4 @@ build/
out/ out/
# test db # test db
test_level_db_idx test_level_db_*

View File

@ -76,7 +76,7 @@ void queryWithIndex(leveldb::DB* db, int targetAge, int numRecords,
auto startWithIndex = high_resolution_clock::now(); auto startWithIndex = high_resolution_clock::now();
vector<leveldb::SKeyReturnVal> values; vector<leveldb::SecondaryKeyReturnVal> values;
leveldb::Status s = db->Get( leveldb::Status s = db->Get(
roptions, leveldb::Slice(std::to_string(targetAge)), &values, numRecords); roptions, leveldb::Slice(std::to_string(targetAge)), &values, numRecords);
@ -146,7 +146,7 @@ void runComparison(leveldb::DB* db, int targetAge, int numRecords,
std::cout << "PERFORMANCE COMPARISON\n"; std::cout << "PERFORMANCE COMPARISON\n";
std::cout << "==========================================\n"; std::cout << "==========================================\n";
vector<leveldb::SKeyReturnVal> values; vector<leveldb::SecondaryKeyReturnVal> values;
auto startWithIndex = high_resolution_clock::now(); auto startWithIndex = high_resolution_clock::now();
db->Get(roptions, leveldb::Slice(std::to_string(targetAge)), &values, db->Get(roptions, leveldb::Slice(std::to_string(targetAge)), &values,
numRecords); numRecords);
@ -234,7 +234,7 @@ int main(int argc, char* argv[]) {
// Setup database // Setup database
leveldb::DB* db; leveldb::DB* db;
leveldb::Options options; leveldb::Options options;
options.filter_policy = leveldb::NewBloomFilterPolicy(10); options.filter_policy = leveldb::NewBloomFilterPolicy(20);
options.primary_key = "id"; options.primary_key = "id";
options.secondary_key = "age"; options.secondary_key = "age";
options.create_if_missing = true; options.create_if_missing = true;

View File

@ -1174,12 +1174,13 @@ Status DBImpl::Get(const ReadOptions& options, const Slice& key,
return s; return s;
} }
// Comparator over two secondary-key results, keyed on sequence_number.
// Returns false only when a's sequence_number is strictly less than b's;
// in every other case (greater OR equal) it returns true.
// NOTE(review): because equal sequence numbers also yield true, the predicate
// is not irreflexive. If it is handed to std::sort or the heap algorithms,
// which expect a strict weak ordering, `a.sequence_number > b.sequence_number`
// would be the safe form -- confirm against the call site.
static bool NewestFirst(const SKeyReturnVal& a, const SKeyReturnVal& b) { static bool NewestFirst(const SecondaryKeyReturnVal& a,
const SecondaryKeyReturnVal& b) {
return a.sequence_number < b.sequence_number ? false : true; return a.sequence_number < b.sequence_number ? false : true;
} }
Status DBImpl::Get(const ReadOptions& options, const Slice& s_key, Status DBImpl::Get(const ReadOptions& options, const Slice& s_key,
std::vector<SKeyReturnVal>* acc, int top_k_outputs) { std::vector<SecondaryKeyReturnVal>* acc, int top_k_outputs) {
Status s; Status s;
MutexLock l(&mutex_); MutexLock l(&mutex_);
SequenceNumber snapshot; SequenceNumber snapshot;
@ -1206,12 +1207,10 @@ Status DBImpl::Get(const ReadOptions& options, const Slice& s_key,
LookupKey lkey(s_key, snapshot); LookupKey lkey(s_key, snapshot);
std::unordered_set<std::string> result_set; std::unordered_set<std::string> result_set;
mem->Get(s_key, snapshot, acc, &s, this->options_.secondary_key, mem->Get(s_key, snapshot, acc, &s, &result_set, top_k_outputs);
&result_set, top_k_outputs);
if (imm != nullptr && top_k_outputs - acc->size() > 0) { if (imm != nullptr && top_k_outputs - acc->size() > 0) {
imm->Get(s_key, snapshot, acc, &s, this->options_.secondary_key, imm->Get(s_key, snapshot, acc, &s, &result_set, top_k_outputs);
&result_set, top_k_outputs);
} }
if (top_k_outputs > (int)(acc->size())) { if (top_k_outputs > (int)(acc->size())) {

View File

@ -48,7 +48,8 @@ class DBImpl : public DB {
Status Get(const ReadOptions& options, const Slice& key, Status Get(const ReadOptions& options, const Slice& key,
std::string* value) override; std::string* value) override;
Status Get(const ReadOptions& options, const Slice& s_key, Status Get(const ReadOptions& options, const Slice& s_key,
std::vector<SKeyReturnVal>* acc, int top_k_outputs) override; std::vector<SecondaryKeyReturnVal>* acc,
int top_k_outputs) override;
Iterator* NewIterator(const ReadOptions&) override; Iterator* NewIterator(const ReadOptions&) override;
const Snapshot* GetSnapshot() override; const Snapshot* GetSnapshot() override;
void ReleaseSnapshot(const Snapshot* snapshot) override; void ReleaseSnapshot(const Snapshot* snapshot) override;

View File

@ -29,13 +29,13 @@ static Slice GetLengthPrefixedSlice(const char* data) {
// Constructs a MemTable.
//   comparator    - copied into comparator_; table_ is then built from that
//                   member together with the address of arena_.
//   secondary_key - taken by value and stored in the member that holds the
//                   secondary attribute name.
// The reference count starts at 0.
MemTable::MemTable(const InternalKeyComparator& comparator, MemTable::MemTable(const InternalKeyComparator& comparator,
std::string secondary_key) std::string secondary_key)
: comparator_(comparator), refs_(0), table_(comparator_, &arena_) { : comparator_(comparator), refs_(0), table_(comparator_, &arena_) {
secAttribute = secondary_key; secondary_attribute_ = secondary_key;
} }
MemTable::~MemTable() { MemTable::~MemTable() {
assert(refs_ == 0); assert(refs_ == 0);
for (SecMemTable::iterator it = secTable_.begin(); it != secTable_.end(); for (SecMemTable::iterator it = secondary_table_.begin();
it++) { it != secondary_table_.end(); it++) {
std::pair<std::string, std::vector<std::string>*> pr = *it; std::pair<std::string, std::vector<std::string>*> pr = *it;
std::vector<std::string>* invertedList = pr.second; std::vector<std::string>* invertedList = pr.second;
@ -131,21 +131,20 @@ void MemTable::Add(SequenceNumber s, ValueType type, const Slice& key,
if (type == kTypeDeletion) { if (type == kTypeDeletion) {
return; return;
} }
std::string secKey; std::string extracted_secondary_key;
Status st = Status st = ExtractKeyFromJSON(value.ToString().c_str(), secondary_attribute_,
ExtractKeyFromJSON(value.ToString().c_str(), secAttribute, &secKey); &extracted_secondary_key);
if (!st.ok()) { if (!st.ok()) {
return; return;
} }
SecMemTable::const_iterator lookup = secTable_.find(secKey); SecMemTable::const_iterator lookup =
if (lookup == secTable_.end()) { secondary_table_.find(extracted_secondary_key);
if (lookup == secondary_table_.end()) {
std::vector<std::string>* invertedList = new std::vector<std::string>(); std::vector<std::string>* invertedList = new std::vector<std::string>();
invertedList->push_back(key.ToString()); invertedList->push_back(key.ToString());
secondary_table_.insert(
secTable_.insert(std::make_pair(secKey, invertedList)); std::make_pair(extracted_secondary_key, invertedList));
} } else {
else {
std::pair<std::string, std::vector<std::string>*> pr = *lookup; std::pair<std::string, std::vector<std::string>*> pr = *lookup;
pr.second->push_back(key.ToString()); pr.second->push_back(key.ToString());
} }
@ -225,12 +224,11 @@ bool MemTable::Get(const LookupKey& key, std::string* value, Status* s,
} }
void MemTable::Get(const Slice& skey, SequenceNumber snapshot, void MemTable::Get(const Slice& skey, SequenceNumber snapshot,
std::vector<SKeyReturnVal>* acc, Status* s, std::vector<SecondaryKeyReturnVal>* acc, Status* s,
std::string secondary_key,
std::unordered_set<std::string>* result_set, std::unordered_set<std::string>* result_set,
int top_k_output) { int top_k_output) {
auto lookup = secTable_.find(skey.ToString()); auto lookup = secondary_table_.find(skey.ToString());
if (lookup == secTable_.end()) { if (lookup == secondary_table_.end()) {
return; return;
} }
std::pair<std::string, std::vector<std::string>*> pr = *lookup; std::pair<std::string, std::vector<std::string>*> pr = *lookup;
@ -246,11 +244,11 @@ void MemTable::Get(const Slice& skey, SequenceNumber snapshot,
if (!this->Get(lkey, &svalue, &s, &tag)) return; if (!this->Get(lkey, &svalue, &s, &tag)) return;
if (s.IsNotFound()) return; if (s.IsNotFound()) return;
Status st = ExtractKeyFromJSON(svalue, secAttribute, &secKeyVal); Status st = ExtractKeyFromJSON(svalue, secondary_attribute_, &secKeyVal);
if (!st.ok()) return; if (!st.ok()) return;
if (comparator_.comparator.user_comparator()->Compare(secKeyVal, skey) == if (comparator_.comparator.user_comparator()->Compare(secKeyVal, skey) ==
0) { 0) {
struct SKeyReturnVal newVal; struct SecondaryKeyReturnVal newVal;
newVal.key = pr.second->at(i); newVal.key = pr.second->at(i);
std::string temp; std::string temp;

View File

@ -72,8 +72,7 @@ class MemTable {
// Get methods for Secondary Memtable // Get methods for Secondary Memtable
bool Get(const LookupKey& key, std::string* value, Status* s, uint64_t* tag); bool Get(const LookupKey& key, std::string* value, Status* s, uint64_t* tag);
void Get(const Slice& s_key, SequenceNumber snapshot, void Get(const Slice& s_key, SequenceNumber snapshot,
std::vector<SKeyReturnVal>* value, Status* s, std::vector<SecondaryKeyReturnVal>* value, Status* s,
std::string secondary_key,
std::unordered_set<std::string>* result_set, int top_k_value); std::unordered_set<std::string>* result_set, int top_k_value);
private: private:
@ -97,8 +96,8 @@ class MemTable {
// SECONDARY MEMTABLE // SECONDARY MEMTABLE
typedef btree::btree_map<std::string, std::vector<std::string>*> SecMemTable; typedef btree::btree_map<std::string, std::vector<std::string>*> SecMemTable;
SecMemTable secTable_; SecMemTable secondary_table_;
std::string secAttribute; std::string secondary_attribute_;
}; };
} // namespace leveldb } // namespace leveldb

View File

@ -266,7 +266,7 @@ struct SecSaver {
SaverState state; SaverState state;
const Comparator* ucmp; const Comparator* ucmp;
Slice user_key; Slice user_key;
std::vector<SKeyReturnVal>* acc; std::vector<SecondaryKeyReturnVal>* acc;
std::unordered_set<std::string>* result_set; std::unordered_set<std::string>* result_set;
}; };
} // namespace } // namespace
@ -302,7 +302,7 @@ static bool SecSaveValue(void* arg, const Slice& ikey, const Slice& v,
if (s->ucmp->Compare(key, s->user_key) == 0) { if (s->ucmp->Compare(key, s->user_key) == 0) {
s->state = (parsed_key.type == kTypeValue) ? kFound : kDeleted; s->state = (parsed_key.type == kTypeValue) ? kFound : kDeleted;
if (s->state == kFound) { if (s->state == kFound) {
struct SKeyReturnVal new_val; struct SecondaryKeyReturnVal new_val;
Slice ukey = ExtractUserKey(ikey); Slice ukey = ExtractUserKey(ikey);
if (s->result_set->find(ukey.ToString()) == s->result_set->end()) { if (s->result_set->find(ukey.ToString()) == s->result_set->end()) {
new_val.key = ukey.ToString(); new_val.key = ukey.ToString();
@ -340,7 +340,8 @@ static bool NewestFirst(FileMetaData* a, FileMetaData* b) {
return a->number > b->number; return a->number > b->number;
} }
// Comparator over two secondary-key results: returns true exactly when a's
// sequence_number is strictly greater than b's, so a higher sequence number
// orders first. Both arguments are received by value (each call copies them).
static bool NewestFirstSequenceNumber(SKeyReturnVal a, SKeyReturnVal b) { static bool NewestFirstSequenceNumber(SecondaryKeyReturnVal a,
SecondaryKeyReturnVal b) {
return a.sequence_number > b.sequence_number; return a.sequence_number > b.sequence_number;
} }
@ -466,7 +467,7 @@ Status Version::Get(const ReadOptions& options, const LookupKey& k,
} }
Status Version::Get(const ReadOptions& options, const LookupKey& k, Status Version::Get(const ReadOptions& options, const LookupKey& k,
std::vector<SKeyReturnVal>* acc, GetStats* stats, std::vector<SecondaryKeyReturnVal>* acc, GetStats* stats,
std::string secondary_key, int top_k_output, std::string secondary_key, int top_k_output,
std::unordered_set<std::string>* result_set, DBImpl* db) { std::unordered_set<std::string>* result_set, DBImpl* db) {
Slice ikey = k.internal_key(); Slice ikey = k.internal_key();

View File

@ -81,7 +81,7 @@ class Version {
GetStats* stats); GetStats* stats);
Status Get(const ReadOptions&, const LookupKey& key, Status Get(const ReadOptions&, const LookupKey& key,
std::vector<SKeyReturnVal>* value, GetStats* stats, std::vector<SecondaryKeyReturnVal>* value, GetStats* stats,
std::string secondary_key, int top_k_outputs, std::string secondary_key, int top_k_outputs,
std::unordered_set<std::string>* result_set, DBImpl* db); std::unordered_set<std::string>* result_set, DBImpl* db);

View File

@ -4,6 +4,7 @@
#include <vector> #include <vector>
#include "leveldb/db.h" #include "leveldb/db.h"
#include "leveldb/filter_policy.h"
#include "leveldb/options.h" #include "leveldb/options.h"
#include "leveldb/status.h" #include "leveldb/status.h"
@ -19,29 +20,58 @@ int main(int argc, char* argv[]) {
std::string db_path = argv[1]; std::string db_path = argv[1];
int port = (argc > 2) ? std::stoi(argv[2]) : 8080; int port = (argc > 2) ? std::stoi(argv[2]) : 8080;
// Open the database // Create two DB instances
leveldb::DB* db; leveldb::DB* db_with_bloom;
leveldb::Options options; leveldb::DB* db_without_bloom;
options.create_if_missing = true;
options.primary_key = "id";
options.secondary_key = "age";
leveldb::Status status = leveldb::DB::Open(options, db_path, &db); // Configure options for DB with Bloom filter
if (!status.ok()) { leveldb::Options options_with_bloom;
std::cerr << "Unable to open/create database: " << status.ToString() options_with_bloom.filter_policy = leveldb::NewBloomFilterPolicy(20);
<< std::endl; options_with_bloom.create_if_missing = true;
options_with_bloom.primary_key = "id";
options_with_bloom.secondary_key = "age";
// Configure options for DB without Bloom filter
leveldb::Options options_without_bloom;
options_without_bloom.filter_policy = nullptr; // No Bloom filter
options_without_bloom.create_if_missing = true;
options_without_bloom.primary_key = "id";
// Open both databases
leveldb::Status status_with_bloom = leveldb::DB::Open(
options_with_bloom, db_path + "with_bloom", &db_with_bloom);
if (!status_with_bloom.ok()) {
std::cerr << "Unable to open/create database with Bloom filter: "
<< status_with_bloom.ToString() << std::endl;
return 1;
}
leveldb::Status status_without_bloom = leveldb::DB::Open(
options_without_bloom, db_path + "without_bloom", &db_without_bloom);
if (!status_without_bloom.ok()) {
std::cerr << "Unable to open/create database without Bloom filter: "
<< status_without_bloom.ToString() << std::endl;
delete db_with_bloom;
return 1; return 1;
} }
crow::SimpleApp app; crow::SimpleApp app;
// Primary key get - specify which DB to use with ?bloom=true/false parameter
CROW_ROUTE(app, "/db/get/<string>") CROW_ROUTE(app, "/db/get/<string>")
.methods("GET"_method)([db](const crow::request& req, .methods(
std::string primary_key) { "GET"_method)([db_with_bloom, db_without_bloom](
const crow::request& req, std::string primary_key) {
// Determine which DB to use
auto use_bloom = req.url_params.get("bloom");
leveldb::DB* db_to_use =
(use_bloom && std::string(use_bloom) == "false") ? db_without_bloom
: db_with_bloom;
// Get value by primary key // Get value by primary key
std::string value; std::string value;
leveldb::Status s = leveldb::Status s =
db->Get(leveldb::ReadOptions(), primary_key, &value); db_to_use->Get(leveldb::ReadOptions(), primary_key, &value);
if (s.ok()) { if (s.ok()) {
// Return the JSON value // Return the JSON value
@ -53,15 +83,27 @@ int main(int argc, char* argv[]) {
} }
}); });
// Define route for secondary key operations // Secondary key query - with parameter to select DB
CROW_ROUTE(app, "/db/query") CROW_ROUTE(app, "/db/query")
.methods("GET"_method)([db](const crow::request& req) { .methods("GET"_method)([db_with_bloom,
db_without_bloom](const crow::request& req) {
// Get query parameters // Get query parameters
auto secondary_key = req.url_params.get("key"); auto secondary_key = req.url_params.get("key");
auto limit_param = req.url_params.get("limit"); auto limit_param = req.url_params.get("limit");
auto use_secondary = req.url_params.get("use_secondary"); auto use_secondary = req.url_params.get("use_secondary");
int limit = 10;
// Determine which DB to use
leveldb::DB* db_to_use = nullptr;
if (use_secondary) {
std::cout << "Using Db with bloom " << std::endl;
db_to_use = db_with_bloom;
} else {
std::cout << "Using Db without bloom " << std::endl;
db_to_use = db_without_bloom;
}
int limit = 10;
if (limit_param) { if (limit_param) {
try { try {
limit = std::stoi(limit_param); limit = std::stoi(limit_param);
@ -73,7 +115,7 @@ int main(int argc, char* argv[]) {
if (!use_secondary) { if (!use_secondary) {
std::unique_ptr<leveldb::Iterator> it( std::unique_ptr<leveldb::Iterator> it(
db->NewIterator(leveldb::ReadOptions())); db_to_use->NewIterator(leveldb::ReadOptions()));
std::vector<std::string> results; std::vector<std::string> results;
int target_key_value = std::stoi(secondary_key); int target_key_value = std::stoi(secondary_key);
@ -118,10 +160,10 @@ int main(int argc, char* argv[]) {
"{\"error\": \"Missing 'key' parameter\"}"); "{\"error\": \"Missing 'key' parameter\"}");
} }
std::vector<leveldb::SKeyReturnVal> results; std::vector<leveldb::SecondaryKeyReturnVal> results;
leveldb::Status s = leveldb::Status s =
db->Get(leveldb::ReadOptions(), leveldb::Slice(secondary_key), db_to_use->Get(leveldb::ReadOptions(),
&results, limit); leveldb::Slice(secondary_key), &results, limit);
if (s.ok()) { if (s.ok()) {
std::string json_results = "["; std::string json_results = "[";
@ -138,26 +180,245 @@ int main(int argc, char* argv[]) {
} }
}); });
// Put to both DBs to keep them in sync
CROW_ROUTE(app, "/db/put") CROW_ROUTE(app, "/db/put")
.methods("POST"_method)([db](const crow::request& req) { .methods("POST"_method)([db_with_bloom,
db_without_bloom](const crow::request& req) {
auto body = req.body; auto body = req.body;
if (body.empty()) { if (body.empty()) {
return crow::response(400, "{\"error\": \"Empty request body\"}"); return crow::response(400, "{\"error\": \"Empty request body\"}");
} }
leveldb::Status s = db->Put(leveldb::WriteOptions(), body); // Write to both databases
if (s.ok()) { leveldb::Status s1 = db_with_bloom->Put(leveldb::WriteOptions(), body);
leveldb::Status s2 =
db_without_bloom->Put(leveldb::WriteOptions(), body);
if (s1.ok() && s2.ok()) {
return crow::response(200, "{\"status\": \"success\"}"); return crow::response(200, "{\"status\": \"success\"}");
} else { } else {
return crow::response(500, "{\"error\": \"" + s.ToString() + "\"}"); std::string error = "";
if (!s1.ok()) error += "With Bloom: " + s1.ToString();
if (!s2.ok()) {
if (!error.empty()) error += ", ";
error += "Without Bloom: " + s2.ToString();
}
return crow::response(500, "{\"error\": \"" + error + "\"}");
} }
}); });
// Add a new endpoint for stats/comparison
CROW_ROUTE(app, "/db/stats")
.methods("GET"_method)(
[db_with_bloom, db_without_bloom](const crow::request& req) {
// This would be expanded in a real implementation
// to return performance statistics, etc.
crow::json::wvalue stats;
stats["bloom_filter_enabled"]["status"] = "active";
stats["no_bloom_filter"]["status"] = "active";
return crow::response(200, stats);
});
// Bulk insertion endpoint
CROW_ROUTE(app, "/db/bulk-insert")
.methods("POST"_method)([db_with_bloom,
db_without_bloom](const crow::request& req) {
// Parse parameters from JSON body
rapidjson::Document doc;
if (doc.Parse(req.body.c_str()).HasParseError()) {
return crow::response(400, "{\"error\": \"Invalid JSON body\"}");
}
// Get number of records to insert
if (!doc.HasMember("numRecords") || !doc["numRecords"].IsInt()) {
return crow::response(
400,
"{\"error\": \"Missing or invalid 'numRecords' parameter\"}");
}
int numRecords = doc["numRecords"].GetInt();
// Get which DB to use (both, bloom only, or no-bloom only)
bool useBloom = true;
bool useNoBloom = true;
if (doc.HasMember("useBloom") && doc["useBloom"].IsBool()) {
useBloom = doc["useBloom"].GetBool();
}
if (doc.HasMember("useNoBloom") && doc["useNoBloom"].IsBool()) {
useNoBloom = doc["useNoBloom"].GetBool();
}
// Set up timing
auto startTime = std::chrono::high_resolution_clock::now();
int successCount = 0;
int errorCount = 0;
// Set up write options
leveldb::WriteOptions woptions;
// Insert data
for (int i = 0; i < numRecords; ++i) {
// Create sample JSON document
std::stringstream ss;
ss << "{\n \"id\": " << i << ",\n \"age\": " << (i % 50 + 10)
<< ",\n \"name\": \"User" << i << "\"\n}";
std::string json_string = ss.str();
// Insert into selected DBs
bool success = true;
if (useBloom) {
leveldb::Status s = db_with_bloom->Put(woptions, json_string);
if (!s.ok()) {
errorCount++;
success = false;
}
}
if (useNoBloom) {
leveldb::Status s = db_without_bloom->Put(woptions, json_string);
if (!s.ok()) {
errorCount++;
success = false;
}
}
if (success) {
successCount++;
}
}
// Calculate time taken
auto endTime = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(
endTime - startTime);
// Build response
crow::json::wvalue result;
result["success"] = true;
result["recordsRequested"] = numRecords;
result["recordsInserted"] = successCount;
result["recordsFailed"] = errorCount;
result["timeMs"] = duration.count();
result["usedBloomFilter"] = useBloom;
result["usedNoBloomFilter"] = useNoBloom;
return crow::response(200, result);
});
// Also add an endpoint to run performance comparison tests
CROW_ROUTE(app, "/db/performance-test")
.methods("POST"_method)([db_with_bloom,
db_without_bloom](const crow::request& req) {
// Parse parameters
rapidjson::Document doc;
if (doc.Parse(req.body.c_str()).HasParseError()) {
return crow::response(400, "{\"error\": \"Invalid JSON body\"}");
}
// Get target age to search for
int targetAge = 30; // default
if (doc.HasMember("targetAge") && doc["targetAge"].IsInt()) {
targetAge = doc["targetAge"].GetInt();
}
// Results object
crow::json::wvalue results;
// Test with bloom filter
{
auto startWithBloom = std::chrono::high_resolution_clock::now();
std::vector<leveldb::SecondaryKeyReturnVal> values;
leveldb::ReadOptions roptions;
db_with_bloom->Get(roptions,
leveldb::Slice(std::to_string(targetAge)), &values,
1000);
auto endWithBloom = std::chrono::high_resolution_clock::now();
auto bloomDuration =
std::chrono::duration_cast<std::chrono::microseconds>(
endWithBloom - startWithBloom);
results["withBloomFilter"]["timeUs"] = bloomDuration.count();
results["withBloomFilter"]["recordsFound"] = values.size();
}
// Test without bloom filter
{
auto startNoBloom = std::chrono::high_resolution_clock::now();
std::vector<leveldb::SecondaryKeyReturnVal> values;
leveldb::ReadOptions roptions;
db_without_bloom->Get(roptions,
leveldb::Slice(std::to_string(targetAge)),
&values, 1000);
auto endNoBloom = std::chrono::high_resolution_clock::now();
auto noBloomDuration =
std::chrono::duration_cast<std::chrono::microseconds>(
endNoBloom - startNoBloom);
results["withoutBloomFilter"]["timeUs"] = noBloomDuration.count();
results["withoutBloomFilter"]["recordsFound"] = values.size();
}
// Full scan test on bloom filter DB (for comparison)
{
auto startScan = std::chrono::high_resolution_clock::now();
leveldb::ReadOptions roptions;
leveldb::Iterator* it = db_with_bloom->NewIterator(roptions);
int count = 0;
for (it->SeekToFirst(); it->Valid(); it->Next()) {
rapidjson::Document doc;
doc.Parse<0>(it->value().ToString().c_str());
if (!doc.HasParseError() && doc.HasMember("age") &&
doc["age"].IsInt()) {
if (doc["age"].GetInt() == targetAge) {
count++;
}
}
}
auto endScan = std::chrono::high_resolution_clock::now();
auto scanDuration =
std::chrono::duration_cast<std::chrono::microseconds>(endScan -
startScan);
results["fullScan"]["timeUs"] = scanDuration.count();
results["fullScan"]["recordsFound"] = count;
delete it;
}
// Calculate speedups
double bloomVsNoBloom =
std::stod(results["withoutBloomFilter"]["timeUs"].dump()) /
std::stod(results["withBloomFilter"]["timeUs"].dump());
double bloomVsScan =
std::stod(results["fullScan"]["timeUs"].dump()) /
std::stod(results["withBloomFilter"]["timeUs"].dump());
results["speedups"]["bloomVsNoBloom"] = bloomVsNoBloom;
results["speedups"]["bloomVsFullScan"] = bloomVsScan;
results["targetAge"] = targetAge;
return crow::response(200, results);
});
// Start the server // Start the server
std::cout << "Starting LevelDB HTTP server on port " << port << std::endl; std::cout << "Starting LevelDB HTTP server on port " << port << std::endl;
std::cout << "Database path: " << db_path << std::endl; std::cout << "Database paths:" << std::endl;
std::cout << " With Bloom filter: " << db_path + "_with_bloom" << std::endl;
std::cout << " Without Bloom filter: " << db_path + "_without_bloom"
<< std::endl;
app.port(port).multithreaded().run(); app.port(port).multithreaded().run();
delete db; // Clean up both DB instances
delete db_with_bloom;
delete db_without_bloom;
delete options_with_bloom
.filter_policy; // Don't forget to free the Bloom filter
return 0; return 0;
} }

View File

@ -44,25 +44,26 @@ struct LEVELDB_EXPORT Range {
Slice limit; // Not included in the range Slice limit; // Not included in the range
}; };
struct LEVELDB_EXPORT SKeyReturnVal { struct LEVELDB_EXPORT SecondaryKeyReturnVal {
std::string key; // Included in the range std::string key; // Included in the range
std::string value; // Not included in the range std::string value; // Not included in the range
uint64_t sequence_number; // presumably sequence_number that leveldb assigns uint64_t sequence_number; // presumably sequence_number that leveldb assigns
// to each KV pair, we use this for top_k sorting // to each KV pair, we use this for top_k sorting
static bool comp(const leveldb::SKeyReturnVal& a, static bool comp(const leveldb::SecondaryKeyReturnVal& a,
const leveldb::SKeyReturnVal& b) { const leveldb::SecondaryKeyReturnVal& b) {
return a.sequence_number < b.sequence_number ? false : true; return a.sequence_number < b.sequence_number ? false : true;
} }
void Push(std::vector<leveldb::SKeyReturnVal>* heap, void Push(std::vector<leveldb::SecondaryKeyReturnVal>* heap,
leveldb::SKeyReturnVal val) { leveldb::SecondaryKeyReturnVal val) {
heap->push_back(val); heap->push_back(val);
push_heap(heap->begin(), heap->end(), comp); push_heap(heap->begin(), heap->end(), comp);
} }
leveldb::SKeyReturnVal Pop(std::vector<leveldb::SKeyReturnVal>* heap) { leveldb::SecondaryKeyReturnVal Pop(
leveldb::SKeyReturnVal val = heap->front(); std::vector<leveldb::SecondaryKeyReturnVal>* heap) {
leveldb::SecondaryKeyReturnVal val = heap->front();
// This operation will move the smallest element to the end of the vector // This operation will move the smallest element to the end of the vector
pop_heap(heap->begin(), heap->end(), comp); pop_heap(heap->begin(), heap->end(), comp);
@ -129,7 +130,8 @@ class LEVELDB_EXPORT DB {
// New Get method for query on secondary Key // New Get method for query on secondary Key
// Default body for the secondary-key lookup overload: it does not read any of
// its arguments and always returns Status::NotSupported, so a subclass has to
// override it to provide real secondary-key queries.
//   options       - read options (unused here)
//   skey          - secondary key to look up (unused here)
//   value         - output vector for matches (left untouched here)
//   top_k_outputs - requested number of results (unused here)
// NOTE(review): the message text names "ModelDB" although this is the default
// in the DB base class -- confirm the wording is intended.
virtual Status Get(const ReadOptions& options, const Slice& skey, virtual Status Get(const ReadOptions& options, const Slice& skey,
std::vector<SKeyReturnVal>* value, int top_k_outputs) { std::vector<SecondaryKeyReturnVal>* value,
int top_k_outputs) {
return Status::NotSupported("Get not implemented in ModelDB"); return Status::NotSupported("Get not implemented in ModelDB");
} }