mirror of
https://github.com/google/leveldb.git
synced 2025-06-07 18:02:42 +08:00
Refactor secondary key handling: rename SKeyReturnVal to SecondaryKeyReturnVal, update related methods and tests, and adjust MemTable structure for clarity.
This commit is contained in:
parent
2262f3ca43
commit
29c05db3f4
2
.gitignore
vendored
2
.gitignore
vendored
@ -9,4 +9,4 @@ build/
|
|||||||
out/
|
out/
|
||||||
|
|
||||||
# test db
|
# test db
|
||||||
test_level_db_idx
|
test_level_db_*
|
@ -76,7 +76,7 @@ void queryWithIndex(leveldb::DB* db, int targetAge, int numRecords,
|
|||||||
|
|
||||||
auto startWithIndex = high_resolution_clock::now();
|
auto startWithIndex = high_resolution_clock::now();
|
||||||
|
|
||||||
vector<leveldb::SKeyReturnVal> values;
|
vector<leveldb::SecondaryKeyReturnVal> values;
|
||||||
leveldb::Status s = db->Get(
|
leveldb::Status s = db->Get(
|
||||||
roptions, leveldb::Slice(std::to_string(targetAge)), &values, numRecords);
|
roptions, leveldb::Slice(std::to_string(targetAge)), &values, numRecords);
|
||||||
|
|
||||||
@ -146,7 +146,7 @@ void runComparison(leveldb::DB* db, int targetAge, int numRecords,
|
|||||||
std::cout << "PERFORMANCE COMPARISON\n";
|
std::cout << "PERFORMANCE COMPARISON\n";
|
||||||
std::cout << "==========================================\n";
|
std::cout << "==========================================\n";
|
||||||
|
|
||||||
vector<leveldb::SKeyReturnVal> values;
|
vector<leveldb::SecondaryKeyReturnVal> values;
|
||||||
auto startWithIndex = high_resolution_clock::now();
|
auto startWithIndex = high_resolution_clock::now();
|
||||||
db->Get(roptions, leveldb::Slice(std::to_string(targetAge)), &values,
|
db->Get(roptions, leveldb::Slice(std::to_string(targetAge)), &values,
|
||||||
numRecords);
|
numRecords);
|
||||||
@ -234,7 +234,7 @@ int main(int argc, char* argv[]) {
|
|||||||
// Setup database
|
// Setup database
|
||||||
leveldb::DB* db;
|
leveldb::DB* db;
|
||||||
leveldb::Options options;
|
leveldb::Options options;
|
||||||
options.filter_policy = leveldb::NewBloomFilterPolicy(10);
|
options.filter_policy = leveldb::NewBloomFilterPolicy(20);
|
||||||
options.primary_key = "id";
|
options.primary_key = "id";
|
||||||
options.secondary_key = "age";
|
options.secondary_key = "age";
|
||||||
options.create_if_missing = true;
|
options.create_if_missing = true;
|
||||||
|
@ -1174,12 +1174,13 @@ Status DBImpl::Get(const ReadOptions& options, const Slice& key,
|
|||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool NewestFirst(const SKeyReturnVal& a, const SKeyReturnVal& b) {
|
static bool NewestFirst(const SecondaryKeyReturnVal& a,
|
||||||
|
const SecondaryKeyReturnVal& b) {
|
||||||
return a.sequence_number < b.sequence_number ? false : true;
|
return a.sequence_number < b.sequence_number ? false : true;
|
||||||
}
|
}
|
||||||
|
|
||||||
Status DBImpl::Get(const ReadOptions& options, const Slice& s_key,
|
Status DBImpl::Get(const ReadOptions& options, const Slice& s_key,
|
||||||
std::vector<SKeyReturnVal>* acc, int top_k_outputs) {
|
std::vector<SecondaryKeyReturnVal>* acc, int top_k_outputs) {
|
||||||
Status s;
|
Status s;
|
||||||
MutexLock l(&mutex_);
|
MutexLock l(&mutex_);
|
||||||
SequenceNumber snapshot;
|
SequenceNumber snapshot;
|
||||||
@ -1206,12 +1207,10 @@ Status DBImpl::Get(const ReadOptions& options, const Slice& s_key,
|
|||||||
LookupKey lkey(s_key, snapshot);
|
LookupKey lkey(s_key, snapshot);
|
||||||
|
|
||||||
std::unordered_set<std::string> result_set;
|
std::unordered_set<std::string> result_set;
|
||||||
mem->Get(s_key, snapshot, acc, &s, this->options_.secondary_key,
|
mem->Get(s_key, snapshot, acc, &s, &result_set, top_k_outputs);
|
||||||
&result_set, top_k_outputs);
|
|
||||||
|
|
||||||
if (imm != nullptr && top_k_outputs - acc->size() > 0) {
|
if (imm != nullptr && top_k_outputs - acc->size() > 0) {
|
||||||
imm->Get(s_key, snapshot, acc, &s, this->options_.secondary_key,
|
imm->Get(s_key, snapshot, acc, &s, &result_set, top_k_outputs);
|
||||||
&result_set, top_k_outputs);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (top_k_outputs > (int)(acc->size())) {
|
if (top_k_outputs > (int)(acc->size())) {
|
||||||
|
@ -48,7 +48,8 @@ class DBImpl : public DB {
|
|||||||
Status Get(const ReadOptions& options, const Slice& key,
|
Status Get(const ReadOptions& options, const Slice& key,
|
||||||
std::string* value) override;
|
std::string* value) override;
|
||||||
Status Get(const ReadOptions& options, const Slice& s_key,
|
Status Get(const ReadOptions& options, const Slice& s_key,
|
||||||
std::vector<SKeyReturnVal>* acc, int top_k_outputs) override;
|
std::vector<SecondaryKeyReturnVal>* acc,
|
||||||
|
int top_k_outputs) override;
|
||||||
Iterator* NewIterator(const ReadOptions&) override;
|
Iterator* NewIterator(const ReadOptions&) override;
|
||||||
const Snapshot* GetSnapshot() override;
|
const Snapshot* GetSnapshot() override;
|
||||||
void ReleaseSnapshot(const Snapshot* snapshot) override;
|
void ReleaseSnapshot(const Snapshot* snapshot) override;
|
||||||
|
@ -29,13 +29,13 @@ static Slice GetLengthPrefixedSlice(const char* data) {
|
|||||||
MemTable::MemTable(const InternalKeyComparator& comparator,
|
MemTable::MemTable(const InternalKeyComparator& comparator,
|
||||||
std::string secondary_key)
|
std::string secondary_key)
|
||||||
: comparator_(comparator), refs_(0), table_(comparator_, &arena_) {
|
: comparator_(comparator), refs_(0), table_(comparator_, &arena_) {
|
||||||
secAttribute = secondary_key;
|
secondary_attribute_ = secondary_key;
|
||||||
}
|
}
|
||||||
|
|
||||||
MemTable::~MemTable() {
|
MemTable::~MemTable() {
|
||||||
assert(refs_ == 0);
|
assert(refs_ == 0);
|
||||||
for (SecMemTable::iterator it = secTable_.begin(); it != secTable_.end();
|
for (SecMemTable::iterator it = secondary_table_.begin();
|
||||||
it++) {
|
it != secondary_table_.end(); it++) {
|
||||||
std::pair<std::string, std::vector<std::string>*> pr = *it;
|
std::pair<std::string, std::vector<std::string>*> pr = *it;
|
||||||
|
|
||||||
std::vector<std::string>* invertedList = pr.second;
|
std::vector<std::string>* invertedList = pr.second;
|
||||||
@ -131,21 +131,20 @@ void MemTable::Add(SequenceNumber s, ValueType type, const Slice& key,
|
|||||||
if (type == kTypeDeletion) {
|
if (type == kTypeDeletion) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
std::string secKey;
|
std::string extracted_secondary_key;
|
||||||
Status st =
|
Status st = ExtractKeyFromJSON(value.ToString().c_str(), secondary_attribute_,
|
||||||
ExtractKeyFromJSON(value.ToString().c_str(), secAttribute, &secKey);
|
&extracted_secondary_key);
|
||||||
if (!st.ok()) {
|
if (!st.ok()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
SecMemTable::const_iterator lookup = secTable_.find(secKey);
|
SecMemTable::const_iterator lookup =
|
||||||
if (lookup == secTable_.end()) {
|
secondary_table_.find(extracted_secondary_key);
|
||||||
|
if (lookup == secondary_table_.end()) {
|
||||||
std::vector<std::string>* invertedList = new std::vector<std::string>();
|
std::vector<std::string>* invertedList = new std::vector<std::string>();
|
||||||
invertedList->push_back(key.ToString());
|
invertedList->push_back(key.ToString());
|
||||||
|
secondary_table_.insert(
|
||||||
secTable_.insert(std::make_pair(secKey, invertedList));
|
std::make_pair(extracted_secondary_key, invertedList));
|
||||||
}
|
} else {
|
||||||
|
|
||||||
else {
|
|
||||||
std::pair<std::string, std::vector<std::string>*> pr = *lookup;
|
std::pair<std::string, std::vector<std::string>*> pr = *lookup;
|
||||||
pr.second->push_back(key.ToString());
|
pr.second->push_back(key.ToString());
|
||||||
}
|
}
|
||||||
@ -225,12 +224,11 @@ bool MemTable::Get(const LookupKey& key, std::string* value, Status* s,
|
|||||||
}
|
}
|
||||||
|
|
||||||
void MemTable::Get(const Slice& skey, SequenceNumber snapshot,
|
void MemTable::Get(const Slice& skey, SequenceNumber snapshot,
|
||||||
std::vector<SKeyReturnVal>* acc, Status* s,
|
std::vector<SecondaryKeyReturnVal>* acc, Status* s,
|
||||||
std::string secondary_key,
|
|
||||||
std::unordered_set<std::string>* result_set,
|
std::unordered_set<std::string>* result_set,
|
||||||
int top_k_output) {
|
int top_k_output) {
|
||||||
auto lookup = secTable_.find(skey.ToString());
|
auto lookup = secondary_table_.find(skey.ToString());
|
||||||
if (lookup == secTable_.end()) {
|
if (lookup == secondary_table_.end()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
std::pair<std::string, std::vector<std::string>*> pr = *lookup;
|
std::pair<std::string, std::vector<std::string>*> pr = *lookup;
|
||||||
@ -246,11 +244,11 @@ void MemTable::Get(const Slice& skey, SequenceNumber snapshot,
|
|||||||
if (!this->Get(lkey, &svalue, &s, &tag)) return;
|
if (!this->Get(lkey, &svalue, &s, &tag)) return;
|
||||||
if (s.IsNotFound()) return;
|
if (s.IsNotFound()) return;
|
||||||
|
|
||||||
Status st = ExtractKeyFromJSON(svalue, secAttribute, &secKeyVal);
|
Status st = ExtractKeyFromJSON(svalue, secondary_attribute_, &secKeyVal);
|
||||||
if (!st.ok()) return;
|
if (!st.ok()) return;
|
||||||
if (comparator_.comparator.user_comparator()->Compare(secKeyVal, skey) ==
|
if (comparator_.comparator.user_comparator()->Compare(secKeyVal, skey) ==
|
||||||
0) {
|
0) {
|
||||||
struct SKeyReturnVal newVal;
|
struct SecondaryKeyReturnVal newVal;
|
||||||
newVal.key = pr.second->at(i);
|
newVal.key = pr.second->at(i);
|
||||||
std::string temp;
|
std::string temp;
|
||||||
|
|
||||||
|
@ -72,8 +72,7 @@ class MemTable {
|
|||||||
// Get methods for Secondary Memtable
|
// Get methods for Secondary Memtable
|
||||||
bool Get(const LookupKey& key, std::string* value, Status* s, uint64_t* tag);
|
bool Get(const LookupKey& key, std::string* value, Status* s, uint64_t* tag);
|
||||||
void Get(const Slice& s_key, SequenceNumber snapshot,
|
void Get(const Slice& s_key, SequenceNumber snapshot,
|
||||||
std::vector<SKeyReturnVal>* value, Status* s,
|
std::vector<SecondaryKeyReturnVal>* value, Status* s,
|
||||||
std::string secondary_key,
|
|
||||||
std::unordered_set<std::string>* result_set, int top_k_value);
|
std::unordered_set<std::string>* result_set, int top_k_value);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
@ -97,8 +96,8 @@ class MemTable {
|
|||||||
|
|
||||||
// SECONDARY MEMTABLE
|
// SECONDARY MEMTABLE
|
||||||
typedef btree::btree_map<std::string, std::vector<std::string>*> SecMemTable;
|
typedef btree::btree_map<std::string, std::vector<std::string>*> SecMemTable;
|
||||||
SecMemTable secTable_;
|
SecMemTable secondary_table_;
|
||||||
std::string secAttribute;
|
std::string secondary_attribute_;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace leveldb
|
} // namespace leveldb
|
||||||
|
@ -266,7 +266,7 @@ struct SecSaver {
|
|||||||
SaverState state;
|
SaverState state;
|
||||||
const Comparator* ucmp;
|
const Comparator* ucmp;
|
||||||
Slice user_key;
|
Slice user_key;
|
||||||
std::vector<SKeyReturnVal>* acc;
|
std::vector<SecondaryKeyReturnVal>* acc;
|
||||||
std::unordered_set<std::string>* result_set;
|
std::unordered_set<std::string>* result_set;
|
||||||
};
|
};
|
||||||
} // namespace
|
} // namespace
|
||||||
@ -302,7 +302,7 @@ static bool SecSaveValue(void* arg, const Slice& ikey, const Slice& v,
|
|||||||
if (s->ucmp->Compare(key, s->user_key) == 0) {
|
if (s->ucmp->Compare(key, s->user_key) == 0) {
|
||||||
s->state = (parsed_key.type == kTypeValue) ? kFound : kDeleted;
|
s->state = (parsed_key.type == kTypeValue) ? kFound : kDeleted;
|
||||||
if (s->state == kFound) {
|
if (s->state == kFound) {
|
||||||
struct SKeyReturnVal new_val;
|
struct SecondaryKeyReturnVal new_val;
|
||||||
Slice ukey = ExtractUserKey(ikey);
|
Slice ukey = ExtractUserKey(ikey);
|
||||||
if (s->result_set->find(ukey.ToString()) == s->result_set->end()) {
|
if (s->result_set->find(ukey.ToString()) == s->result_set->end()) {
|
||||||
new_val.key = ukey.ToString();
|
new_val.key = ukey.ToString();
|
||||||
@ -340,7 +340,8 @@ static bool NewestFirst(FileMetaData* a, FileMetaData* b) {
|
|||||||
return a->number > b->number;
|
return a->number > b->number;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool NewestFirstSequenceNumber(SKeyReturnVal a, SKeyReturnVal b) {
|
static bool NewestFirstSequenceNumber(SecondaryKeyReturnVal a,
|
||||||
|
SecondaryKeyReturnVal b) {
|
||||||
return a.sequence_number > b.sequence_number;
|
return a.sequence_number > b.sequence_number;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -466,7 +467,7 @@ Status Version::Get(const ReadOptions& options, const LookupKey& k,
|
|||||||
}
|
}
|
||||||
|
|
||||||
Status Version::Get(const ReadOptions& options, const LookupKey& k,
|
Status Version::Get(const ReadOptions& options, const LookupKey& k,
|
||||||
std::vector<SKeyReturnVal>* acc, GetStats* stats,
|
std::vector<SecondaryKeyReturnVal>* acc, GetStats* stats,
|
||||||
std::string secondary_key, int top_k_output,
|
std::string secondary_key, int top_k_output,
|
||||||
std::unordered_set<std::string>* result_set, DBImpl* db) {
|
std::unordered_set<std::string>* result_set, DBImpl* db) {
|
||||||
Slice ikey = k.internal_key();
|
Slice ikey = k.internal_key();
|
||||||
|
@ -81,7 +81,7 @@ class Version {
|
|||||||
GetStats* stats);
|
GetStats* stats);
|
||||||
|
|
||||||
Status Get(const ReadOptions&, const LookupKey& key,
|
Status Get(const ReadOptions&, const LookupKey& key,
|
||||||
std::vector<SKeyReturnVal>* value, GetStats* stats,
|
std::vector<SecondaryKeyReturnVal>* value, GetStats* stats,
|
||||||
std::string secondary_key, int top_k_outputs,
|
std::string secondary_key, int top_k_outputs,
|
||||||
std::unordered_set<std::string>* result_set, DBImpl* db);
|
std::unordered_set<std::string>* result_set, DBImpl* db);
|
||||||
|
|
||||||
|
@ -4,6 +4,7 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "leveldb/db.h"
|
#include "leveldb/db.h"
|
||||||
|
#include "leveldb/filter_policy.h"
|
||||||
#include "leveldb/options.h"
|
#include "leveldb/options.h"
|
||||||
#include "leveldb/status.h"
|
#include "leveldb/status.h"
|
||||||
|
|
||||||
@ -19,29 +20,58 @@ int main(int argc, char* argv[]) {
|
|||||||
std::string db_path = argv[1];
|
std::string db_path = argv[1];
|
||||||
int port = (argc > 2) ? std::stoi(argv[2]) : 8080;
|
int port = (argc > 2) ? std::stoi(argv[2]) : 8080;
|
||||||
|
|
||||||
// Open the database
|
// Create two DB instances
|
||||||
leveldb::DB* db;
|
leveldb::DB* db_with_bloom;
|
||||||
leveldb::Options options;
|
leveldb::DB* db_without_bloom;
|
||||||
options.create_if_missing = true;
|
|
||||||
options.primary_key = "id";
|
|
||||||
options.secondary_key = "age";
|
|
||||||
|
|
||||||
leveldb::Status status = leveldb::DB::Open(options, db_path, &db);
|
// Configure options for DB with Bloom filter
|
||||||
if (!status.ok()) {
|
leveldb::Options options_with_bloom;
|
||||||
std::cerr << "Unable to open/create database: " << status.ToString()
|
options_with_bloom.filter_policy = leveldb::NewBloomFilterPolicy(20);
|
||||||
<< std::endl;
|
options_with_bloom.create_if_missing = true;
|
||||||
|
options_with_bloom.primary_key = "id";
|
||||||
|
options_with_bloom.secondary_key = "age";
|
||||||
|
|
||||||
|
// Configure options for DB without Bloom filter
|
||||||
|
leveldb::Options options_without_bloom;
|
||||||
|
options_without_bloom.filter_policy = nullptr; // No Bloom filter
|
||||||
|
options_without_bloom.create_if_missing = true;
|
||||||
|
options_without_bloom.primary_key = "id";
|
||||||
|
|
||||||
|
// Open both databases
|
||||||
|
leveldb::Status status_with_bloom = leveldb::DB::Open(
|
||||||
|
options_with_bloom, db_path + "with_bloom", &db_with_bloom);
|
||||||
|
if (!status_with_bloom.ok()) {
|
||||||
|
std::cerr << "Unable to open/create database with Bloom filter: "
|
||||||
|
<< status_with_bloom.ToString() << std::endl;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
leveldb::Status status_without_bloom = leveldb::DB::Open(
|
||||||
|
options_without_bloom, db_path + "without_bloom", &db_without_bloom);
|
||||||
|
if (!status_without_bloom.ok()) {
|
||||||
|
std::cerr << "Unable to open/create database without Bloom filter: "
|
||||||
|
<< status_without_bloom.ToString() << std::endl;
|
||||||
|
delete db_with_bloom;
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
crow::SimpleApp app;
|
crow::SimpleApp app;
|
||||||
|
|
||||||
|
// Primary key get - specify which DB to use with ?bloom=true/false parameter
|
||||||
CROW_ROUTE(app, "/db/get/<string>")
|
CROW_ROUTE(app, "/db/get/<string>")
|
||||||
.methods("GET"_method)([db](const crow::request& req,
|
.methods(
|
||||||
std::string primary_key) {
|
"GET"_method)([db_with_bloom, db_without_bloom](
|
||||||
|
const crow::request& req, std::string primary_key) {
|
||||||
|
// Determine which DB to use
|
||||||
|
auto use_bloom = req.url_params.get("bloom");
|
||||||
|
leveldb::DB* db_to_use =
|
||||||
|
(use_bloom && std::string(use_bloom) == "false") ? db_without_bloom
|
||||||
|
: db_with_bloom;
|
||||||
|
|
||||||
// Get value by primary key
|
// Get value by primary key
|
||||||
std::string value;
|
std::string value;
|
||||||
leveldb::Status s =
|
leveldb::Status s =
|
||||||
db->Get(leveldb::ReadOptions(), primary_key, &value);
|
db_to_use->Get(leveldb::ReadOptions(), primary_key, &value);
|
||||||
|
|
||||||
if (s.ok()) {
|
if (s.ok()) {
|
||||||
// Return the JSON value
|
// Return the JSON value
|
||||||
@ -53,15 +83,27 @@ int main(int argc, char* argv[]) {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
// Define route for secondary key operations
|
// Secondary key query - with parameter to select DB
|
||||||
CROW_ROUTE(app, "/db/query")
|
CROW_ROUTE(app, "/db/query")
|
||||||
.methods("GET"_method)([db](const crow::request& req) {
|
.methods("GET"_method)([db_with_bloom,
|
||||||
|
db_without_bloom](const crow::request& req) {
|
||||||
// Get query parameters
|
// Get query parameters
|
||||||
auto secondary_key = req.url_params.get("key");
|
auto secondary_key = req.url_params.get("key");
|
||||||
auto limit_param = req.url_params.get("limit");
|
auto limit_param = req.url_params.get("limit");
|
||||||
auto use_secondary = req.url_params.get("use_secondary");
|
auto use_secondary = req.url_params.get("use_secondary");
|
||||||
int limit = 10;
|
|
||||||
|
|
||||||
|
// Determine which DB to use
|
||||||
|
|
||||||
|
leveldb::DB* db_to_use = nullptr;
|
||||||
|
if (use_secondary) {
|
||||||
|
std::cout << "Using Db with bloom " << std::endl;
|
||||||
|
db_to_use = db_with_bloom;
|
||||||
|
} else {
|
||||||
|
std::cout << "Using Db without bloom " << std::endl;
|
||||||
|
db_to_use = db_without_bloom;
|
||||||
|
}
|
||||||
|
|
||||||
|
int limit = 10;
|
||||||
if (limit_param) {
|
if (limit_param) {
|
||||||
try {
|
try {
|
||||||
limit = std::stoi(limit_param);
|
limit = std::stoi(limit_param);
|
||||||
@ -73,7 +115,7 @@ int main(int argc, char* argv[]) {
|
|||||||
|
|
||||||
if (!use_secondary) {
|
if (!use_secondary) {
|
||||||
std::unique_ptr<leveldb::Iterator> it(
|
std::unique_ptr<leveldb::Iterator> it(
|
||||||
db->NewIterator(leveldb::ReadOptions()));
|
db_to_use->NewIterator(leveldb::ReadOptions()));
|
||||||
std::vector<std::string> results;
|
std::vector<std::string> results;
|
||||||
int target_key_value = std::stoi(secondary_key);
|
int target_key_value = std::stoi(secondary_key);
|
||||||
|
|
||||||
@ -118,10 +160,10 @@ int main(int argc, char* argv[]) {
|
|||||||
"{\"error\": \"Missing 'key' parameter\"}");
|
"{\"error\": \"Missing 'key' parameter\"}");
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<leveldb::SKeyReturnVal> results;
|
std::vector<leveldb::SecondaryKeyReturnVal> results;
|
||||||
leveldb::Status s =
|
leveldb::Status s =
|
||||||
db->Get(leveldb::ReadOptions(), leveldb::Slice(secondary_key),
|
db_to_use->Get(leveldb::ReadOptions(),
|
||||||
&results, limit);
|
leveldb::Slice(secondary_key), &results, limit);
|
||||||
|
|
||||||
if (s.ok()) {
|
if (s.ok()) {
|
||||||
std::string json_results = "[";
|
std::string json_results = "[";
|
||||||
@ -138,26 +180,245 @@ int main(int argc, char* argv[]) {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Put to both DBs to keep them in sync
|
||||||
CROW_ROUTE(app, "/db/put")
|
CROW_ROUTE(app, "/db/put")
|
||||||
.methods("POST"_method)([db](const crow::request& req) {
|
.methods("POST"_method)([db_with_bloom,
|
||||||
|
db_without_bloom](const crow::request& req) {
|
||||||
auto body = req.body;
|
auto body = req.body;
|
||||||
if (body.empty()) {
|
if (body.empty()) {
|
||||||
return crow::response(400, "{\"error\": \"Empty request body\"}");
|
return crow::response(400, "{\"error\": \"Empty request body\"}");
|
||||||
}
|
}
|
||||||
|
|
||||||
leveldb::Status s = db->Put(leveldb::WriteOptions(), body);
|
// Write to both databases
|
||||||
if (s.ok()) {
|
leveldb::Status s1 = db_with_bloom->Put(leveldb::WriteOptions(), body);
|
||||||
|
leveldb::Status s2 =
|
||||||
|
db_without_bloom->Put(leveldb::WriteOptions(), body);
|
||||||
|
|
||||||
|
if (s1.ok() && s2.ok()) {
|
||||||
return crow::response(200, "{\"status\": \"success\"}");
|
return crow::response(200, "{\"status\": \"success\"}");
|
||||||
} else {
|
} else {
|
||||||
return crow::response(500, "{\"error\": \"" + s.ToString() + "\"}");
|
std::string error = "";
|
||||||
|
if (!s1.ok()) error += "With Bloom: " + s1.ToString();
|
||||||
|
if (!s2.ok()) {
|
||||||
|
if (!error.empty()) error += ", ";
|
||||||
|
error += "Without Bloom: " + s2.ToString();
|
||||||
|
}
|
||||||
|
return crow::response(500, "{\"error\": \"" + error + "\"}");
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Add a new endpoint for stats/comparison
|
||||||
|
CROW_ROUTE(app, "/db/stats")
|
||||||
|
.methods("GET"_method)(
|
||||||
|
[db_with_bloom, db_without_bloom](const crow::request& req) {
|
||||||
|
// This would be expanded in a real implementation
|
||||||
|
// to return performance statistics, etc.
|
||||||
|
crow::json::wvalue stats;
|
||||||
|
stats["bloom_filter_enabled"]["status"] = "active";
|
||||||
|
stats["no_bloom_filter"]["status"] = "active";
|
||||||
|
return crow::response(200, stats);
|
||||||
|
});
|
||||||
|
|
||||||
|
// Bulk insertion endpoint
|
||||||
|
CROW_ROUTE(app, "/db/bulk-insert")
|
||||||
|
.methods("POST"_method)([db_with_bloom,
|
||||||
|
db_without_bloom](const crow::request& req) {
|
||||||
|
// Parse parameters from JSON body
|
||||||
|
rapidjson::Document doc;
|
||||||
|
if (doc.Parse(req.body.c_str()).HasParseError()) {
|
||||||
|
return crow::response(400, "{\"error\": \"Invalid JSON body\"}");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get number of records to insert
|
||||||
|
if (!doc.HasMember("numRecords") || !doc["numRecords"].IsInt()) {
|
||||||
|
return crow::response(
|
||||||
|
400,
|
||||||
|
"{\"error\": \"Missing or invalid 'numRecords' parameter\"}");
|
||||||
|
}
|
||||||
|
int numRecords = doc["numRecords"].GetInt();
|
||||||
|
|
||||||
|
// Get which DB to use (both, bloom only, or no-bloom only)
|
||||||
|
bool useBloom = true;
|
||||||
|
bool useNoBloom = true;
|
||||||
|
if (doc.HasMember("useBloom") && doc["useBloom"].IsBool()) {
|
||||||
|
useBloom = doc["useBloom"].GetBool();
|
||||||
|
}
|
||||||
|
if (doc.HasMember("useNoBloom") && doc["useNoBloom"].IsBool()) {
|
||||||
|
useNoBloom = doc["useNoBloom"].GetBool();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set up timing
|
||||||
|
auto startTime = std::chrono::high_resolution_clock::now();
|
||||||
|
int successCount = 0;
|
||||||
|
int errorCount = 0;
|
||||||
|
|
||||||
|
// Set up write options
|
||||||
|
leveldb::WriteOptions woptions;
|
||||||
|
|
||||||
|
// Insert data
|
||||||
|
for (int i = 0; i < numRecords; ++i) {
|
||||||
|
// Create sample JSON document
|
||||||
|
std::stringstream ss;
|
||||||
|
ss << "{\n \"id\": " << i << ",\n \"age\": " << (i % 50 + 10)
|
||||||
|
<< ",\n \"name\": \"User" << i << "\"\n}";
|
||||||
|
std::string json_string = ss.str();
|
||||||
|
|
||||||
|
// Insert into selected DBs
|
||||||
|
bool success = true;
|
||||||
|
if (useBloom) {
|
||||||
|
leveldb::Status s = db_with_bloom->Put(woptions, json_string);
|
||||||
|
if (!s.ok()) {
|
||||||
|
errorCount++;
|
||||||
|
success = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (useNoBloom) {
|
||||||
|
leveldb::Status s = db_without_bloom->Put(woptions, json_string);
|
||||||
|
if (!s.ok()) {
|
||||||
|
errorCount++;
|
||||||
|
success = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (success) {
|
||||||
|
successCount++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate time taken
|
||||||
|
auto endTime = std::chrono::high_resolution_clock::now();
|
||||||
|
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(
|
||||||
|
endTime - startTime);
|
||||||
|
|
||||||
|
// Build response
|
||||||
|
crow::json::wvalue result;
|
||||||
|
result["success"] = true;
|
||||||
|
result["recordsRequested"] = numRecords;
|
||||||
|
result["recordsInserted"] = successCount;
|
||||||
|
result["recordsFailed"] = errorCount;
|
||||||
|
result["timeMs"] = duration.count();
|
||||||
|
result["usedBloomFilter"] = useBloom;
|
||||||
|
result["usedNoBloomFilter"] = useNoBloom;
|
||||||
|
|
||||||
|
return crow::response(200, result);
|
||||||
|
});
|
||||||
|
|
||||||
|
// Also add an endpoint to run performance comparison tests
|
||||||
|
CROW_ROUTE(app, "/db/performance-test")
|
||||||
|
.methods("POST"_method)([db_with_bloom,
|
||||||
|
db_without_bloom](const crow::request& req) {
|
||||||
|
// Parse parameters
|
||||||
|
rapidjson::Document doc;
|
||||||
|
if (doc.Parse(req.body.c_str()).HasParseError()) {
|
||||||
|
return crow::response(400, "{\"error\": \"Invalid JSON body\"}");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get target age to search for
|
||||||
|
int targetAge = 30; // default
|
||||||
|
if (doc.HasMember("targetAge") && doc["targetAge"].IsInt()) {
|
||||||
|
targetAge = doc["targetAge"].GetInt();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Results object
|
||||||
|
crow::json::wvalue results;
|
||||||
|
|
||||||
|
// Test with bloom filter
|
||||||
|
{
|
||||||
|
auto startWithBloom = std::chrono::high_resolution_clock::now();
|
||||||
|
|
||||||
|
std::vector<leveldb::SecondaryKeyReturnVal> values;
|
||||||
|
leveldb::ReadOptions roptions;
|
||||||
|
db_with_bloom->Get(roptions,
|
||||||
|
leveldb::Slice(std::to_string(targetAge)), &values,
|
||||||
|
1000);
|
||||||
|
|
||||||
|
auto endWithBloom = std::chrono::high_resolution_clock::now();
|
||||||
|
auto bloomDuration =
|
||||||
|
std::chrono::duration_cast<std::chrono::microseconds>(
|
||||||
|
endWithBloom - startWithBloom);
|
||||||
|
|
||||||
|
results["withBloomFilter"]["timeUs"] = bloomDuration.count();
|
||||||
|
results["withBloomFilter"]["recordsFound"] = values.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test without bloom filter
|
||||||
|
{
|
||||||
|
auto startNoBloom = std::chrono::high_resolution_clock::now();
|
||||||
|
|
||||||
|
std::vector<leveldb::SecondaryKeyReturnVal> values;
|
||||||
|
leveldb::ReadOptions roptions;
|
||||||
|
db_without_bloom->Get(roptions,
|
||||||
|
leveldb::Slice(std::to_string(targetAge)),
|
||||||
|
&values, 1000);
|
||||||
|
|
||||||
|
auto endNoBloom = std::chrono::high_resolution_clock::now();
|
||||||
|
auto noBloomDuration =
|
||||||
|
std::chrono::duration_cast<std::chrono::microseconds>(
|
||||||
|
endNoBloom - startNoBloom);
|
||||||
|
|
||||||
|
results["withoutBloomFilter"]["timeUs"] = noBloomDuration.count();
|
||||||
|
results["withoutBloomFilter"]["recordsFound"] = values.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Full scan test on bloom filter DB (for comparison)
|
||||||
|
{
|
||||||
|
auto startScan = std::chrono::high_resolution_clock::now();
|
||||||
|
|
||||||
|
leveldb::ReadOptions roptions;
|
||||||
|
leveldb::Iterator* it = db_with_bloom->NewIterator(roptions);
|
||||||
|
int count = 0;
|
||||||
|
|
||||||
|
for (it->SeekToFirst(); it->Valid(); it->Next()) {
|
||||||
|
rapidjson::Document doc;
|
||||||
|
doc.Parse<0>(it->value().ToString().c_str());
|
||||||
|
if (!doc.HasParseError() && doc.HasMember("age") &&
|
||||||
|
doc["age"].IsInt()) {
|
||||||
|
if (doc["age"].GetInt() == targetAge) {
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
auto endScan = std::chrono::high_resolution_clock::now();
|
||||||
|
auto scanDuration =
|
||||||
|
std::chrono::duration_cast<std::chrono::microseconds>(endScan -
|
||||||
|
startScan);
|
||||||
|
|
||||||
|
results["fullScan"]["timeUs"] = scanDuration.count();
|
||||||
|
results["fullScan"]["recordsFound"] = count;
|
||||||
|
|
||||||
|
delete it;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate speedups
|
||||||
|
double bloomVsNoBloom =
|
||||||
|
std::stod(results["withoutBloomFilter"]["timeUs"].dump()) /
|
||||||
|
std::stod(results["withBloomFilter"]["timeUs"].dump());
|
||||||
|
double bloomVsScan =
|
||||||
|
std::stod(results["fullScan"]["timeUs"].dump()) /
|
||||||
|
std::stod(results["withBloomFilter"]["timeUs"].dump());
|
||||||
|
|
||||||
|
results["speedups"]["bloomVsNoBloom"] = bloomVsNoBloom;
|
||||||
|
results["speedups"]["bloomVsFullScan"] = bloomVsScan;
|
||||||
|
results["targetAge"] = targetAge;
|
||||||
|
|
||||||
|
return crow::response(200, results);
|
||||||
|
});
|
||||||
|
|
||||||
// Start the server
|
// Start the server
|
||||||
std::cout << "Starting LevelDB HTTP server on port " << port << std::endl;
|
std::cout << "Starting LevelDB HTTP server on port " << port << std::endl;
|
||||||
std::cout << "Database path: " << db_path << std::endl;
|
std::cout << "Database paths:" << std::endl;
|
||||||
|
std::cout << " With Bloom filter: " << db_path + "_with_bloom" << std::endl;
|
||||||
|
std::cout << " Without Bloom filter: " << db_path + "_without_bloom"
|
||||||
|
<< std::endl;
|
||||||
app.port(port).multithreaded().run();
|
app.port(port).multithreaded().run();
|
||||||
|
|
||||||
delete db;
|
// Clean up both DB instances
|
||||||
|
delete db_with_bloom;
|
||||||
|
delete db_without_bloom;
|
||||||
|
delete options_with_bloom
|
||||||
|
.filter_policy; // Don't forget to free the Bloom filter
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
@ -44,25 +44,26 @@ struct LEVELDB_EXPORT Range {
|
|||||||
Slice limit; // Not included in the range
|
Slice limit; // Not included in the range
|
||||||
};
|
};
|
||||||
|
|
||||||
struct LEVELDB_EXPORT SKeyReturnVal {
|
struct LEVELDB_EXPORT SecondaryKeyReturnVal {
|
||||||
std::string key; // Included in the range
|
std::string key; // Included in the range
|
||||||
std::string value; // Not included in the range
|
std::string value; // Not included in the range
|
||||||
uint64_t sequence_number; // presumably sequence_number that leveldb assigns
|
uint64_t sequence_number; // presumably sequence_number that leveldb assigns
|
||||||
// to each KV pair, we use this for top_k sorting
|
// to each KV pair, we use this for top_k sorting
|
||||||
|
|
||||||
static bool comp(const leveldb::SKeyReturnVal& a,
|
static bool comp(const leveldb::SecondaryKeyReturnVal& a,
|
||||||
const leveldb::SKeyReturnVal& b) {
|
const leveldb::SecondaryKeyReturnVal& b) {
|
||||||
return a.sequence_number < b.sequence_number ? false : true;
|
return a.sequence_number < b.sequence_number ? false : true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Push(std::vector<leveldb::SKeyReturnVal>* heap,
|
void Push(std::vector<leveldb::SecondaryKeyReturnVal>* heap,
|
||||||
leveldb::SKeyReturnVal val) {
|
leveldb::SecondaryKeyReturnVal val) {
|
||||||
heap->push_back(val);
|
heap->push_back(val);
|
||||||
push_heap(heap->begin(), heap->end(), comp);
|
push_heap(heap->begin(), heap->end(), comp);
|
||||||
}
|
}
|
||||||
|
|
||||||
leveldb::SKeyReturnVal Pop(std::vector<leveldb::SKeyReturnVal>* heap) {
|
leveldb::SecondaryKeyReturnVal Pop(
|
||||||
leveldb::SKeyReturnVal val = heap->front();
|
std::vector<leveldb::SecondaryKeyReturnVal>* heap) {
|
||||||
|
leveldb::SecondaryKeyReturnVal val = heap->front();
|
||||||
|
|
||||||
// This operation will move the smallest element to the end of the vector
|
// This operation will move the smallest element to the end of the vector
|
||||||
pop_heap(heap->begin(), heap->end(), comp);
|
pop_heap(heap->begin(), heap->end(), comp);
|
||||||
@ -129,7 +130,8 @@ class LEVELDB_EXPORT DB {
|
|||||||
|
|
||||||
// New Get method for query on secondary Key
|
// New Get method for query on secondary Key
|
||||||
virtual Status Get(const ReadOptions& options, const Slice& skey,
|
virtual Status Get(const ReadOptions& options, const Slice& skey,
|
||||||
std::vector<SKeyReturnVal>* value, int top_k_outputs) {
|
std::vector<SecondaryKeyReturnVal>* value,
|
||||||
|
int top_k_outputs) {
|
||||||
return Status::NotSupported("Get not implemented in ModelDB");
|
return Status::NotSupported("Get not implemented in ModelDB");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user