leveldb 使用 version 来保存数据库的状态。
先看看一个重要的数据结果,sst file的META info
<db/version_edit.h>
struct FileMetaData {
int refs; //引用计数
int allowed_seeks; //允许的seeks次数
uint64_t number;//文件编号
uint64_t file_size; //文件大小
InternalKey smallest; //最小的key
InternalKey largest; //最大的key
FileMetaData() : refs(0), allowed_seeks(1 << 30), file_size(0) { }
};
这里面有一个很有意思的字段: allowed_seeks,代表了可以seek的次数,为0的时候表示这个文件需要被compaction.如何设置seeks次数呢?文件大小除以16k,不到100算100。
f->allowed_seeks = (f->file_size / 16384);
if (f->allowed_seeks < 100) f->allowed_seeks = 100;
原因,请看leveldb的注释:
// We arrange to automatically compact this file after a certain number of seeks. Let's assume:
// (1) One seek costs 10ms
// (2) Writing or reading 1MB costs 10ms (100MB/s)
// (3) A compaction of 1MB does 25MB of IO:
// 1MB read from this level
// 10-12MB read from next level (boundaries may be misaligned)
// 10-12MB written to next level
// This implies that 25 seeks cost the same as the compaction
// of 1MB of data. I.e., one seek costs approximately the
// same as the compaction of 40KB of data. We are a little
// conservative and allow approximately one seek for every 16KB
// of data before triggering a compaction.
接下来看Version的定义,version其实就是一系列的SST file的集合。
class Version {
public:
//生成iterator用于遍历
void AddIterators(const ReadOptions&, std::vector<Iterator*>* iters);
//根据key来查询,若没有查到,更新GetStats
struct GetStats {
FileMetaData* seek_file;
int seek_file_level;
};
Status Get(const ReadOptions&, const LookupKey& key, std::string* val,
GetStats* stats);
//是否需要进行compaction
bool UpdateStats(const GetStats& stats);
//引用计算,避免在被引用时候删除
void Ref();
void Unref();
//查询和key range有关的files
void GetOverlappingInputs(
int level,
const InternalKey* begin, // NULL means before all keys
const InternalKey* end, // NULL means after all keys
std::vector<FileMetaData*>* inputs);
//计算是否level对某个key range是否有overlap
bool OverlapInLevel(int level,
const Slice* smallest_user_key,
const Slice* largest_user_key);
//memtable output应该放到哪个level
int PickLevelForMemTableOutput(const Slice& smallest_user_key,
const Slice& largest_user_key);
//某个level的文件个数
int NumFiles(int level) const { return files_[level].size(); }
// Return a human readable string that describes this version's contents.
std::string DebugString() const;
private:
friend class Compaction;
friend class VersionSet;
class LevelFileNumIterator;
Iterator* NewConcatenatingIterator(const ReadOptions&, int level) const;
VersionSet* vset_; // VersionSet to which this Version belongs
Version* next_; // Next version in linked list
Version* prev_; // Previous version in linked list
int refs_; // Number of live refs to this version
//sst files
std::vector<FileMetaData*> files_[config::kNumLevels];
//下一个要被compaction的文件
FileMetaData* file_to_compact_;
int file_to_compact_level_;
//compaction score:>1表示要compaction
double compaction_score_;
int compaction_level_;
explicit Version(VersionSet* vset)
: vset_(vset), next_(this), prev_(this), refs_(0),
file_to_compact_(NULL),
file_to_compact_level_(-1),
compaction_score_(-1),
compaction_level_(-1) {
}
~Version();
// No copying allowed
Version(const Version&);
void operator=(const Version&);
};
那VersionSet呢?VersionSet 是version组成一个双向循环链表。
class VersionSet{
//. . .
Env* const env_;
const std::string dbname_;
const Options* const options_;
TableCache* const table_cache_;
const InternalKeyComparator icmp_;
uint64_t next_file_number_;
uint64_t manifest_file_number_;
uint64_t last_sequence_;
uint64_t log_number_;
WritableFile* descriptor_file_;
log::Writer* descriptor_log_;
Version dummy_versions_; // Head of circular doubly-linked list of versions.
Version* current_; // == dummy_versions_.prev_
//每层都有一个compact pointer用于指示下次从哪里开始compact,以用于实现循环compact
std::string compact_pointer_[config::kNumLevels];
//. . .
}
VersionEdit是version对象的变更记录,用于写入manifest.这样通过原始的version加上一系列的versionedit的记录,就可以恢复到最新状态。
class VersionEdit {
public:
VersionEdit() { Clear(); }
~VersionEdit() { }
void Clear();
void SetComparatorName(const Slice& name) {
has_comparator_ = true;
comparator_ = name.ToString();
}
void SetLogNumber(uint64_t num) {
has_log_number_ = true;
log_number_ = num;
}
void SetPrevLogNumber(uint64_t num) {
has_prev_log_number_ = true;
prev_log_number_ = num;
}
void SetNextFile(uint64_t num) {
has_next_file_number_ = true;
next_file_number_ = num;
}
void SetLastSequence(SequenceNumber seq) {
has_last_sequence_ = true;
last_sequence_ = seq;
}
void SetCompactPointer(int level, const InternalKey& key) {
compact_pointers_.push_back(std::make_pair(level, key));
}
//添加meta file
void AddFile(int level, uint64_t file,
uint64_t file_size,
const InternalKey& smallest,
const InternalKey& largest) {
FileMetaData f;
f.number = file;
f.file_size = file_size;
f.smallest = smallest;
f.largest = largest;
new_files_.push_back(std::make_pair(level, f));
}
//删除特定的文件
void DeleteFile(int level, uint64_t file) {
deleted_files_.insert(std::make_pair(level, file));
}
//编码,解码:用于写入manifest
void EncodeTo(std::string* dst) const;
Status DecodeFrom(const Slice& src);
std::string DebugString() const;
private:
friend class VersionSet;
typedef std::set< std::pair<int, uint64_t> > DeletedFileSet;
std::string comparator_;
uint64_t log_number_;
uint64_t prev_log_number_;
uint64_t next_file_number_;
SequenceNumber last_sequence_;
bool has_comparator_;
bool has_log_number_;
bool has_prev_log_number_;
bool has_next_file_number_;
bool has_last_sequence_;
std::vector< std::pair<int, InternalKey> > compact_pointers_;
DeletedFileSet deleted_files_;
std::vector< std::pair<int, FileMetaData> > new_files_;
};