Extract disk opening functions to separate module
parent
30907852c2
commit
dfd80626bd
|
@ -64,7 +64,7 @@ include_directories(
|
||||||
|
|
||||||
# libvitastor_blk.so
|
# libvitastor_blk.so
|
||||||
add_library(vitastor_blk SHARED
|
add_library(vitastor_blk SHARED
|
||||||
allocator.cpp blockstore.cpp blockstore_impl.cpp blockstore_init.cpp blockstore_open.cpp blockstore_journal.cpp blockstore_read.cpp
|
allocator.cpp blockstore.cpp blockstore_impl.cpp blockstore_disk.cpp blockstore_init.cpp blockstore_open.cpp blockstore_journal.cpp blockstore_read.cpp
|
||||||
blockstore_write.cpp blockstore_sync.cpp blockstore_stable.cpp blockstore_rollback.cpp blockstore_flush.cpp crc32c.c ringloop.cpp
|
blockstore_write.cpp blockstore_sync.cpp blockstore_stable.cpp blockstore_rollback.cpp blockstore_flush.cpp crc32c.c ringloop.cpp
|
||||||
)
|
)
|
||||||
target_link_libraries(vitastor_blk
|
target_link_libraries(vitastor_blk
|
||||||
|
|
|
@ -0,0 +1,322 @@
|
||||||
|
// Copyright (c) Vitaliy Filippov, 2019+
|
||||||
|
// License: VNPL-1.1 (see README.md for details)
|
||||||
|
|
||||||
|
#include <sys/file.h>
|
||||||
|
|
||||||
|
#include <stdexcept>
|
||||||
|
|
||||||
|
#include "blockstore_impl.h"
|
||||||
|
#include "blockstore_disk.h"
|
||||||
|
|
||||||
|
// Returns log2(value) when value is an exact power of two (1 -> 0, 2 -> 1, 4096 -> 12).
// Returns 64 for every other value, including 0, so that callers can reject
// bad input with a single `>= 64` comparison.
static uint32_t is_power_of_two(uint64_t value)
{
    if (!value)
    {
        // Zero has no bits set; without this check the loop below is skipped
        // and zero would be reported as a power of two with order 0
        return 64;
    }
    uint32_t l = 0;
    while (value > 1)
    {
        if (value & 1)
        {
            // A low bit is set while higher bits remain => more than one bit set
            return 64;
        }
        value = value >> 1;
        l++;
    }
    return l;
}
|
||||||
|
|
||||||
|
void blockstore_disk_t::parse_config(std::map<std::string, std::string> & config)
|
||||||
|
{
|
||||||
|
// Parse
|
||||||
|
if (config["disable_device_lock"] == "true" || config["disable_device_lock"] == "1" || config["disable_device_lock"] == "yes")
|
||||||
|
{
|
||||||
|
disable_flock = true;
|
||||||
|
}
|
||||||
|
cfg_journal_size = strtoull(config["journal_size"].c_str(), NULL, 10);
|
||||||
|
data_device = config["data_device"];
|
||||||
|
data_offset = strtoull(config["data_offset"].c_str(), NULL, 10);
|
||||||
|
cfg_data_size = strtoull(config["data_size"].c_str(), NULL, 10);
|
||||||
|
meta_device = config["meta_device"];
|
||||||
|
meta_offset = strtoull(config["meta_offset"].c_str(), NULL, 10);
|
||||||
|
data_block_size = strtoull(config["block_size"].c_str(), NULL, 10);
|
||||||
|
journal_device = config["journal_device"];
|
||||||
|
journal_offset = strtoull(config["journal_offset"].c_str(), NULL, 10);
|
||||||
|
disk_alignment = strtoull(config["disk_alignment"].c_str(), NULL, 10);
|
||||||
|
journal_block_size = strtoull(config["journal_block_size"].c_str(), NULL, 10);
|
||||||
|
meta_block_size = strtoull(config["meta_block_size"].c_str(), NULL, 10);
|
||||||
|
bitmap_granularity = strtoull(config["bitmap_granularity"].c_str(), NULL, 10);
|
||||||
|
// Validate
|
||||||
|
if (!data_block_size)
|
||||||
|
{
|
||||||
|
data_block_size = (1 << DEFAULT_DATA_BLOCK_ORDER);
|
||||||
|
}
|
||||||
|
if ((block_order = is_power_of_two(data_block_size)) >= 64 || data_block_size < MIN_DATA_BLOCK_SIZE || data_block_size >= MAX_DATA_BLOCK_SIZE)
|
||||||
|
{
|
||||||
|
throw std::runtime_error("Bad block size");
|
||||||
|
}
|
||||||
|
if (!disk_alignment)
|
||||||
|
{
|
||||||
|
disk_alignment = 4096;
|
||||||
|
}
|
||||||
|
else if (disk_alignment % DIRECT_IO_ALIGNMENT)
|
||||||
|
{
|
||||||
|
throw std::runtime_error("disk_alignment must be a multiple of "+std::to_string(DIRECT_IO_ALIGNMENT));
|
||||||
|
}
|
||||||
|
if (!journal_block_size)
|
||||||
|
{
|
||||||
|
journal_block_size = 4096;
|
||||||
|
}
|
||||||
|
else if (journal_block_size % DIRECT_IO_ALIGNMENT)
|
||||||
|
{
|
||||||
|
throw std::runtime_error("journal_block_size must be a multiple of "+std::to_string(DIRECT_IO_ALIGNMENT));
|
||||||
|
}
|
||||||
|
if (!meta_block_size)
|
||||||
|
{
|
||||||
|
meta_block_size = 4096;
|
||||||
|
}
|
||||||
|
else if (meta_block_size % DIRECT_IO_ALIGNMENT)
|
||||||
|
{
|
||||||
|
throw std::runtime_error("meta_block_size must be a multiple of "+std::to_string(DIRECT_IO_ALIGNMENT));
|
||||||
|
}
|
||||||
|
if (data_offset % disk_alignment)
|
||||||
|
{
|
||||||
|
throw std::runtime_error("data_offset must be a multiple of disk_alignment = "+std::to_string(disk_alignment));
|
||||||
|
}
|
||||||
|
if (!bitmap_granularity)
|
||||||
|
{
|
||||||
|
bitmap_granularity = DEFAULT_BITMAP_GRANULARITY;
|
||||||
|
}
|
||||||
|
else if (bitmap_granularity % disk_alignment)
|
||||||
|
{
|
||||||
|
throw std::runtime_error("Sparse write tracking granularity must be a multiple of disk_alignment = "+std::to_string(disk_alignment));
|
||||||
|
}
|
||||||
|
if (data_block_size % bitmap_granularity)
|
||||||
|
{
|
||||||
|
throw std::runtime_error("Block size must be a multiple of sparse write tracking granularity");
|
||||||
|
}
|
||||||
|
if (journal_device == meta_device || meta_device == "" && journal_device == data_device)
|
||||||
|
{
|
||||||
|
journal_device = "";
|
||||||
|
}
|
||||||
|
if (meta_device == data_device)
|
||||||
|
{
|
||||||
|
meta_device = "";
|
||||||
|
}
|
||||||
|
if (meta_offset % meta_block_size)
|
||||||
|
{
|
||||||
|
throw std::runtime_error("meta_offset must be a multiple of meta_block_size = "+std::to_string(meta_block_size));
|
||||||
|
}
|
||||||
|
if (journal_offset % journal_block_size)
|
||||||
|
{
|
||||||
|
throw std::runtime_error("journal_offset must be a multiple of journal_block_size = "+std::to_string(journal_block_size));
|
||||||
|
}
|
||||||
|
clean_entry_bitmap_size = data_block_size / bitmap_granularity / 8;
|
||||||
|
clean_entry_size = sizeof(clean_disk_entry) + 2*clean_entry_bitmap_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
void blockstore_disk_t::calc_lengths()
|
||||||
|
{
|
||||||
|
// data
|
||||||
|
data_len = data_device_size - data_offset;
|
||||||
|
if (data_fd == meta_fd && data_offset < meta_offset)
|
||||||
|
{
|
||||||
|
data_len = meta_offset - data_offset;
|
||||||
|
}
|
||||||
|
if (data_fd == journal_fd && data_offset < journal_offset)
|
||||||
|
{
|
||||||
|
data_len = data_len < journal_offset-data_offset
|
||||||
|
? data_len : journal_offset-data_offset;
|
||||||
|
}
|
||||||
|
if (cfg_data_size != 0)
|
||||||
|
{
|
||||||
|
if (data_len < cfg_data_size)
|
||||||
|
{
|
||||||
|
throw std::runtime_error("Data area ("+std::to_string(data_len)+
|
||||||
|
" bytes) is smaller than configured size ("+std::to_string(cfg_data_size)+" bytes)");
|
||||||
|
}
|
||||||
|
data_len = cfg_data_size;
|
||||||
|
}
|
||||||
|
// meta
|
||||||
|
uint64_t meta_area_size = (meta_fd == data_fd ? data_device_size : meta_device_size) - meta_offset;
|
||||||
|
if (meta_fd == data_fd && meta_offset <= data_offset)
|
||||||
|
{
|
||||||
|
meta_area_size = data_offset - meta_offset;
|
||||||
|
}
|
||||||
|
if (meta_fd == journal_fd && meta_offset <= journal_offset)
|
||||||
|
{
|
||||||
|
meta_area_size = meta_area_size < journal_offset-meta_offset
|
||||||
|
? meta_area_size : journal_offset-meta_offset;
|
||||||
|
}
|
||||||
|
// journal
|
||||||
|
journal_len = (journal_fd == data_fd ? data_device_size : (journal_fd == meta_fd ? meta_device_size : journal_device_size)) - journal_offset;
|
||||||
|
if (journal_fd == data_fd && journal_offset <= data_offset)
|
||||||
|
{
|
||||||
|
journal_len = data_offset - journal_offset;
|
||||||
|
}
|
||||||
|
if (journal_fd == meta_fd && journal_offset <= meta_offset)
|
||||||
|
{
|
||||||
|
journal_len = journal_len < meta_offset-journal_offset
|
||||||
|
? journal_len : meta_offset-journal_offset;
|
||||||
|
}
|
||||||
|
// required metadata size
|
||||||
|
block_count = data_len / data_block_size;
|
||||||
|
meta_len = (1 + (block_count - 1 + meta_block_size / clean_entry_size) / (meta_block_size / clean_entry_size)) * meta_block_size;
|
||||||
|
if (meta_area_size < meta_len)
|
||||||
|
{
|
||||||
|
throw std::runtime_error("Metadata area is too small, need at least "+std::to_string(meta_len)+" bytes");
|
||||||
|
}
|
||||||
|
// requested journal size
|
||||||
|
if (cfg_journal_size > journal_len)
|
||||||
|
{
|
||||||
|
throw std::runtime_error("Requested journal_size is too large");
|
||||||
|
}
|
||||||
|
else if (cfg_journal_size > 0)
|
||||||
|
{
|
||||||
|
journal_len = cfg_journal_size;
|
||||||
|
}
|
||||||
|
if (journal_len < MIN_JOURNAL_SIZE)
|
||||||
|
{
|
||||||
|
throw std::runtime_error("Journal is too small, need at least "+std::to_string(MIN_JOURNAL_SIZE)+" bytes");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Determines the size of an opened regular file or block device.
//   fd       - open file descriptor
//   size     - receives the size in bytes
//   sectsize - optional (may be NULL); receives st_blksize for a regular file
//              or the BLKSSZGET value for a block device
//   name     - human-readable name used in error messages
// Throws std::runtime_error if fstat/ioctl fails or fd is neither a regular file
// nor a block device.
static void check_size(int fd, uint64_t *size, uint64_t *sectsize, std::string name)
{
    int sect = 0;
    struct stat st;
    if (fstat(fd, &st) < 0)
    {
        throw std::runtime_error("Failed to stat "+name+": "+strerror(errno));
    }
    if (S_ISREG(st.st_mode))
    {
        *size = st.st_size;
        if (sectsize)
        {
            *sectsize = st.st_blksize;
        }
    }
    else if (S_ISBLK(st.st_mode))
    {
        if (ioctl(fd, BLKGETSIZE64, size) < 0 ||
            ioctl(fd, BLKSSZGET, &sect) < 0)
        {
            throw std::runtime_error("Failed to get "+name+" size or block size: "+strerror(errno));
        }
        if (sectsize)
        {
            *sectsize = sect;
        }
    }
    else
    {
        throw std::runtime_error(name+" is neither a file nor a block device");
    }
}
|
||||||
|
|
||||||
|
void blockstore_disk_t::open_data()
|
||||||
|
{
|
||||||
|
data_fd = open(data_device.c_str(), O_DIRECT|O_RDWR);
|
||||||
|
if (data_fd == -1)
|
||||||
|
{
|
||||||
|
throw std::runtime_error("Failed to open data device");
|
||||||
|
}
|
||||||
|
check_size(data_fd, &data_device_size, &data_device_sect, "data device");
|
||||||
|
if (disk_alignment % data_device_sect)
|
||||||
|
{
|
||||||
|
throw std::runtime_error(
|
||||||
|
"disk_alignment ("+std::to_string(disk_alignment)+
|
||||||
|
") is not a multiple of data device sector size ("+std::to_string(data_device_sect)+")"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
if (data_offset >= data_device_size)
|
||||||
|
{
|
||||||
|
throw std::runtime_error("data_offset exceeds device size = "+std::to_string(data_device_size));
|
||||||
|
}
|
||||||
|
if (!disable_flock && flock(data_fd, LOCK_EX|LOCK_NB) != 0)
|
||||||
|
{
|
||||||
|
throw std::runtime_error(std::string("Failed to lock data device: ") + strerror(errno));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void blockstore_disk_t::open_meta()
|
||||||
|
{
|
||||||
|
if (meta_device != "")
|
||||||
|
{
|
||||||
|
meta_offset = 0;
|
||||||
|
meta_fd = open(meta_device.c_str(), O_DIRECT|O_RDWR);
|
||||||
|
if (meta_fd == -1)
|
||||||
|
{
|
||||||
|
throw std::runtime_error("Failed to open metadata device");
|
||||||
|
}
|
||||||
|
check_size(meta_fd, &meta_device_size, &meta_device_sect, "metadata device");
|
||||||
|
if (meta_offset >= meta_device_size)
|
||||||
|
{
|
||||||
|
throw std::runtime_error("meta_offset exceeds device size = "+std::to_string(meta_device_size));
|
||||||
|
}
|
||||||
|
if (!disable_flock && flock(meta_fd, LOCK_EX|LOCK_NB) != 0)
|
||||||
|
{
|
||||||
|
throw std::runtime_error(std::string("Failed to lock metadata device: ") + strerror(errno));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
meta_fd = data_fd;
|
||||||
|
meta_device_sect = data_device_sect;
|
||||||
|
meta_device_size = 0;
|
||||||
|
if (meta_offset >= data_device_size)
|
||||||
|
{
|
||||||
|
throw std::runtime_error("meta_offset exceeds device size = "+std::to_string(data_device_size));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (meta_block_size % meta_device_sect)
|
||||||
|
{
|
||||||
|
throw std::runtime_error(
|
||||||
|
"meta_block_size ("+std::to_string(meta_block_size)+
|
||||||
|
") is not a multiple of data device sector size ("+std::to_string(meta_device_sect)+")"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void blockstore_disk_t::open_journal()
|
||||||
|
{
|
||||||
|
if (journal_device != "")
|
||||||
|
{
|
||||||
|
journal_fd = open(journal_device.c_str(), O_DIRECT|O_RDWR);
|
||||||
|
if (journal_fd == -1)
|
||||||
|
{
|
||||||
|
throw std::runtime_error("Failed to open journal device");
|
||||||
|
}
|
||||||
|
check_size(journal_fd, &journal_device_size, &journal_device_sect, "journal device");
|
||||||
|
if (!disable_flock && flock(journal_fd, LOCK_EX|LOCK_NB) != 0)
|
||||||
|
{
|
||||||
|
throw std::runtime_error(std::string("Failed to lock journal device: ") + strerror(errno));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
journal_fd = meta_fd;
|
||||||
|
journal_device_sect = meta_device_sect;
|
||||||
|
journal_device_size = 0;
|
||||||
|
if (journal_offset >= data_device_size)
|
||||||
|
{
|
||||||
|
throw std::runtime_error("journal_offset exceeds device size");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (journal_block_size % journal_device_sect)
|
||||||
|
{
|
||||||
|
throw std::runtime_error(
|
||||||
|
"journal_block_size ("+std::to_string(journal_block_size)+
|
||||||
|
") is not a multiple of journal device sector size ("+std::to_string(journal_device_sect)+")"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void blockstore_disk_t::close_all()
|
||||||
|
{
|
||||||
|
if (data_fd >= 0)
|
||||||
|
close(data_fd);
|
||||||
|
if (meta_fd >= 0 && meta_fd != data_fd)
|
||||||
|
close(meta_fd);
|
||||||
|
if (journal_fd >= 0 && journal_fd != meta_fd)
|
||||||
|
close(journal_fd);
|
||||||
|
data_fd = meta_fd = journal_fd = -1;
|
||||||
|
}
|
|
@ -0,0 +1,42 @@
|
||||||
|
// Copyright (c) Vitaliy Filippov, 2019+
|
||||||
|
// License: VNPL-1.1 (see README.md for details)
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <map>
|
||||||
|
|
||||||
|
// Disk layout of a blockstore: device names, area offsets and lengths,
// block sizes and the file descriptors of the opened devices.
// Every scalar member has an initializer so that an instance is in a
// determinate state before parse_config() / open_*() are called.
struct blockstore_disk_t
{
    // Device paths; an empty meta_device / journal_device means the area shares another device
    std::string data_device, meta_device, journal_device;
    // Data block size in bytes
    uint32_t data_block_size = 0;
    // Configured journal and data area sizes; 0 = not configured
    uint64_t cfg_journal_size = 0, cfg_data_size = 0;
    // Required write alignment and journal/metadata/data areas' location alignment
    uint32_t disk_alignment = 4096;
    // Journal block size - minimum_io_size of the journal device is the best choice
    uint64_t journal_block_size = 4096;
    // Metadata block size - minimum_io_size of the metadata device is the best choice
    uint64_t meta_block_size = 4096;
    // Sparse write tracking granularity. 4 KB is a good choice. Must be a multiple of disk_alignment
    uint64_t bitmap_granularity = 4096;
    // By default, Blockstore locks all opened devices exclusively. This option can be used to disable locking
    bool disable_flock = false;

    // File descriptors; -1 = not opened
    int meta_fd = -1, data_fd = -1, journal_fd = -1;
    // Per-area offset, device sector size, device size and area length, all in bytes
    uint64_t meta_offset = 0, meta_device_sect = 0, meta_device_size = 0, meta_len = 0;
    uint64_t data_offset = 0, data_device_sect = 0, data_device_size = 0, data_len = 0;
    uint64_t journal_offset = 0, journal_device_sect = 0, journal_device_size = 0, journal_len = 0;

    // log2 of data_block_size
    uint32_t block_order = 0;
    // Number of whole data blocks in the data area
    uint64_t block_count = 0;
    // Size of one bitmap in a metadata entry and size of the whole entry, in bytes
    uint32_t clean_entry_bitmap_size = 0, clean_entry_size = 0;

    void parse_config(std::map<std::string, std::string> & config);
    void open_data();
    void open_meta();
    void open_journal();
    void calc_lengths();
    void close_all();
};
|
|
@ -15,11 +15,11 @@ journal_flusher_t::journal_flusher_t(blockstore_impl_t *bs)
|
||||||
active_flushers = 0;
|
active_flushers = 0;
|
||||||
syncing_flushers = 0;
|
syncing_flushers = 0;
|
||||||
// FIXME: allow to configure flusher_start_threshold and journal_trim_interval
|
// FIXME: allow to configure flusher_start_threshold and journal_trim_interval
|
||||||
flusher_start_threshold = bs->journal_block_size / sizeof(journal_entry_stable);
|
flusher_start_threshold = bs->dsk.journal_block_size / sizeof(journal_entry_stable);
|
||||||
journal_trim_interval = 512;
|
journal_trim_interval = 512;
|
||||||
journal_trim_counter = bs->journal.flush_journal ? 1 : 0;
|
journal_trim_counter = bs->journal.flush_journal ? 1 : 0;
|
||||||
trim_wanted = bs->journal.flush_journal ? 1 : 0;
|
trim_wanted = bs->journal.flush_journal ? 1 : 0;
|
||||||
journal_superblock = bs->journal.inmemory ? bs->journal.buffer : memalign_or_die(MEM_ALIGNMENT, bs->journal_block_size);
|
journal_superblock = bs->journal.inmemory ? bs->journal.buffer : memalign_or_die(MEM_ALIGNMENT, bs->dsk.journal_block_size);
|
||||||
co = new journal_flusher_co[max_flusher_count];
|
co = new journal_flusher_co[max_flusher_count];
|
||||||
for (int i = 0; i < max_flusher_count; i++)
|
for (int i = 0; i < max_flusher_count; i++)
|
||||||
{
|
{
|
||||||
|
@ -486,28 +486,28 @@ resume_1:
|
||||||
bs->ringloop->wakeup();
|
bs->ringloop->wakeup();
|
||||||
}
|
}
|
||||||
// Reads completed, submit writes and set bitmap bits
|
// Reads completed, submit writes and set bitmap bits
|
||||||
if (bs->clean_entry_bitmap_size)
|
if (bs->dsk.clean_entry_bitmap_size)
|
||||||
{
|
{
|
||||||
new_clean_bitmap = (bs->inmemory_meta
|
new_clean_bitmap = (bs->inmemory_meta
|
||||||
? (uint8_t*)meta_new.buf + meta_new.pos*bs->clean_entry_size + sizeof(clean_disk_entry)
|
? (uint8_t*)meta_new.buf + meta_new.pos*bs->dsk.clean_entry_size + sizeof(clean_disk_entry)
|
||||||
: (uint8_t*)bs->clean_bitmap + (clean_loc >> bs->block_order)*(2*bs->clean_entry_bitmap_size));
|
: (uint8_t*)bs->clean_bitmap + (clean_loc >> bs->dsk.block_order)*(2*bs->dsk.clean_entry_bitmap_size));
|
||||||
if (clean_init_bitmap)
|
if (clean_init_bitmap)
|
||||||
{
|
{
|
||||||
memset(new_clean_bitmap, 0, bs->clean_entry_bitmap_size);
|
memset(new_clean_bitmap, 0, bs->dsk.clean_entry_bitmap_size);
|
||||||
bitmap_set(new_clean_bitmap, clean_bitmap_offset, clean_bitmap_len, bs->bitmap_granularity);
|
bitmap_set(new_clean_bitmap, clean_bitmap_offset, clean_bitmap_len, bs->dsk.bitmap_granularity);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (it = v.begin(); it != v.end(); it++)
|
for (it = v.begin(); it != v.end(); it++)
|
||||||
{
|
{
|
||||||
if (new_clean_bitmap)
|
if (new_clean_bitmap)
|
||||||
{
|
{
|
||||||
bitmap_set(new_clean_bitmap, it->offset, it->len, bs->bitmap_granularity);
|
bitmap_set(new_clean_bitmap, it->offset, it->len, bs->dsk.bitmap_granularity);
|
||||||
}
|
}
|
||||||
await_sqe(4);
|
await_sqe(4);
|
||||||
data->iov = (struct iovec){ it->buf, (size_t)it->len };
|
data->iov = (struct iovec){ it->buf, (size_t)it->len };
|
||||||
data->callback = simple_callback_w;
|
data->callback = simple_callback_w;
|
||||||
my_uring_prep_writev(
|
my_uring_prep_writev(
|
||||||
sqe, bs->data_fd, &data->iov, 1, bs->data_offset + clean_loc + it->offset
|
sqe, bs->dsk.data_fd, &data->iov, 1, bs->dsk.data_offset + clean_loc + it->offset
|
||||||
);
|
);
|
||||||
wait_count++;
|
wait_count++;
|
||||||
}
|
}
|
||||||
|
@ -536,35 +536,35 @@ resume_1:
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
// zero out old metadata entry
|
// zero out old metadata entry
|
||||||
memset((uint8_t*)meta_old.buf + meta_old.pos*bs->clean_entry_size, 0, bs->clean_entry_size);
|
memset((uint8_t*)meta_old.buf + meta_old.pos*bs->dsk.clean_entry_size, 0, bs->dsk.clean_entry_size);
|
||||||
await_sqe(15);
|
await_sqe(15);
|
||||||
data->iov = (struct iovec){ meta_old.buf, bs->meta_block_size };
|
data->iov = (struct iovec){ meta_old.buf, bs->dsk.meta_block_size };
|
||||||
data->callback = simple_callback_w;
|
data->callback = simple_callback_w;
|
||||||
my_uring_prep_writev(
|
my_uring_prep_writev(
|
||||||
sqe, bs->meta_fd, &data->iov, 1, bs->meta_offset + meta_old.sector
|
sqe, bs->dsk.meta_fd, &data->iov, 1, bs->dsk.meta_offset + meta_old.sector
|
||||||
);
|
);
|
||||||
wait_count++;
|
wait_count++;
|
||||||
}
|
}
|
||||||
if (has_delete)
|
if (has_delete)
|
||||||
{
|
{
|
||||||
clean_disk_entry *new_entry = (clean_disk_entry*)((uint8_t*)meta_new.buf + meta_new.pos*bs->clean_entry_size);
|
clean_disk_entry *new_entry = (clean_disk_entry*)((uint8_t*)meta_new.buf + meta_new.pos*bs->dsk.clean_entry_size);
|
||||||
if (new_entry->oid.inode != 0 && new_entry->oid != cur.oid)
|
if (new_entry->oid.inode != 0 && new_entry->oid != cur.oid)
|
||||||
{
|
{
|
||||||
printf("Fatal error (metadata corruption or bug): tried to delete metadata entry %lu (%lx:%lx v%lu) while deleting %lx:%lx\n",
|
printf("Fatal error (metadata corruption or bug): tried to delete metadata entry %lu (%lx:%lx v%lu) while deleting %lx:%lx\n",
|
||||||
clean_loc >> bs->block_order, new_entry->oid.inode, new_entry->oid.stripe,
|
clean_loc >> bs->dsk.block_order, new_entry->oid.inode, new_entry->oid.stripe,
|
||||||
new_entry->version, cur.oid.inode, cur.oid.stripe);
|
new_entry->version, cur.oid.inode, cur.oid.stripe);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
// zero out new metadata entry
|
// zero out new metadata entry
|
||||||
memset((uint8_t*)meta_new.buf + meta_new.pos*bs->clean_entry_size, 0, bs->clean_entry_size);
|
memset((uint8_t*)meta_new.buf + meta_new.pos*bs->dsk.clean_entry_size, 0, bs->dsk.clean_entry_size);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
clean_disk_entry *new_entry = (clean_disk_entry*)((uint8_t*)meta_new.buf + meta_new.pos*bs->clean_entry_size);
|
clean_disk_entry *new_entry = (clean_disk_entry*)((uint8_t*)meta_new.buf + meta_new.pos*bs->dsk.clean_entry_size);
|
||||||
if (new_entry->oid.inode != 0 && new_entry->oid != cur.oid)
|
if (new_entry->oid.inode != 0 && new_entry->oid != cur.oid)
|
||||||
{
|
{
|
||||||
printf("Fatal error (metadata corruption or bug): tried to overwrite non-zero metadata entry %lu (%lx:%lx v%lu) with %lx:%lx v%lu\n",
|
printf("Fatal error (metadata corruption or bug): tried to overwrite non-zero metadata entry %lu (%lx:%lx v%lu) with %lx:%lx v%lu\n",
|
||||||
clean_loc >> bs->block_order, new_entry->oid.inode, new_entry->oid.stripe, new_entry->version,
|
clean_loc >> bs->dsk.block_order, new_entry->oid.inode, new_entry->oid.stripe, new_entry->version,
|
||||||
cur.oid.inode, cur.oid.stripe, cur.version);
|
cur.oid.inode, cur.oid.stripe, cur.version);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
@ -572,20 +572,20 @@ resume_1:
|
||||||
new_entry->version = cur.version;
|
new_entry->version = cur.version;
|
||||||
if (!bs->inmemory_meta)
|
if (!bs->inmemory_meta)
|
||||||
{
|
{
|
||||||
memcpy(&new_entry->bitmap, new_clean_bitmap, bs->clean_entry_bitmap_size);
|
memcpy(&new_entry->bitmap, new_clean_bitmap, bs->dsk.clean_entry_bitmap_size);
|
||||||
}
|
}
|
||||||
// copy latest external bitmap/attributes
|
// copy latest external bitmap/attributes
|
||||||
if (bs->clean_entry_bitmap_size)
|
if (bs->dsk.clean_entry_bitmap_size)
|
||||||
{
|
{
|
||||||
void *bmp_ptr = bs->clean_entry_bitmap_size > sizeof(void*) ? dirty_end->second.bitmap : &dirty_end->second.bitmap;
|
void *bmp_ptr = bs->dsk.clean_entry_bitmap_size > sizeof(void*) ? dirty_end->second.bitmap : &dirty_end->second.bitmap;
|
||||||
memcpy((uint8_t*)(new_entry+1) + bs->clean_entry_bitmap_size, bmp_ptr, bs->clean_entry_bitmap_size);
|
memcpy((uint8_t*)(new_entry+1) + bs->dsk.clean_entry_bitmap_size, bmp_ptr, bs->dsk.clean_entry_bitmap_size);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
await_sqe(6);
|
await_sqe(6);
|
||||||
data->iov = (struct iovec){ meta_new.buf, bs->meta_block_size };
|
data->iov = (struct iovec){ meta_new.buf, bs->dsk.meta_block_size };
|
||||||
data->callback = simple_callback_w;
|
data->callback = simple_callback_w;
|
||||||
my_uring_prep_writev(
|
my_uring_prep_writev(
|
||||||
sqe, bs->meta_fd, &data->iov, 1, bs->meta_offset + meta_new.sector
|
sqe, bs->dsk.meta_fd, &data->iov, 1, bs->dsk.meta_offset + meta_new.sector
|
||||||
);
|
);
|
||||||
wait_count++;
|
wait_count++;
|
||||||
resume_7:
|
resume_7:
|
||||||
|
@ -669,9 +669,9 @@ resume_1:
|
||||||
.version = JOURNAL_VERSION,
|
.version = JOURNAL_VERSION,
|
||||||
};
|
};
|
||||||
((journal_entry_start*)flusher->journal_superblock)->crc32 = je_crc32((journal_entry*)flusher->journal_superblock);
|
((journal_entry_start*)flusher->journal_superblock)->crc32 = je_crc32((journal_entry*)flusher->journal_superblock);
|
||||||
data->iov = (struct iovec){ flusher->journal_superblock, bs->journal_block_size };
|
data->iov = (struct iovec){ flusher->journal_superblock, bs->dsk.journal_block_size };
|
||||||
data->callback = simple_callback_w;
|
data->callback = simple_callback_w;
|
||||||
my_uring_prep_writev(sqe, bs->journal.fd, &data->iov, 1, bs->journal.offset);
|
my_uring_prep_writev(sqe, bs->dsk.journal_fd, &data->iov, 1, bs->journal.offset);
|
||||||
wait_count++;
|
wait_count++;
|
||||||
resume_13:
|
resume_13:
|
||||||
if (wait_count > 0)
|
if (wait_count > 0)
|
||||||
|
@ -682,7 +682,7 @@ resume_1:
|
||||||
if (!bs->disable_journal_fsync)
|
if (!bs->disable_journal_fsync)
|
||||||
{
|
{
|
||||||
await_sqe(20);
|
await_sqe(20);
|
||||||
my_uring_prep_fsync(sqe, bs->journal.fd, IORING_FSYNC_DATASYNC);
|
my_uring_prep_fsync(sqe, bs->dsk.journal_fd, IORING_FSYNC_DATASYNC);
|
||||||
data->iov = { 0 };
|
data->iov = { 0 };
|
||||||
data->callback = simple_callback_w;
|
data->callback = simple_callback_w;
|
||||||
resume_21:
|
resume_21:
|
||||||
|
@ -774,7 +774,7 @@ bool journal_flusher_co::scan_dirty(int wait_base)
|
||||||
data->iov = (struct iovec){ it->buf, (size_t)submit_len };
|
data->iov = (struct iovec){ it->buf, (size_t)submit_len };
|
||||||
data->callback = simple_callback_r;
|
data->callback = simple_callback_r;
|
||||||
my_uring_prep_readv(
|
my_uring_prep_readv(
|
||||||
sqe, bs->journal.fd, &data->iov, 1, bs->journal.offset + submit_offset
|
sqe, bs->dsk.journal_fd, &data->iov, 1, bs->journal.offset + submit_offset
|
||||||
);
|
);
|
||||||
wait_count++;
|
wait_count++;
|
||||||
}
|
}
|
||||||
|
@ -825,8 +825,8 @@ bool journal_flusher_co::modify_meta_read(uint64_t meta_loc, flusher_meta_write_
|
||||||
// And yet another option is to use LSM trees for metadata, but it sophisticates everything a lot,
|
// And yet another option is to use LSM trees for metadata, but it sophisticates everything a lot,
|
||||||
// so I'll avoid it as long as I can.
|
// so I'll avoid it as long as I can.
|
||||||
wr.submitted = false;
|
wr.submitted = false;
|
||||||
wr.sector = ((meta_loc >> bs->block_order) / (bs->meta_block_size / bs->clean_entry_size)) * bs->meta_block_size;
|
wr.sector = ((meta_loc >> bs->dsk.block_order) / (bs->dsk.meta_block_size / bs->dsk.clean_entry_size)) * bs->dsk.meta_block_size;
|
||||||
wr.pos = ((meta_loc >> bs->block_order) % (bs->meta_block_size / bs->clean_entry_size));
|
wr.pos = ((meta_loc >> bs->dsk.block_order) % (bs->dsk.meta_block_size / bs->dsk.clean_entry_size));
|
||||||
if (bs->inmemory_meta)
|
if (bs->inmemory_meta)
|
||||||
{
|
{
|
||||||
wr.buf = (uint8_t*)bs->metadata_buffer + wr.sector;
|
wr.buf = (uint8_t*)bs->metadata_buffer + wr.sector;
|
||||||
|
@ -836,20 +836,20 @@ bool journal_flusher_co::modify_meta_read(uint64_t meta_loc, flusher_meta_write_
|
||||||
if (wr.it == flusher->meta_sectors.end())
|
if (wr.it == flusher->meta_sectors.end())
|
||||||
{
|
{
|
||||||
// Not in memory yet, read it
|
// Not in memory yet, read it
|
||||||
wr.buf = memalign_or_die(MEM_ALIGNMENT, bs->meta_block_size);
|
wr.buf = memalign_or_die(MEM_ALIGNMENT, bs->dsk.meta_block_size);
|
||||||
wr.it = flusher->meta_sectors.emplace(wr.sector, (meta_sector_t){
|
wr.it = flusher->meta_sectors.emplace(wr.sector, (meta_sector_t){
|
||||||
.offset = wr.sector,
|
.offset = wr.sector,
|
||||||
.len = bs->meta_block_size,
|
.len = bs->dsk.meta_block_size,
|
||||||
.state = 0, // 0 = not read yet
|
.state = 0, // 0 = not read yet
|
||||||
.buf = wr.buf,
|
.buf = wr.buf,
|
||||||
.usage_count = 1,
|
.usage_count = 1,
|
||||||
}).first;
|
}).first;
|
||||||
await_sqe(0);
|
await_sqe(0);
|
||||||
data->iov = (struct iovec){ wr.it->second.buf, bs->meta_block_size };
|
data->iov = (struct iovec){ wr.it->second.buf, bs->dsk.meta_block_size };
|
||||||
data->callback = simple_callback_r;
|
data->callback = simple_callback_r;
|
||||||
wr.submitted = true;
|
wr.submitted = true;
|
||||||
my_uring_prep_readv(
|
my_uring_prep_readv(
|
||||||
sqe, bs->meta_fd, &data->iov, 1, bs->meta_offset + wr.sector
|
sqe, bs->dsk.meta_fd, &data->iov, 1, bs->dsk.meta_offset + wr.sector
|
||||||
);
|
);
|
||||||
wait_count++;
|
wait_count++;
|
||||||
}
|
}
|
||||||
|
@ -867,11 +867,11 @@ void journal_flusher_co::update_clean_db()
|
||||||
{
|
{
|
||||||
#ifdef BLOCKSTORE_DEBUG
|
#ifdef BLOCKSTORE_DEBUG
|
||||||
printf("Free block %lu from %lx:%lx v%lu (new location is %lu)\n",
|
printf("Free block %lu from %lx:%lx v%lu (new location is %lu)\n",
|
||||||
old_clean_loc >> bs->block_order,
|
old_clean_loc >> bs->dsk.block_order,
|
||||||
cur.oid.inode, cur.oid.stripe, cur.version,
|
cur.oid.inode, cur.oid.stripe, cur.version,
|
||||||
clean_loc >> bs->block_order);
|
clean_loc >> bs->dsk.block_order);
|
||||||
#endif
|
#endif
|
||||||
bs->data_alloc->set(old_clean_loc >> bs->block_order, false);
|
bs->data_alloc->set(old_clean_loc >> bs->dsk.block_order, false);
|
||||||
}
|
}
|
||||||
auto & clean_db = bs->clean_db_shard(cur.oid);
|
auto & clean_db = bs->clean_db_shard(cur.oid);
|
||||||
if (has_delete)
|
if (has_delete)
|
||||||
|
@ -880,10 +880,10 @@ void journal_flusher_co::update_clean_db()
|
||||||
clean_db.erase(clean_it);
|
clean_db.erase(clean_it);
|
||||||
#ifdef BLOCKSTORE_DEBUG
|
#ifdef BLOCKSTORE_DEBUG
|
||||||
printf("Free block %lu from %lx:%lx v%lu (delete)\n",
|
printf("Free block %lu from %lx:%lx v%lu (delete)\n",
|
||||||
clean_loc >> bs->block_order,
|
clean_loc >> bs->dsk.block_order,
|
||||||
cur.oid.inode, cur.oid.stripe, cur.version);
|
cur.oid.inode, cur.oid.stripe, cur.version);
|
||||||
#endif
|
#endif
|
||||||
bs->data_alloc->set(clean_loc >> bs->block_order, false);
|
bs->data_alloc->set(clean_loc >> bs->dsk.block_order, false);
|
||||||
clean_loc = UINT64_MAX;
|
clean_loc = UINT64_MAX;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -932,7 +932,7 @@ bool journal_flusher_co::fsync_batch(bool fsync_meta, int wait_base)
|
||||||
await_sqe(0);
|
await_sqe(0);
|
||||||
data->iov = { 0 };
|
data->iov = { 0 };
|
||||||
data->callback = simple_callback_w;
|
data->callback = simple_callback_w;
|
||||||
my_uring_prep_fsync(sqe, fsync_meta ? bs->meta_fd : bs->data_fd, IORING_FSYNC_DATASYNC);
|
my_uring_prep_fsync(sqe, fsync_meta ? bs->dsk.meta_fd : bs->dsk.data_fd, IORING_FSYNC_DATASYNC);
|
||||||
cur_sync->state = 1;
|
cur_sync->state = 1;
|
||||||
wait_count++;
|
wait_count++;
|
||||||
resume_2:
|
resume_2:
|
||||||
|
|
|
@ -11,25 +11,19 @@ blockstore_impl_t::blockstore_impl_t(blockstore_config_t & config, ring_loop_t *
|
||||||
ring_consumer.loop = [this]() { loop(); };
|
ring_consumer.loop = [this]() { loop(); };
|
||||||
ringloop->register_consumer(&ring_consumer);
|
ringloop->register_consumer(&ring_consumer);
|
||||||
initialized = 0;
|
initialized = 0;
|
||||||
data_fd = meta_fd = journal.fd = -1;
|
|
||||||
parse_config(config);
|
parse_config(config);
|
||||||
zero_object = (uint8_t*)memalign_or_die(MEM_ALIGNMENT, data_block_size);
|
zero_object = (uint8_t*)memalign_or_die(MEM_ALIGNMENT, dsk.data_block_size);
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
open_data();
|
dsk.open_data();
|
||||||
open_meta();
|
dsk.open_meta();
|
||||||
open_journal();
|
dsk.open_journal();
|
||||||
calc_lengths();
|
calc_lengths();
|
||||||
data_alloc = new allocator(block_count);
|
data_alloc = new allocator(dsk.block_count);
|
||||||
}
|
}
|
||||||
catch (std::exception & e)
|
catch (std::exception & e)
|
||||||
{
|
{
|
||||||
if (data_fd >= 0)
|
dsk.close_all();
|
||||||
close(data_fd);
|
|
||||||
if (meta_fd >= 0 && meta_fd != data_fd)
|
|
||||||
close(meta_fd);
|
|
||||||
if (journal.fd >= 0 && journal.fd != meta_fd)
|
|
||||||
close(journal.fd);
|
|
||||||
throw;
|
throw;
|
||||||
}
|
}
|
||||||
flusher = new journal_flusher_t(this);
|
flusher = new journal_flusher_t(this);
|
||||||
|
@ -41,12 +35,7 @@ blockstore_impl_t::~blockstore_impl_t()
|
||||||
delete flusher;
|
delete flusher;
|
||||||
free(zero_object);
|
free(zero_object);
|
||||||
ringloop->unregister_consumer(&ring_consumer);
|
ringloop->unregister_consumer(&ring_consumer);
|
||||||
if (data_fd >= 0)
|
dsk.close_all();
|
||||||
close(data_fd);
|
|
||||||
if (meta_fd >= 0 && meta_fd != data_fd)
|
|
||||||
close(meta_fd);
|
|
||||||
if (journal.fd >= 0 && journal.fd != meta_fd)
|
|
||||||
close(journal.fd);
|
|
||||||
if (metadata_buffer)
|
if (metadata_buffer)
|
||||||
free(metadata_buffer);
|
free(metadata_buffer);
|
||||||
if (clean_bitmap)
|
if (clean_bitmap)
|
||||||
|
@ -343,9 +332,9 @@ void blockstore_impl_t::enqueue_op(blockstore_op_t *op)
|
||||||
{
|
{
|
||||||
if (op->opcode < BS_OP_MIN || op->opcode > BS_OP_MAX ||
|
if (op->opcode < BS_OP_MIN || op->opcode > BS_OP_MAX ||
|
||||||
((op->opcode == BS_OP_READ || op->opcode == BS_OP_WRITE || op->opcode == BS_OP_WRITE_STABLE) && (
|
((op->opcode == BS_OP_READ || op->opcode == BS_OP_WRITE || op->opcode == BS_OP_WRITE_STABLE) && (
|
||||||
op->offset >= data_block_size ||
|
op->offset >= dsk.data_block_size ||
|
||||||
op->len > data_block_size-op->offset ||
|
op->len > dsk.data_block_size-op->offset ||
|
||||||
(op->len % disk_alignment)
|
(op->len % dsk.disk_alignment)
|
||||||
)) ||
|
)) ||
|
||||||
readonly && op->opcode != BS_OP_READ && op->opcode != BS_OP_LIST)
|
readonly && op->opcode != BS_OP_READ && op->opcode != BS_OP_LIST)
|
||||||
{
|
{
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "blockstore.h"
|
#include "blockstore.h"
|
||||||
|
#include "blockstore_disk.h"
|
||||||
|
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include <sys/ioctl.h>
|
#include <sys/ioctl.h>
|
||||||
|
@ -218,23 +219,10 @@ struct pool_shard_settings_t
|
||||||
|
|
||||||
class blockstore_impl_t
|
class blockstore_impl_t
|
||||||
{
|
{
|
||||||
|
blockstore_disk_t dsk;
|
||||||
|
|
||||||
/******* OPTIONS *******/
|
/******* OPTIONS *******/
|
||||||
std::string data_device, meta_device, journal_device;
|
|
||||||
uint32_t data_block_size;
|
|
||||||
uint64_t meta_offset;
|
|
||||||
uint64_t data_offset;
|
|
||||||
uint64_t cfg_journal_size, cfg_data_size;
|
|
||||||
// Required write alignment and journal/metadata/data areas' location alignment
|
|
||||||
uint32_t disk_alignment = 4096;
|
|
||||||
// Journal block size - minimum_io_size of the journal device is the best choice
|
|
||||||
uint64_t journal_block_size = 4096;
|
|
||||||
// Metadata block size - minimum_io_size of the metadata device is the best choice
|
|
||||||
uint64_t meta_block_size = 4096;
|
|
||||||
// Sparse write tracking granularity. 4 KB is a good choice. Must be a multiple of disk_alignment
|
|
||||||
uint64_t bitmap_granularity = 4096;
|
|
||||||
bool readonly = false;
|
bool readonly = false;
|
||||||
// By default, Blockstore locks all opened devices exclusively. This option can be used to disable locking
|
|
||||||
bool disable_flock = false;
|
|
||||||
// It is safe to disable fsync() if drive write cache is writethrough
|
// It is safe to disable fsync() if drive write cache is writethrough
|
||||||
bool disable_data_fsync = false, disable_meta_fsync = false, disable_journal_fsync = false;
|
bool disable_data_fsync = false, disable_meta_fsync = false, disable_journal_fsync = false;
|
||||||
// Enable if you want every operation to be executed with an "implicit fsync"
|
// Enable if you want every operation to be executed with an "implicit fsync"
|
||||||
|
@ -269,16 +257,6 @@ class blockstore_impl_t
|
||||||
allocator *data_alloc = NULL;
|
allocator *data_alloc = NULL;
|
||||||
uint8_t *zero_object;
|
uint8_t *zero_object;
|
||||||
|
|
||||||
uint32_t block_order;
|
|
||||||
uint64_t block_count;
|
|
||||||
uint32_t clean_entry_bitmap_size = 0, clean_entry_size = 0;
|
|
||||||
|
|
||||||
int meta_fd;
|
|
||||||
int data_fd;
|
|
||||||
uint64_t meta_device_size, meta_len;
|
|
||||||
uint64_t data_device_size, data_len;
|
|
||||||
uint64_t data_device_sect, meta_device_sect, journal_device_sect;
|
|
||||||
|
|
||||||
void *metadata_buffer = NULL;
|
void *metadata_buffer = NULL;
|
||||||
|
|
||||||
struct journal_t journal;
|
struct journal_t journal;
|
||||||
|
@ -395,9 +373,9 @@ public:
|
||||||
// Print diagnostics to stdout
|
// Print diagnostics to stdout
|
||||||
void dump_diagnostics();
|
void dump_diagnostics();
|
||||||
|
|
||||||
inline uint32_t get_block_size() { return data_block_size; }
|
inline uint32_t get_block_size() { return dsk.data_block_size; }
|
||||||
inline uint64_t get_block_count() { return block_count; }
|
inline uint64_t get_block_count() { return dsk.block_count; }
|
||||||
inline uint64_t get_free_block_count() { return data_alloc->get_free_count(); }
|
inline uint64_t get_free_block_count() { return data_alloc->get_free_count(); }
|
||||||
inline uint32_t get_bitmap_granularity() { return disk_alignment; }
|
inline uint32_t get_bitmap_granularity() { return dsk.disk_alignment; }
|
||||||
inline uint64_t get_journal_size() { return journal.len; }
|
inline uint64_t get_journal_size() { return dsk.journal_len; }
|
||||||
};
|
};
|
||||||
|
|
|
@ -57,9 +57,9 @@ int blockstore_init_meta::loop()
|
||||||
throw std::runtime_error("Failed to allocate metadata read buffer");
|
throw std::runtime_error("Failed to allocate metadata read buffer");
|
||||||
// Read superblock
|
// Read superblock
|
||||||
GET_SQE();
|
GET_SQE();
|
||||||
data->iov = { metadata_buffer, bs->meta_block_size };
|
data->iov = { metadata_buffer, bs->dsk.meta_block_size };
|
||||||
data->callback = [this](ring_data_t *data) { handle_event(data); };
|
data->callback = [this](ring_data_t *data) { handle_event(data); };
|
||||||
my_uring_prep_readv(sqe, bs->meta_fd, &data->iov, 1, bs->meta_offset);
|
my_uring_prep_readv(sqe, bs->dsk.meta_fd, &data->iov, 1, bs->dsk.meta_offset);
|
||||||
bs->ringloop->submit();
|
bs->ringloop->submit();
|
||||||
submitted = 1;
|
submitted = 1;
|
||||||
resume_1:
|
resume_1:
|
||||||
|
@ -68,16 +68,16 @@ resume_1:
|
||||||
wait_state = 1;
|
wait_state = 1;
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
if (iszero((uint64_t*)metadata_buffer, bs->meta_block_size / sizeof(uint64_t)))
|
if (iszero((uint64_t*)metadata_buffer, bs->dsk.meta_block_size / sizeof(uint64_t)))
|
||||||
{
|
{
|
||||||
{
|
{
|
||||||
blockstore_meta_header_v1_t *hdr = (blockstore_meta_header_v1_t *)metadata_buffer;
|
blockstore_meta_header_v1_t *hdr = (blockstore_meta_header_v1_t *)metadata_buffer;
|
||||||
hdr->zero = 0;
|
hdr->zero = 0;
|
||||||
hdr->magic = BLOCKSTORE_META_MAGIC_V1;
|
hdr->magic = BLOCKSTORE_META_MAGIC_V1;
|
||||||
hdr->version = BLOCKSTORE_META_VERSION_V1;
|
hdr->version = BLOCKSTORE_META_VERSION_V1;
|
||||||
hdr->meta_block_size = bs->meta_block_size;
|
hdr->meta_block_size = bs->dsk.meta_block_size;
|
||||||
hdr->data_block_size = bs->data_block_size;
|
hdr->data_block_size = bs->dsk.data_block_size;
|
||||||
hdr->bitmap_granularity = bs->bitmap_granularity;
|
hdr->bitmap_granularity = bs->dsk.bitmap_granularity;
|
||||||
}
|
}
|
||||||
if (bs->readonly)
|
if (bs->readonly)
|
||||||
{
|
{
|
||||||
|
@ -87,9 +87,9 @@ resume_1:
|
||||||
{
|
{
|
||||||
printf("Initializing metadata area\n");
|
printf("Initializing metadata area\n");
|
||||||
GET_SQE();
|
GET_SQE();
|
||||||
data->iov = (struct iovec){ metadata_buffer, bs->meta_block_size };
|
data->iov = (struct iovec){ metadata_buffer, bs->dsk.meta_block_size };
|
||||||
data->callback = [this](ring_data_t *data) { handle_event(data); };
|
data->callback = [this](ring_data_t *data) { handle_event(data); };
|
||||||
my_uring_prep_writev(sqe, bs->meta_fd, &data->iov, 1, bs->meta_offset);
|
my_uring_prep_writev(sqe, bs->dsk.meta_fd, &data->iov, 1, bs->dsk.meta_offset);
|
||||||
bs->ringloop->submit();
|
bs->ringloop->submit();
|
||||||
submitted = 1;
|
submitted = 1;
|
||||||
resume_3:
|
resume_3:
|
||||||
|
@ -115,23 +115,23 @@ resume_1:
|
||||||
);
|
);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
if (hdr->meta_block_size != bs->meta_block_size ||
|
if (hdr->meta_block_size != bs->dsk.meta_block_size ||
|
||||||
hdr->data_block_size != bs->data_block_size ||
|
hdr->data_block_size != bs->dsk.data_block_size ||
|
||||||
hdr->bitmap_granularity != bs->bitmap_granularity)
|
hdr->bitmap_granularity != bs->dsk.bitmap_granularity)
|
||||||
{
|
{
|
||||||
printf(
|
printf(
|
||||||
"Configuration stored in metadata superblock"
|
"Configuration stored in metadata superblock"
|
||||||
" (meta_block_size=%u, data_block_size=%u, bitmap_granularity=%u)"
|
" (meta_block_size=%u, data_block_size=%u, bitmap_granularity=%u)"
|
||||||
" differs from OSD configuration (%lu/%u/%lu).\n",
|
" differs from OSD configuration (%lu/%u/%lu).\n",
|
||||||
hdr->meta_block_size, hdr->data_block_size, hdr->bitmap_granularity,
|
hdr->meta_block_size, hdr->data_block_size, hdr->bitmap_granularity,
|
||||||
bs->meta_block_size, bs->data_block_size, bs->bitmap_granularity
|
bs->dsk.meta_block_size, bs->dsk.data_block_size, bs->dsk.bitmap_granularity
|
||||||
);
|
);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Skip superblock
|
// Skip superblock
|
||||||
bs->meta_offset += bs->meta_block_size;
|
bs->dsk.meta_offset += bs->dsk.meta_block_size;
|
||||||
bs->meta_len -= bs->meta_block_size;
|
bs->dsk.meta_len -= bs->dsk.meta_block_size;
|
||||||
prev_done = 0;
|
prev_done = 0;
|
||||||
done_len = 0;
|
done_len = 0;
|
||||||
done_pos = 0;
|
done_pos = 0;
|
||||||
|
@ -145,23 +145,23 @@ resume_1:
|
||||||
wait_state = 2;
|
wait_state = 2;
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
if (metadata_read < bs->meta_len)
|
if (metadata_read < bs->dsk.meta_len)
|
||||||
{
|
{
|
||||||
GET_SQE();
|
GET_SQE();
|
||||||
data->iov = {
|
data->iov = {
|
||||||
(uint8_t*)metadata_buffer + (bs->inmemory_meta
|
(uint8_t*)metadata_buffer + (bs->inmemory_meta
|
||||||
? metadata_read
|
? metadata_read
|
||||||
: (prev == 1 ? bs->metadata_buf_size : 0)),
|
: (prev == 1 ? bs->metadata_buf_size : 0)),
|
||||||
bs->meta_len - metadata_read > bs->metadata_buf_size ? bs->metadata_buf_size : bs->meta_len - metadata_read,
|
bs->dsk.meta_len - metadata_read > bs->metadata_buf_size ? bs->metadata_buf_size : bs->dsk.meta_len - metadata_read,
|
||||||
};
|
};
|
||||||
data->callback = [this](ring_data_t *data) { handle_event(data); };
|
data->callback = [this](ring_data_t *data) { handle_event(data); };
|
||||||
if (!zero_on_init)
|
if (!zero_on_init)
|
||||||
my_uring_prep_readv(sqe, bs->meta_fd, &data->iov, 1, bs->meta_offset + metadata_read);
|
my_uring_prep_readv(sqe, bs->dsk.meta_fd, &data->iov, 1, bs->dsk.meta_offset + metadata_read);
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// Fill metadata with zeroes
|
// Fill metadata with zeroes
|
||||||
memset(data->iov.iov_base, 0, data->iov.iov_len);
|
memset(data->iov.iov_base, 0, data->iov.iov_len);
|
||||||
my_uring_prep_writev(sqe, bs->meta_fd, &data->iov, 1, bs->meta_offset + metadata_read);
|
my_uring_prep_writev(sqe, bs->dsk.meta_fd, &data->iov, 1, bs->dsk.meta_offset + metadata_read);
|
||||||
}
|
}
|
||||||
bs->ringloop->submit();
|
bs->ringloop->submit();
|
||||||
submitted = (prev == 1 ? 2 : 1);
|
submitted = (prev == 1 ? 2 : 1);
|
||||||
|
@ -172,11 +172,11 @@ resume_1:
|
||||||
void *done_buf = bs->inmemory_meta
|
void *done_buf = bs->inmemory_meta
|
||||||
? ((uint8_t*)metadata_buffer + done_pos)
|
? ((uint8_t*)metadata_buffer + done_pos)
|
||||||
: ((uint8_t*)metadata_buffer + (prev_done == 2 ? bs->metadata_buf_size : 0));
|
: ((uint8_t*)metadata_buffer + (prev_done == 2 ? bs->metadata_buf_size : 0));
|
||||||
unsigned count = bs->meta_block_size / bs->clean_entry_size;
|
unsigned count = bs->dsk.meta_block_size / bs->dsk.clean_entry_size;
|
||||||
for (int sector = 0; sector < done_len; sector += bs->meta_block_size)
|
for (int sector = 0; sector < done_len; sector += bs->dsk.meta_block_size)
|
||||||
{
|
{
|
||||||
// handle <count> entries
|
// handle <count> entries
|
||||||
handle_entries((uint8_t*)done_buf + sector, count, bs->block_order);
|
handle_entries((uint8_t*)done_buf + sector, count, bs->dsk.block_order);
|
||||||
done_cnt += count;
|
done_cnt += count;
|
||||||
}
|
}
|
||||||
prev_done = 0;
|
prev_done = 0;
|
||||||
|
@ -188,7 +188,7 @@ resume_1:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// metadata read finished
|
// metadata read finished
|
||||||
printf("Metadata entries loaded: %lu, free blocks: %lu / %lu\n", entries_loaded, bs->data_alloc->get_free_count(), bs->block_count);
|
printf("Metadata entries loaded: %lu, free blocks: %lu / %lu\n", entries_loaded, bs->data_alloc->get_free_count(), bs->dsk.block_count);
|
||||||
if (!bs->inmemory_meta)
|
if (!bs->inmemory_meta)
|
||||||
{
|
{
|
||||||
free(metadata_buffer);
|
free(metadata_buffer);
|
||||||
|
@ -197,7 +197,7 @@ resume_1:
|
||||||
if (zero_on_init && !bs->disable_meta_fsync)
|
if (zero_on_init && !bs->disable_meta_fsync)
|
||||||
{
|
{
|
||||||
GET_SQE();
|
GET_SQE();
|
||||||
my_uring_prep_fsync(sqe, bs->meta_fd, IORING_FSYNC_DATASYNC);
|
my_uring_prep_fsync(sqe, bs->dsk.meta_fd, IORING_FSYNC_DATASYNC);
|
||||||
data->iov = { 0 };
|
data->iov = { 0 };
|
||||||
data->callback = [this](ring_data_t *data) { handle_event(data); };
|
data->callback = [this](ring_data_t *data) { handle_event(data); };
|
||||||
submitted = 1;
|
submitted = 1;
|
||||||
|
@ -216,10 +216,10 @@ void blockstore_init_meta::handle_entries(void* entries, unsigned count, int blo
|
||||||
{
|
{
|
||||||
for (unsigned i = 0; i < count; i++)
|
for (unsigned i = 0; i < count; i++)
|
||||||
{
|
{
|
||||||
clean_disk_entry *entry = (clean_disk_entry*)((uint8_t*)entries + i*bs->clean_entry_size);
|
clean_disk_entry *entry = (clean_disk_entry*)((uint8_t*)entries + i*bs->dsk.clean_entry_size);
|
||||||
if (!bs->inmemory_meta && bs->clean_entry_bitmap_size)
|
if (!bs->inmemory_meta && bs->dsk.clean_entry_bitmap_size)
|
||||||
{
|
{
|
||||||
memcpy(bs->clean_bitmap + (done_cnt+i)*2*bs->clean_entry_bitmap_size, &entry->bitmap, 2*bs->clean_entry_bitmap_size);
|
memcpy(bs->clean_bitmap + (done_cnt+i)*2*bs->dsk.clean_entry_bitmap_size, &entry->bitmap, 2*bs->dsk.clean_entry_bitmap_size);
|
||||||
}
|
}
|
||||||
if (entry->oid.inode > 0)
|
if (entry->oid.inode > 0)
|
||||||
{
|
{
|
||||||
|
@ -240,7 +240,7 @@ void blockstore_init_meta::handle_entries(void* entries, unsigned count, int blo
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
bs->inode_space_stats[entry->oid.inode] += bs->data_block_size;
|
bs->inode_space_stats[entry->oid.inode] += bs->dsk.data_block_size;
|
||||||
}
|
}
|
||||||
entries_loaded++;
|
entries_loaded++;
|
||||||
#ifdef BLOCKSTORE_DEBUG
|
#ifdef BLOCKSTORE_DEBUG
|
||||||
|
@ -328,7 +328,7 @@ int blockstore_init_journal::loop()
|
||||||
data = ((ring_data_t*)sqe->user_data);
|
data = ((ring_data_t*)sqe->user_data);
|
||||||
data->iov = { submitted_buf, bs->journal.block_size };
|
data->iov = { submitted_buf, bs->journal.block_size };
|
||||||
data->callback = simple_callback;
|
data->callback = simple_callback;
|
||||||
my_uring_prep_readv(sqe, bs->journal.fd, &data->iov, 1, bs->journal.offset);
|
my_uring_prep_readv(sqe, bs->dsk.journal_fd, &data->iov, 1, bs->journal.offset);
|
||||||
bs->ringloop->submit();
|
bs->ringloop->submit();
|
||||||
wait_count = 1;
|
wait_count = 1;
|
||||||
resume_1:
|
resume_1:
|
||||||
|
@ -367,7 +367,7 @@ resume_1:
|
||||||
GET_SQE();
|
GET_SQE();
|
||||||
data->iov = (struct iovec){ submitted_buf, 2*bs->journal.block_size };
|
data->iov = (struct iovec){ submitted_buf, 2*bs->journal.block_size };
|
||||||
data->callback = simple_callback;
|
data->callback = simple_callback;
|
||||||
my_uring_prep_writev(sqe, bs->journal.fd, &data->iov, 1, bs->journal.offset);
|
my_uring_prep_writev(sqe, bs->dsk.journal_fd, &data->iov, 1, bs->journal.offset);
|
||||||
wait_count++;
|
wait_count++;
|
||||||
bs->ringloop->submit();
|
bs->ringloop->submit();
|
||||||
resume_6:
|
resume_6:
|
||||||
|
@ -379,7 +379,7 @@ resume_1:
|
||||||
if (!bs->disable_journal_fsync)
|
if (!bs->disable_journal_fsync)
|
||||||
{
|
{
|
||||||
GET_SQE();
|
GET_SQE();
|
||||||
my_uring_prep_fsync(sqe, bs->journal.fd, IORING_FSYNC_DATASYNC);
|
my_uring_prep_fsync(sqe, bs->dsk.journal_fd, IORING_FSYNC_DATASYNC);
|
||||||
data->iov = { 0 };
|
data->iov = { 0 };
|
||||||
data->callback = simple_callback;
|
data->callback = simple_callback;
|
||||||
wait_count++;
|
wait_count++;
|
||||||
|
@ -448,7 +448,7 @@ resume_1:
|
||||||
end - journal_pos < JOURNAL_BUFFER_SIZE ? end - journal_pos : JOURNAL_BUFFER_SIZE,
|
end - journal_pos < JOURNAL_BUFFER_SIZE ? end - journal_pos : JOURNAL_BUFFER_SIZE,
|
||||||
};
|
};
|
||||||
data->callback = [this](ring_data_t *data1) { handle_event(data1); };
|
data->callback = [this](ring_data_t *data1) { handle_event(data1); };
|
||||||
my_uring_prep_readv(sqe, bs->journal.fd, &data->iov, 1, bs->journal.offset + journal_pos);
|
my_uring_prep_readv(sqe, bs->dsk.journal_fd, &data->iov, 1, bs->journal.offset + journal_pos);
|
||||||
bs->ringloop->submit();
|
bs->ringloop->submit();
|
||||||
}
|
}
|
||||||
while (done.size() > 0)
|
while (done.size() > 0)
|
||||||
|
@ -463,7 +463,7 @@ resume_1:
|
||||||
GET_SQE();
|
GET_SQE();
|
||||||
data->iov = { init_write_buf, bs->journal.block_size };
|
data->iov = { init_write_buf, bs->journal.block_size };
|
||||||
data->callback = simple_callback;
|
data->callback = simple_callback;
|
||||||
my_uring_prep_writev(sqe, bs->journal.fd, &data->iov, 1, bs->journal.offset + init_write_sector);
|
my_uring_prep_writev(sqe, bs->dsk.journal_fd, &data->iov, 1, bs->journal.offset + init_write_sector);
|
||||||
wait_count++;
|
wait_count++;
|
||||||
bs->ringloop->submit();
|
bs->ringloop->submit();
|
||||||
resume_7:
|
resume_7:
|
||||||
|
@ -477,7 +477,7 @@ resume_1:
|
||||||
GET_SQE();
|
GET_SQE();
|
||||||
data->iov = { 0 };
|
data->iov = { 0 };
|
||||||
data->callback = simple_callback;
|
data->callback = simple_callback;
|
||||||
my_uring_prep_fsync(sqe, bs->journal.fd, IORING_FSYNC_DATASYNC);
|
my_uring_prep_fsync(sqe, bs->dsk.journal_fd, IORING_FSYNC_DATASYNC);
|
||||||
wait_count++;
|
wait_count++;
|
||||||
bs->ringloop->submit();
|
bs->ringloop->submit();
|
||||||
}
|
}
|
||||||
|
@ -544,7 +544,7 @@ resume_1:
|
||||||
? bs->journal.len-bs->journal.block_size - (bs->journal.next_free-bs->journal.used_start)
|
? bs->journal.len-bs->journal.block_size - (bs->journal.next_free-bs->journal.used_start)
|
||||||
: bs->journal.used_start - bs->journal.next_free),
|
: bs->journal.used_start - bs->journal.next_free),
|
||||||
bs->journal.used_start, bs->journal.next_free,
|
bs->journal.used_start, bs->journal.next_free,
|
||||||
bs->data_alloc->get_free_count(), bs->block_count
|
bs->data_alloc->get_free_count(), bs->dsk.block_count
|
||||||
);
|
);
|
||||||
bs->journal.crc32_last = crc32_last;
|
bs->journal.crc32_last = crc32_last;
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -669,9 +669,9 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t done_pos, u
|
||||||
};
|
};
|
||||||
void *bmp = NULL;
|
void *bmp = NULL;
|
||||||
void *bmp_from = (uint8_t*)je + sizeof(journal_entry_small_write);
|
void *bmp_from = (uint8_t*)je + sizeof(journal_entry_small_write);
|
||||||
if (bs->clean_entry_bitmap_size <= sizeof(void*))
|
if (bs->dsk.clean_entry_bitmap_size <= sizeof(void*))
|
||||||
{
|
{
|
||||||
memcpy(&bmp, bmp_from, bs->clean_entry_bitmap_size);
|
memcpy(&bmp, bmp_from, bs->dsk.clean_entry_bitmap_size);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -679,8 +679,8 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t done_pos, u
|
||||||
// allocations for entry bitmaps. This can only be fixed by using
|
// allocations for entry bitmaps. This can only be fixed by using
|
||||||
// a patched map with dynamic entry size, but not the btree_map,
|
// a patched map with dynamic entry size, but not the btree_map,
|
||||||
// because it doesn't keep iterators valid all the time.
|
// because it doesn't keep iterators valid all the time.
|
||||||
bmp = malloc_or_die(bs->clean_entry_bitmap_size);
|
bmp = malloc_or_die(bs->dsk.clean_entry_bitmap_size);
|
||||||
memcpy(bmp, bmp_from, bs->clean_entry_bitmap_size);
|
memcpy(bmp, bmp_from, bs->dsk.clean_entry_bitmap_size);
|
||||||
}
|
}
|
||||||
bs->dirty_db.emplace(ov, (dirty_entry){
|
bs->dirty_db.emplace(ov, (dirty_entry){
|
||||||
.state = (BS_ST_SMALL_WRITE | BS_ST_SYNCED),
|
.state = (BS_ST_SMALL_WRITE | BS_ST_SYNCED),
|
||||||
|
@ -712,7 +712,7 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t done_pos, u
|
||||||
printf(
|
printf(
|
||||||
"je_big_write%s oid=%lx:%lx ver=%lu loc=%lu\n",
|
"je_big_write%s oid=%lx:%lx ver=%lu loc=%lu\n",
|
||||||
je->type == JE_BIG_WRITE_INSTANT ? "_instant" : "",
|
je->type == JE_BIG_WRITE_INSTANT ? "_instant" : "",
|
||||||
je->big_write.oid.inode, je->big_write.oid.stripe, je->big_write.version, je->big_write.location >> bs->block_order
|
je->big_write.oid.inode, je->big_write.oid.stripe, je->big_write.version, je->big_write.location >> bs->dsk.block_order
|
||||||
);
|
);
|
||||||
#endif
|
#endif
|
||||||
auto dirty_it = bs->dirty_db.upper_bound((obj_ver_id){
|
auto dirty_it = bs->dirty_db.upper_bound((obj_ver_id){
|
||||||
|
@ -750,9 +750,9 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t done_pos, u
|
||||||
};
|
};
|
||||||
void *bmp = NULL;
|
void *bmp = NULL;
|
||||||
void *bmp_from = (uint8_t*)je + sizeof(journal_entry_big_write);
|
void *bmp_from = (uint8_t*)je + sizeof(journal_entry_big_write);
|
||||||
if (bs->clean_entry_bitmap_size <= sizeof(void*))
|
if (bs->dsk.clean_entry_bitmap_size <= sizeof(void*))
|
||||||
{
|
{
|
||||||
memcpy(&bmp, bmp_from, bs->clean_entry_bitmap_size);
|
memcpy(&bmp, bmp_from, bs->dsk.clean_entry_bitmap_size);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -760,8 +760,8 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t done_pos, u
|
||||||
// allocations for entry bitmaps. This can only be fixed by using
|
// allocations for entry bitmaps. This can only be fixed by using
|
||||||
// a patched map with dynamic entry size, but not the btree_map,
|
// a patched map with dynamic entry size, but not the btree_map,
|
||||||
// because it doesn't keep iterators valid all the time.
|
// because it doesn't keep iterators valid all the time.
|
||||||
bmp = malloc_or_die(bs->clean_entry_bitmap_size);
|
bmp = malloc_or_die(bs->dsk.clean_entry_bitmap_size);
|
||||||
memcpy(bmp, bmp_from, bs->clean_entry_bitmap_size);
|
memcpy(bmp, bmp_from, bs->dsk.clean_entry_bitmap_size);
|
||||||
}
|
}
|
||||||
auto dirty_it = bs->dirty_db.emplace(ov, (dirty_entry){
|
auto dirty_it = bs->dirty_db.emplace(ov, (dirty_entry){
|
||||||
.state = (BS_ST_BIG_WRITE | BS_ST_SYNCED),
|
.state = (BS_ST_BIG_WRITE | BS_ST_SYNCED),
|
||||||
|
@ -772,7 +772,7 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t done_pos, u
|
||||||
.journal_sector = proc_pos,
|
.journal_sector = proc_pos,
|
||||||
.bitmap = bmp,
|
.bitmap = bmp,
|
||||||
}).first;
|
}).first;
|
||||||
if (bs->data_alloc->get(je->big_write.location >> bs->block_order))
|
if (bs->data_alloc->get(je->big_write.location >> bs->dsk.block_order))
|
||||||
{
|
{
|
||||||
// This is probably a big_write that's already flushed and freed, but it may
|
// This is probably a big_write that's already flushed and freed, but it may
|
||||||
// also indicate a bug. So we remember such entries and recheck them afterwards.
|
// also indicate a bug. So we remember such entries and recheck them afterwards.
|
||||||
|
@ -785,11 +785,11 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t done_pos, u
|
||||||
#ifdef BLOCKSTORE_DEBUG
|
#ifdef BLOCKSTORE_DEBUG
|
||||||
printf(
|
printf(
|
||||||
"Allocate block (journal) %lu: %lx:%lx v%lu\n",
|
"Allocate block (journal) %lu: %lx:%lx v%lu\n",
|
||||||
je->big_write.location >> bs->block_order,
|
je->big_write.location >> bs->dsk.block_order,
|
||||||
ov.oid.inode, ov.oid.stripe, ov.version
|
ov.oid.inode, ov.oid.stripe, ov.version
|
||||||
);
|
);
|
||||||
#endif
|
#endif
|
||||||
bs->data_alloc->set(je->big_write.location >> bs->block_order, true);
|
bs->data_alloc->set(je->big_write.location >> bs->dsk.block_order, true);
|
||||||
}
|
}
|
||||||
bs->journal.used_sectors[proc_pos]++;
|
bs->journal.used_sectors[proc_pos]++;
|
||||||
#ifdef BLOCKSTORE_DEBUG
|
#ifdef BLOCKSTORE_DEBUG
|
||||||
|
@ -913,8 +913,8 @@ void blockstore_init_journal::erase_dirty_object(blockstore_dirty_db_t::iterator
|
||||||
if (exists && clean_loc == UINT64_MAX)
|
if (exists && clean_loc == UINT64_MAX)
|
||||||
{
|
{
|
||||||
auto & sp = bs->inode_space_stats[oid.inode];
|
auto & sp = bs->inode_space_stats[oid.inode];
|
||||||
if (sp > bs->data_block_size)
|
if (sp > bs->dsk.data_block_size)
|
||||||
sp -= bs->data_block_size;
|
sp -= bs->dsk.data_block_size;
|
||||||
else
|
else
|
||||||
bs->inode_space_stats.erase(oid.inode);
|
bs->inode_space_stats.erase(oid.inode);
|
||||||
}
|
}
|
||||||
|
|
|
@ -175,7 +175,7 @@ void blockstore_impl_t::prepare_journal_sector_write(int cur_sector, blockstore_
|
||||||
};
|
};
|
||||||
data->callback = [this, flush_id = journal.submit_id](ring_data_t *data) { handle_journal_write(data, flush_id); };
|
data->callback = [this, flush_id = journal.submit_id](ring_data_t *data) { handle_journal_write(data, flush_id); };
|
||||||
my_uring_prep_writev(
|
my_uring_prep_writev(
|
||||||
sqe, journal.fd, &data->iov, 1, journal.offset + journal.sector_info[cur_sector].offset
|
sqe, dsk.journal_fd, &data->iov, 1, journal.offset + journal.sector_info[cur_sector].offset
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
journal.sector_info[cur_sector].dirty = false;
|
journal.sector_info[cur_sector].dirty = false;
|
||||||
|
|
|
@ -164,7 +164,6 @@ inline bool operator < (const pending_journaling_t & a, const pending_journaling
|
||||||
struct journal_t
|
struct journal_t
|
||||||
{
|
{
|
||||||
int fd;
|
int fd;
|
||||||
uint64_t device_size;
|
|
||||||
bool inmemory = false;
|
bool inmemory = false;
|
||||||
bool flush_journal = false;
|
bool flush_journal = false;
|
||||||
void *buffer = NULL;
|
void *buffer = NULL;
|
||||||
|
|
|
@ -4,23 +4,10 @@
|
||||||
#include <sys/file.h>
|
#include <sys/file.h>
|
||||||
#include "blockstore_impl.h"
|
#include "blockstore_impl.h"
|
||||||
|
|
||||||
static uint32_t is_power_of_two(uint64_t value)
|
|
||||||
{
|
|
||||||
uint32_t l = 0;
|
|
||||||
while (value > 1)
|
|
||||||
{
|
|
||||||
if (value & 1)
|
|
||||||
{
|
|
||||||
return 64;
|
|
||||||
}
|
|
||||||
value = value >> 1;
|
|
||||||
l++;
|
|
||||||
}
|
|
||||||
return l;
|
|
||||||
}
|
|
||||||
|
|
||||||
void blockstore_impl_t::parse_config(blockstore_config_t & config)
|
void blockstore_impl_t::parse_config(blockstore_config_t & config)
|
||||||
{
|
{
|
||||||
|
// Common disk options
|
||||||
|
dsk.parse_config(config);
|
||||||
// Parse
|
// Parse
|
||||||
if (config["readonly"] == "true" || config["readonly"] == "1" || config["readonly"] == "yes")
|
if (config["readonly"] == "true" || config["readonly"] == "1" || config["readonly"] == "yes")
|
||||||
{
|
{
|
||||||
|
@ -38,10 +25,6 @@ void blockstore_impl_t::parse_config(blockstore_config_t & config)
|
||||||
{
|
{
|
||||||
disable_journal_fsync = true;
|
disable_journal_fsync = true;
|
||||||
}
|
}
|
||||||
if (config["disable_device_lock"] == "true" || config["disable_device_lock"] == "1" || config["disable_device_lock"] == "yes")
|
|
||||||
{
|
|
||||||
disable_flock = true;
|
|
||||||
}
|
|
||||||
if (config["flush_journal"] == "true" || config["flush_journal"] == "1" || config["flush_journal"] == "yes")
|
if (config["flush_journal"] == "true" || config["flush_journal"] == "1" || config["flush_journal"] == "yes")
|
||||||
{
|
{
|
||||||
// Only flush journal and exit
|
// Only flush journal and exit
|
||||||
|
@ -56,24 +39,11 @@ void blockstore_impl_t::parse_config(blockstore_config_t & config)
|
||||||
immediate_commit = IMMEDIATE_SMALL;
|
immediate_commit = IMMEDIATE_SMALL;
|
||||||
}
|
}
|
||||||
metadata_buf_size = strtoull(config["meta_buf_size"].c_str(), NULL, 10);
|
metadata_buf_size = strtoull(config["meta_buf_size"].c_str(), NULL, 10);
|
||||||
cfg_journal_size = strtoull(config["journal_size"].c_str(), NULL, 10);
|
|
||||||
data_device = config["data_device"];
|
|
||||||
data_offset = strtoull(config["data_offset"].c_str(), NULL, 10);
|
|
||||||
cfg_data_size = strtoull(config["data_size"].c_str(), NULL, 10);
|
|
||||||
meta_device = config["meta_device"];
|
|
||||||
meta_offset = strtoull(config["meta_offset"].c_str(), NULL, 10);
|
|
||||||
data_block_size = strtoull(config["block_size"].c_str(), NULL, 10);
|
|
||||||
inmemory_meta = config["inmemory_metadata"] != "false";
|
inmemory_meta = config["inmemory_metadata"] != "false";
|
||||||
journal_device = config["journal_device"];
|
|
||||||
journal.offset = strtoull(config["journal_offset"].c_str(), NULL, 10);
|
|
||||||
journal.sector_count = strtoull(config["journal_sector_buffer_count"].c_str(), NULL, 10);
|
journal.sector_count = strtoull(config["journal_sector_buffer_count"].c_str(), NULL, 10);
|
||||||
journal.no_same_sector_overwrites = config["journal_no_same_sector_overwrites"] == "true" ||
|
journal.no_same_sector_overwrites = config["journal_no_same_sector_overwrites"] == "true" ||
|
||||||
config["journal_no_same_sector_overwrites"] == "1" || config["journal_no_same_sector_overwrites"] == "yes";
|
config["journal_no_same_sector_overwrites"] == "1" || config["journal_no_same_sector_overwrites"] == "yes";
|
||||||
journal.inmemory = config["inmemory_journal"] != "false";
|
journal.inmemory = config["inmemory_journal"] != "false";
|
||||||
disk_alignment = strtoull(config["disk_alignment"].c_str(), NULL, 10);
|
|
||||||
journal_block_size = strtoull(config["journal_block_size"].c_str(), NULL, 10);
|
|
||||||
meta_block_size = strtoull(config["meta_block_size"].c_str(), NULL, 10);
|
|
||||||
bitmap_granularity = strtoull(config["bitmap_granularity"].c_str(), NULL, 10);
|
|
||||||
max_flusher_count = strtoull(config["max_flusher_count"].c_str(), NULL, 10);
|
max_flusher_count = strtoull(config["max_flusher_count"].c_str(), NULL, 10);
|
||||||
if (!max_flusher_count)
|
if (!max_flusher_count)
|
||||||
max_flusher_count = strtoull(config["flusher_count"].c_str(), NULL, 10);
|
max_flusher_count = strtoull(config["flusher_count"].c_str(), NULL, 10);
|
||||||
|
@ -85,14 +55,6 @@ void blockstore_impl_t::parse_config(blockstore_config_t & config)
|
||||||
throttle_target_parallelism = strtoull(config["throttle_target_parallelism"].c_str(), NULL, 10);
|
throttle_target_parallelism = strtoull(config["throttle_target_parallelism"].c_str(), NULL, 10);
|
||||||
throttle_threshold_us = strtoull(config["throttle_threshold_us"].c_str(), NULL, 10);
|
throttle_threshold_us = strtoull(config["throttle_threshold_us"].c_str(), NULL, 10);
|
||||||
// Validate
|
// Validate
|
||||||
if (!data_block_size)
|
|
||||||
{
|
|
||||||
data_block_size = (1 << DEFAULT_DATA_BLOCK_ORDER);
|
|
||||||
}
|
|
||||||
if ((block_order = is_power_of_two(data_block_size)) >= 64 || data_block_size < MIN_DATA_BLOCK_SIZE || data_block_size >= MAX_DATA_BLOCK_SIZE)
|
|
||||||
{
|
|
||||||
throw std::runtime_error("Bad block size");
|
|
||||||
}
|
|
||||||
if (!max_flusher_count)
|
if (!max_flusher_count)
|
||||||
{
|
{
|
||||||
max_flusher_count = 256;
|
max_flusher_count = 256;
|
||||||
|
@ -105,62 +67,6 @@ void blockstore_impl_t::parse_config(blockstore_config_t & config)
|
||||||
{
|
{
|
||||||
max_write_iodepth = 128;
|
max_write_iodepth = 128;
|
||||||
}
|
}
|
||||||
if (!disk_alignment)
|
|
||||||
{
|
|
||||||
disk_alignment = 4096;
|
|
||||||
}
|
|
||||||
else if (disk_alignment % DIRECT_IO_ALIGNMENT)
|
|
||||||
{
|
|
||||||
throw std::runtime_error("disk_alignment must be a multiple of "+std::to_string(DIRECT_IO_ALIGNMENT));
|
|
||||||
}
|
|
||||||
if (!journal_block_size)
|
|
||||||
{
|
|
||||||
journal_block_size = 4096;
|
|
||||||
}
|
|
||||||
else if (journal_block_size % DIRECT_IO_ALIGNMENT)
|
|
||||||
{
|
|
||||||
throw std::runtime_error("journal_block_size must be a multiple of "+std::to_string(DIRECT_IO_ALIGNMENT));
|
|
||||||
}
|
|
||||||
if (!meta_block_size)
|
|
||||||
{
|
|
||||||
meta_block_size = 4096;
|
|
||||||
}
|
|
||||||
else if (meta_block_size % DIRECT_IO_ALIGNMENT)
|
|
||||||
{
|
|
||||||
throw std::runtime_error("meta_block_size must be a multiple of "+std::to_string(DIRECT_IO_ALIGNMENT));
|
|
||||||
}
|
|
||||||
if (data_offset % disk_alignment)
|
|
||||||
{
|
|
||||||
throw std::runtime_error("data_offset must be a multiple of disk_alignment = "+std::to_string(disk_alignment));
|
|
||||||
}
|
|
||||||
if (!bitmap_granularity)
|
|
||||||
{
|
|
||||||
bitmap_granularity = DEFAULT_BITMAP_GRANULARITY;
|
|
||||||
}
|
|
||||||
else if (bitmap_granularity % disk_alignment)
|
|
||||||
{
|
|
||||||
throw std::runtime_error("Sparse write tracking granularity must be a multiple of disk_alignment = "+std::to_string(disk_alignment));
|
|
||||||
}
|
|
||||||
if (data_block_size % bitmap_granularity)
|
|
||||||
{
|
|
||||||
throw std::runtime_error("Block size must be a multiple of sparse write tracking granularity");
|
|
||||||
}
|
|
||||||
if (journal_device == meta_device || meta_device == "" && journal_device == data_device)
|
|
||||||
{
|
|
||||||
journal_device = "";
|
|
||||||
}
|
|
||||||
if (meta_device == data_device)
|
|
||||||
{
|
|
||||||
meta_device = "";
|
|
||||||
}
|
|
||||||
if (meta_offset % meta_block_size)
|
|
||||||
{
|
|
||||||
throw std::runtime_error("meta_offset must be a multiple of meta_block_size = "+std::to_string(meta_block_size));
|
|
||||||
}
|
|
||||||
if (journal.offset % journal_block_size)
|
|
||||||
{
|
|
||||||
throw std::runtime_error("journal_offset must be a multiple of journal_block_size = "+std::to_string(journal_block_size));
|
|
||||||
}
|
|
||||||
if (journal.sector_count < 2)
|
if (journal.sector_count < 2)
|
||||||
{
|
{
|
||||||
journal.sector_count = 32;
|
journal.sector_count = 32;
|
||||||
|
@ -169,11 +75,11 @@ void blockstore_impl_t::parse_config(blockstore_config_t & config)
|
||||||
{
|
{
|
||||||
metadata_buf_size = 4*1024*1024;
|
metadata_buf_size = 4*1024*1024;
|
||||||
}
|
}
|
||||||
if (meta_device == "")
|
if (dsk.meta_device == "")
|
||||||
{
|
{
|
||||||
disable_meta_fsync = disable_data_fsync;
|
disable_meta_fsync = disable_data_fsync;
|
||||||
}
|
}
|
||||||
if (journal_device == "")
|
if (dsk.journal_device == "")
|
||||||
{
|
{
|
||||||
disable_journal_fsync = disable_meta_fsync;
|
disable_journal_fsync = disable_meta_fsync;
|
||||||
}
|
}
|
||||||
|
@ -202,238 +108,46 @@ void blockstore_impl_t::parse_config(blockstore_config_t & config)
|
||||||
throttle_threshold_us = 50;
|
throttle_threshold_us = 50;
|
||||||
}
|
}
|
||||||
// init some fields
|
// init some fields
|
||||||
clean_entry_bitmap_size = data_block_size / bitmap_granularity / 8;
|
journal.block_size = dsk.journal_block_size;
|
||||||
clean_entry_size = sizeof(clean_disk_entry) + 2*clean_entry_bitmap_size;
|
journal.next_free = dsk.journal_block_size;
|
||||||
journal.block_size = journal_block_size;
|
journal.used_start = dsk.journal_block_size;
|
||||||
journal.next_free = journal_block_size;
|
|
||||||
journal.used_start = journal_block_size;
|
|
||||||
// no free space because sector is initially unmapped
|
// no free space because sector is initially unmapped
|
||||||
journal.in_sector_pos = journal_block_size;
|
journal.in_sector_pos = dsk.journal_block_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
void blockstore_impl_t::calc_lengths()
|
void blockstore_impl_t::calc_lengths()
|
||||||
{
|
{
|
||||||
// data
|
dsk.calc_lengths();
|
||||||
data_len = data_device_size - data_offset;
|
journal.len = dsk.journal_len;
|
||||||
if (data_fd == meta_fd && data_offset < meta_offset)
|
journal.block_size = dsk.journal_block_size;
|
||||||
{
|
journal.offset = dsk.journal_offset;
|
||||||
data_len = meta_offset - data_offset;
|
|
||||||
}
|
|
||||||
if (data_fd == journal.fd && data_offset < journal.offset)
|
|
||||||
{
|
|
||||||
data_len = data_len < journal.offset-data_offset
|
|
||||||
? data_len : journal.offset-data_offset;
|
|
||||||
}
|
|
||||||
if (cfg_data_size != 0)
|
|
||||||
{
|
|
||||||
if (data_len < cfg_data_size)
|
|
||||||
{
|
|
||||||
throw std::runtime_error("Data area ("+std::to_string(data_len)+
|
|
||||||
" bytes) is less than configured size ("+std::to_string(cfg_data_size)+" bytes)");
|
|
||||||
}
|
|
||||||
data_len = cfg_data_size;
|
|
||||||
}
|
|
||||||
// meta
|
|
||||||
uint64_t meta_area_size = (meta_fd == data_fd ? data_device_size : meta_device_size) - meta_offset;
|
|
||||||
if (meta_fd == data_fd && meta_offset <= data_offset)
|
|
||||||
{
|
|
||||||
meta_area_size = data_offset - meta_offset;
|
|
||||||
}
|
|
||||||
if (meta_fd == journal.fd && meta_offset <= journal.offset)
|
|
||||||
{
|
|
||||||
meta_area_size = meta_area_size < journal.offset-meta_offset
|
|
||||||
? meta_area_size : journal.offset-meta_offset;
|
|
||||||
}
|
|
||||||
// journal
|
|
||||||
journal.len = (journal.fd == data_fd ? data_device_size : (journal.fd == meta_fd ? meta_device_size : journal.device_size)) - journal.offset;
|
|
||||||
if (journal.fd == data_fd && journal.offset <= data_offset)
|
|
||||||
{
|
|
||||||
journal.len = data_offset - journal.offset;
|
|
||||||
}
|
|
||||||
if (journal.fd == meta_fd && journal.offset <= meta_offset)
|
|
||||||
{
|
|
||||||
journal.len = journal.len < meta_offset-journal.offset
|
|
||||||
? journal.len : meta_offset-journal.offset;
|
|
||||||
}
|
|
||||||
// required metadata size
|
|
||||||
block_count = data_len / data_block_size;
|
|
||||||
meta_len = (1 + (block_count - 1 + meta_block_size / clean_entry_size) / (meta_block_size / clean_entry_size)) * meta_block_size;
|
|
||||||
if (meta_area_size < meta_len)
|
|
||||||
{
|
|
||||||
throw std::runtime_error("Metadata area is too small, need at least "+std::to_string(meta_len)+" bytes");
|
|
||||||
}
|
|
||||||
if (inmemory_meta)
|
if (inmemory_meta)
|
||||||
{
|
{
|
||||||
metadata_buffer = memalign(MEM_ALIGNMENT, meta_len);
|
metadata_buffer = memalign(MEM_ALIGNMENT, dsk.meta_len);
|
||||||
if (!metadata_buffer)
|
if (!metadata_buffer)
|
||||||
throw std::runtime_error("Failed to allocate memory for the metadata");
|
throw std::runtime_error("Failed to allocate memory for the metadata");
|
||||||
}
|
}
|
||||||
else if (clean_entry_bitmap_size)
|
else if (dsk.clean_entry_bitmap_size)
|
||||||
{
|
{
|
||||||
clean_bitmap = (uint8_t*)malloc(block_count * 2*clean_entry_bitmap_size);
|
clean_bitmap = (uint8_t*)malloc(dsk.block_count * 2*dsk.clean_entry_bitmap_size);
|
||||||
if (!clean_bitmap)
|
if (!clean_bitmap)
|
||||||
throw std::runtime_error("Failed to allocate memory for the metadata sparse write bitmap");
|
throw std::runtime_error("Failed to allocate memory for the metadata sparse write bitmap");
|
||||||
}
|
}
|
||||||
// requested journal size
|
|
||||||
if (cfg_journal_size > journal.len)
|
|
||||||
{
|
|
||||||
throw std::runtime_error("Requested journal_size is too large");
|
|
||||||
}
|
|
||||||
else if (cfg_journal_size > 0)
|
|
||||||
{
|
|
||||||
journal.len = cfg_journal_size;
|
|
||||||
}
|
|
||||||
if (journal.len < MIN_JOURNAL_SIZE)
|
|
||||||
{
|
|
||||||
throw std::runtime_error("Journal is too small, need at least "+std::to_string(MIN_JOURNAL_SIZE)+" bytes");
|
|
||||||
}
|
|
||||||
if (journal.inmemory)
|
if (journal.inmemory)
|
||||||
{
|
{
|
||||||
journal.buffer = memalign(MEM_ALIGNMENT, journal.len);
|
journal.buffer = memalign(MEM_ALIGNMENT, journal.len);
|
||||||
if (!journal.buffer)
|
if (!journal.buffer)
|
||||||
throw std::runtime_error("Failed to allocate memory for journal");
|
throw std::runtime_error("Failed to allocate memory for journal");
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
static void check_size(int fd, uint64_t *size, uint64_t *sectsize, std::string name)
|
|
||||||
{
|
|
||||||
int sect;
|
|
||||||
struct stat st;
|
|
||||||
if (fstat(fd, &st) < 0)
|
|
||||||
{
|
|
||||||
throw std::runtime_error("Failed to stat "+name);
|
|
||||||
}
|
|
||||||
if (S_ISREG(st.st_mode))
|
|
||||||
{
|
|
||||||
*size = st.st_size;
|
|
||||||
if (sectsize)
|
|
||||||
{
|
|
||||||
*sectsize = st.st_blksize;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else if (S_ISBLK(st.st_mode))
|
|
||||||
{
|
|
||||||
if (ioctl(fd, BLKGETSIZE64, size) < 0 ||
|
|
||||||
ioctl(fd, BLKSSZGET, &sect) < 0)
|
|
||||||
{
|
|
||||||
throw std::runtime_error("Failed to get "+name+" size or block size: "+strerror(errno));
|
|
||||||
}
|
|
||||||
if (sectsize)
|
|
||||||
{
|
|
||||||
*sectsize = sect;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
throw std::runtime_error(name+" is neither a file nor a block device");
|
journal.sector_buf = (uint8_t*)memalign(MEM_ALIGNMENT, journal.sector_count * dsk.journal_block_size);
|
||||||
}
|
if (!journal.sector_buf)
|
||||||
}
|
throw std::bad_alloc();
|
||||||
|
|
||||||
void blockstore_impl_t::open_data()
|
|
||||||
{
|
|
||||||
data_fd = open(data_device.c_str(), O_DIRECT|O_RDWR);
|
|
||||||
if (data_fd == -1)
|
|
||||||
{
|
|
||||||
throw std::runtime_error("Failed to open data device");
|
|
||||||
}
|
|
||||||
check_size(data_fd, &data_device_size, &data_device_sect, "data device");
|
|
||||||
if (disk_alignment % data_device_sect)
|
|
||||||
{
|
|
||||||
throw std::runtime_error(
|
|
||||||
"disk_alignment ("+std::to_string(disk_alignment)+
|
|
||||||
") is not a multiple of data device sector size ("+std::to_string(data_device_sect)+")"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
if (data_offset >= data_device_size)
|
|
||||||
{
|
|
||||||
throw std::runtime_error("data_offset exceeds device size = "+std::to_string(data_device_size));
|
|
||||||
}
|
|
||||||
if (!disable_flock && flock(data_fd, LOCK_EX|LOCK_NB) != 0)
|
|
||||||
{
|
|
||||||
throw std::runtime_error(std::string("Failed to lock data device: ") + strerror(errno));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void blockstore_impl_t::open_meta()
|
|
||||||
{
|
|
||||||
if (meta_device != "")
|
|
||||||
{
|
|
||||||
meta_offset = 0;
|
|
||||||
meta_fd = open(meta_device.c_str(), O_DIRECT|O_RDWR);
|
|
||||||
if (meta_fd == -1)
|
|
||||||
{
|
|
||||||
throw std::runtime_error("Failed to open metadata device");
|
|
||||||
}
|
|
||||||
check_size(meta_fd, &meta_device_size, &meta_device_sect, "metadata device");
|
|
||||||
if (meta_offset >= meta_device_size)
|
|
||||||
{
|
|
||||||
throw std::runtime_error("meta_offset exceeds device size = "+std::to_string(meta_device_size));
|
|
||||||
}
|
|
||||||
if (!disable_flock && flock(meta_fd, LOCK_EX|LOCK_NB) != 0)
|
|
||||||
{
|
|
||||||
throw std::runtime_error(std::string("Failed to lock metadata device: ") + strerror(errno));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
meta_fd = data_fd;
|
|
||||||
meta_device_sect = data_device_sect;
|
|
||||||
meta_device_size = 0;
|
|
||||||
if (meta_offset >= data_device_size)
|
|
||||||
{
|
|
||||||
throw std::runtime_error("meta_offset exceeds device size = "+std::to_string(data_device_size));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (meta_block_size % meta_device_sect)
|
|
||||||
{
|
|
||||||
throw std::runtime_error(
|
|
||||||
"meta_block_size ("+std::to_string(meta_block_size)+
|
|
||||||
") is not a multiple of data device sector size ("+std::to_string(meta_device_sect)+")"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void blockstore_impl_t::open_journal()
|
|
||||||
{
|
|
||||||
if (journal_device != "")
|
|
||||||
{
|
|
||||||
journal.fd = open(journal_device.c_str(), O_DIRECT|O_RDWR);
|
|
||||||
if (journal.fd == -1)
|
|
||||||
{
|
|
||||||
throw std::runtime_error("Failed to open journal device");
|
|
||||||
}
|
|
||||||
check_size(journal.fd, &journal.device_size, &journal_device_sect, "journal device");
|
|
||||||
if (!disable_flock && flock(journal.fd, LOCK_EX|LOCK_NB) != 0)
|
|
||||||
{
|
|
||||||
throw std::runtime_error(std::string("Failed to lock journal device: ") + strerror(errno));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
journal.fd = meta_fd;
|
|
||||||
journal_device_sect = meta_device_sect;
|
|
||||||
journal.device_size = 0;
|
|
||||||
if (journal.offset >= data_device_size)
|
|
||||||
{
|
|
||||||
throw std::runtime_error("journal_offset exceeds device size");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
journal.sector_info = (journal_sector_info_t*)calloc(journal.sector_count, sizeof(journal_sector_info_t));
|
journal.sector_info = (journal_sector_info_t*)calloc(journal.sector_count, sizeof(journal_sector_info_t));
|
||||||
if (!journal.sector_info)
|
if (!journal.sector_info)
|
||||||
{
|
{
|
||||||
throw std::bad_alloc();
|
throw std::bad_alloc();
|
||||||
}
|
}
|
||||||
if (!journal.inmemory)
|
|
||||||
{
|
|
||||||
journal.sector_buf = (uint8_t*)memalign(MEM_ALIGNMENT, journal.sector_count * journal_block_size);
|
|
||||||
if (!journal.sector_buf)
|
|
||||||
throw std::bad_alloc();
|
|
||||||
}
|
|
||||||
if (journal_block_size % journal_device_sect)
|
|
||||||
{
|
|
||||||
throw std::runtime_error(
|
|
||||||
"journal_block_size ("+std::to_string(journal_block_size)+
|
|
||||||
") is not a multiple of journal device sector size ("+std::to_string(journal_device_sect)+")"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -32,9 +32,9 @@ int blockstore_impl_t::fulfill_read_push(blockstore_op_t *op, void *buf, uint64_
|
||||||
PRIV(op)->pending_ops++;
|
PRIV(op)->pending_ops++;
|
||||||
my_uring_prep_readv(
|
my_uring_prep_readv(
|
||||||
sqe,
|
sqe,
|
||||||
IS_JOURNAL(item_state) ? journal.fd : data_fd,
|
IS_JOURNAL(item_state) ? dsk.journal_fd : dsk.data_fd,
|
||||||
&data->iov, 1,
|
&data->iov, 1,
|
||||||
(IS_JOURNAL(item_state) ? journal.offset : data_offset) + offset
|
(IS_JOURNAL(item_state) ? dsk.journal_offset : dsk.data_offset) + offset
|
||||||
);
|
);
|
||||||
data->callback = [this, op](ring_data_t *data) { handle_read_event(data, op); };
|
data->callback = [this, op](ring_data_t *data) { handle_read_event(data, op); };
|
||||||
return 1;
|
return 1;
|
||||||
|
@ -97,15 +97,15 @@ endwhile:
|
||||||
uint8_t* blockstore_impl_t::get_clean_entry_bitmap(uint64_t block_loc, int offset)
|
uint8_t* blockstore_impl_t::get_clean_entry_bitmap(uint64_t block_loc, int offset)
|
||||||
{
|
{
|
||||||
uint8_t *clean_entry_bitmap;
|
uint8_t *clean_entry_bitmap;
|
||||||
uint64_t meta_loc = block_loc >> block_order;
|
uint64_t meta_loc = block_loc >> dsk.block_order;
|
||||||
if (inmemory_meta)
|
if (inmemory_meta)
|
||||||
{
|
{
|
||||||
uint64_t sector = (meta_loc / (meta_block_size / clean_entry_size)) * meta_block_size;
|
uint64_t sector = (meta_loc / (dsk.meta_block_size / dsk.clean_entry_size)) * dsk.meta_block_size;
|
||||||
uint64_t pos = (meta_loc % (meta_block_size / clean_entry_size));
|
uint64_t pos = (meta_loc % (dsk.meta_block_size / dsk.clean_entry_size));
|
||||||
clean_entry_bitmap = ((uint8_t*)metadata_buffer + sector + pos*clean_entry_size + sizeof(clean_disk_entry) + offset);
|
clean_entry_bitmap = ((uint8_t*)metadata_buffer + sector + pos*dsk.clean_entry_size + sizeof(clean_disk_entry) + offset);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
clean_entry_bitmap = (uint8_t*)(clean_bitmap + meta_loc*2*clean_entry_bitmap_size + offset);
|
clean_entry_bitmap = (uint8_t*)(clean_bitmap + meta_loc*2*dsk.clean_entry_bitmap_size + offset);
|
||||||
return clean_entry_bitmap;
|
return clean_entry_bitmap;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -152,8 +152,8 @@ int blockstore_impl_t::dequeue_read(blockstore_op_t *read_op)
|
||||||
result_version = dirty_it->first.version;
|
result_version = dirty_it->first.version;
|
||||||
if (read_op->bitmap)
|
if (read_op->bitmap)
|
||||||
{
|
{
|
||||||
void *bmp_ptr = (clean_entry_bitmap_size > sizeof(void*) ? dirty_it->second.bitmap : &dirty_it->second.bitmap);
|
void *bmp_ptr = (dsk.clean_entry_bitmap_size > sizeof(void*) ? dirty_it->second.bitmap : &dirty_it->second.bitmap);
|
||||||
memcpy(read_op->bitmap, bmp_ptr, clean_entry_bitmap_size);
|
memcpy(read_op->bitmap, bmp_ptr, dsk.clean_entry_bitmap_size);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!fulfill_read(read_op, fulfilled, dirty.offset, dirty.offset + dirty.len,
|
if (!fulfill_read(read_op, fulfilled, dirty.offset, dirty.offset + dirty.len,
|
||||||
|
@ -178,15 +178,15 @@ int blockstore_impl_t::dequeue_read(blockstore_op_t *read_op)
|
||||||
result_version = clean_it->second.version;
|
result_version = clean_it->second.version;
|
||||||
if (read_op->bitmap)
|
if (read_op->bitmap)
|
||||||
{
|
{
|
||||||
void *bmp_ptr = get_clean_entry_bitmap(clean_it->second.location, clean_entry_bitmap_size);
|
void *bmp_ptr = get_clean_entry_bitmap(clean_it->second.location, dsk.clean_entry_bitmap_size);
|
||||||
memcpy(read_op->bitmap, bmp_ptr, clean_entry_bitmap_size);
|
memcpy(read_op->bitmap, bmp_ptr, dsk.clean_entry_bitmap_size);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (fulfilled < read_op->len)
|
if (fulfilled < read_op->len)
|
||||||
{
|
{
|
||||||
if (!clean_entry_bitmap_size)
|
if (!dsk.clean_entry_bitmap_size)
|
||||||
{
|
{
|
||||||
if (!fulfill_read(read_op, fulfilled, 0, data_block_size, (BS_ST_BIG_WRITE | BS_ST_STABLE), 0, clean_it->second.location))
|
if (!fulfill_read(read_op, fulfilled, 0, dsk.data_block_size, (BS_ST_BIG_WRITE | BS_ST_STABLE), 0, clean_it->second.location))
|
||||||
{
|
{
|
||||||
// need to wait. undo added requests, don't dequeue op
|
// need to wait. undo added requests, don't dequeue op
|
||||||
PRIV(read_op)->read_vec.clear();
|
PRIV(read_op)->read_vec.clear();
|
||||||
|
@ -196,7 +196,7 @@ int blockstore_impl_t::dequeue_read(blockstore_op_t *read_op)
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
uint8_t *clean_entry_bitmap = get_clean_entry_bitmap(clean_it->second.location, 0);
|
uint8_t *clean_entry_bitmap = get_clean_entry_bitmap(clean_it->second.location, 0);
|
||||||
uint64_t bmp_start = 0, bmp_end = 0, bmp_size = data_block_size/bitmap_granularity;
|
uint64_t bmp_start = 0, bmp_end = 0, bmp_size = dsk.data_block_size/dsk.bitmap_granularity;
|
||||||
while (bmp_start < bmp_size)
|
while (bmp_start < bmp_size)
|
||||||
{
|
{
|
||||||
while (!(clean_entry_bitmap[bmp_end >> 3] & (1 << (bmp_end & 0x7))) && bmp_end < bmp_size)
|
while (!(clean_entry_bitmap[bmp_end >> 3] & (1 << (bmp_end & 0x7))) && bmp_end < bmp_size)
|
||||||
|
@ -206,8 +206,8 @@ int blockstore_impl_t::dequeue_read(blockstore_op_t *read_op)
|
||||||
if (bmp_end > bmp_start)
|
if (bmp_end > bmp_start)
|
||||||
{
|
{
|
||||||
// fill with zeroes
|
// fill with zeroes
|
||||||
assert(fulfill_read(read_op, fulfilled, bmp_start * bitmap_granularity,
|
assert(fulfill_read(read_op, fulfilled, bmp_start * dsk.bitmap_granularity,
|
||||||
bmp_end * bitmap_granularity, (BS_ST_DELETE | BS_ST_STABLE), 0, 0));
|
bmp_end * dsk.bitmap_granularity, (BS_ST_DELETE | BS_ST_STABLE), 0, 0));
|
||||||
}
|
}
|
||||||
bmp_start = bmp_end;
|
bmp_start = bmp_end;
|
||||||
while (clean_entry_bitmap[bmp_end >> 3] & (1 << (bmp_end & 0x7)) && bmp_end < bmp_size)
|
while (clean_entry_bitmap[bmp_end >> 3] & (1 << (bmp_end & 0x7)) && bmp_end < bmp_size)
|
||||||
|
@ -216,9 +216,9 @@ int blockstore_impl_t::dequeue_read(blockstore_op_t *read_op)
|
||||||
}
|
}
|
||||||
if (bmp_end > bmp_start)
|
if (bmp_end > bmp_start)
|
||||||
{
|
{
|
||||||
if (!fulfill_read(read_op, fulfilled, bmp_start * bitmap_granularity,
|
if (!fulfill_read(read_op, fulfilled, bmp_start * dsk.bitmap_granularity,
|
||||||
bmp_end * bitmap_granularity, (BS_ST_BIG_WRITE | BS_ST_STABLE), 0,
|
bmp_end * dsk.bitmap_granularity, (BS_ST_BIG_WRITE | BS_ST_STABLE), 0,
|
||||||
clean_it->second.location + bmp_start * bitmap_granularity))
|
clean_it->second.location + bmp_start * dsk.bitmap_granularity))
|
||||||
{
|
{
|
||||||
// need to wait. undo added requests, don't dequeue op
|
// need to wait. undo added requests, don't dequeue op
|
||||||
PRIV(read_op)->read_vec.clear();
|
PRIV(read_op)->read_vec.clear();
|
||||||
|
@ -233,7 +233,7 @@ int blockstore_impl_t::dequeue_read(blockstore_op_t *read_op)
|
||||||
else if (fulfilled < read_op->len)
|
else if (fulfilled < read_op->len)
|
||||||
{
|
{
|
||||||
// fill remaining parts with zeroes
|
// fill remaining parts with zeroes
|
||||||
assert(fulfill_read(read_op, fulfilled, 0, data_block_size, (BS_ST_DELETE | BS_ST_STABLE), 0, 0));
|
assert(fulfill_read(read_op, fulfilled, 0, dsk.data_block_size, (BS_ST_DELETE | BS_ST_STABLE), 0, 0));
|
||||||
}
|
}
|
||||||
assert(fulfilled == read_op->len);
|
assert(fulfilled == read_op->len);
|
||||||
read_op->version = result_version;
|
read_op->version = result_version;
|
||||||
|
@ -288,8 +288,8 @@ int blockstore_impl_t::read_bitmap(object_id oid, uint64_t target_version, void
|
||||||
*result_version = dirty_it->first.version;
|
*result_version = dirty_it->first.version;
|
||||||
if (bitmap)
|
if (bitmap)
|
||||||
{
|
{
|
||||||
void *bmp_ptr = (clean_entry_bitmap_size > sizeof(void*) ? dirty_it->second.bitmap : &dirty_it->second.bitmap);
|
void *bmp_ptr = (dsk.clean_entry_bitmap_size > sizeof(void*) ? dirty_it->second.bitmap : &dirty_it->second.bitmap);
|
||||||
memcpy(bitmap, bmp_ptr, clean_entry_bitmap_size);
|
memcpy(bitmap, bmp_ptr, dsk.clean_entry_bitmap_size);
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -306,14 +306,14 @@ int blockstore_impl_t::read_bitmap(object_id oid, uint64_t target_version, void
|
||||||
*result_version = clean_it->second.version;
|
*result_version = clean_it->second.version;
|
||||||
if (bitmap)
|
if (bitmap)
|
||||||
{
|
{
|
||||||
void *bmp_ptr = get_clean_entry_bitmap(clean_it->second.location, clean_entry_bitmap_size);
|
void *bmp_ptr = get_clean_entry_bitmap(clean_it->second.location, dsk.clean_entry_bitmap_size);
|
||||||
memcpy(bitmap, bmp_ptr, clean_entry_bitmap_size);
|
memcpy(bitmap, bmp_ptr, dsk.clean_entry_bitmap_size);
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
if (result_version)
|
if (result_version)
|
||||||
*result_version = 0;
|
*result_version = 0;
|
||||||
if (bitmap)
|
if (bitmap)
|
||||||
memset(bitmap, 0, clean_entry_bitmap_size);
|
memset(bitmap, 0, dsk.clean_entry_bitmap_size);
|
||||||
return -ENOENT;
|
return -ENOENT;
|
||||||
}
|
}
|
||||||
|
|
|
@ -112,7 +112,7 @@ resume_2:
|
||||||
if (!disable_journal_fsync)
|
if (!disable_journal_fsync)
|
||||||
{
|
{
|
||||||
BS_SUBMIT_GET_SQE(sqe, data);
|
BS_SUBMIT_GET_SQE(sqe, data);
|
||||||
my_uring_prep_fsync(sqe, journal.fd, IORING_FSYNC_DATASYNC);
|
my_uring_prep_fsync(sqe, dsk.journal_fd, IORING_FSYNC_DATASYNC);
|
||||||
data->iov = { 0 };
|
data->iov = { 0 };
|
||||||
data->callback = [this, op](ring_data_t *data) { handle_write_event(data, op); };
|
data->callback = [this, op](ring_data_t *data) { handle_write_event(data, op); };
|
||||||
PRIV(op)->min_flushed_journal_sector = PRIV(op)->max_flushed_journal_sector = 0;
|
PRIV(op)->min_flushed_journal_sector = PRIV(op)->max_flushed_journal_sector = 0;
|
||||||
|
@ -217,10 +217,10 @@ void blockstore_impl_t::erase_dirty(blockstore_dirty_db_t::iterator dirty_start,
|
||||||
dirty_it->second.location != UINT64_MAX)
|
dirty_it->second.location != UINT64_MAX)
|
||||||
{
|
{
|
||||||
#ifdef BLOCKSTORE_DEBUG
|
#ifdef BLOCKSTORE_DEBUG
|
||||||
printf("Free block %lu from %lx:%lx v%lu\n", dirty_it->second.location >> block_order,
|
printf("Free block %lu from %lx:%lx v%lu\n", dirty_it->second.location >> dsk.block_order,
|
||||||
dirty_it->first.oid.inode, dirty_it->first.oid.stripe, dirty_it->first.version);
|
dirty_it->first.oid.inode, dirty_it->first.oid.stripe, dirty_it->first.version);
|
||||||
#endif
|
#endif
|
||||||
data_alloc->set(dirty_it->second.location >> block_order, false);
|
data_alloc->set(dirty_it->second.location >> dsk.block_order, false);
|
||||||
}
|
}
|
||||||
int used = --journal.used_sectors[dirty_it->second.journal_sector];
|
int used = --journal.used_sectors[dirty_it->second.journal_sector];
|
||||||
#ifdef BLOCKSTORE_DEBUG
|
#ifdef BLOCKSTORE_DEBUG
|
||||||
|
@ -233,7 +233,7 @@ void blockstore_impl_t::erase_dirty(blockstore_dirty_db_t::iterator dirty_start,
|
||||||
{
|
{
|
||||||
journal.used_sectors.erase(dirty_it->second.journal_sector);
|
journal.used_sectors.erase(dirty_it->second.journal_sector);
|
||||||
}
|
}
|
||||||
if (clean_entry_bitmap_size > sizeof(void*))
|
if (dsk.clean_entry_bitmap_size > sizeof(void*))
|
||||||
{
|
{
|
||||||
free(dirty_it->second.bitmap);
|
free(dirty_it->second.bitmap);
|
||||||
dirty_it->second.bitmap = NULL;
|
dirty_it->second.bitmap = NULL;
|
||||||
|
|
|
@ -137,7 +137,7 @@ resume_2:
|
||||||
if (!disable_journal_fsync)
|
if (!disable_journal_fsync)
|
||||||
{
|
{
|
||||||
BS_SUBMIT_GET_SQE(sqe, data);
|
BS_SUBMIT_GET_SQE(sqe, data);
|
||||||
my_uring_prep_fsync(sqe, journal.fd, IORING_FSYNC_DATASYNC);
|
my_uring_prep_fsync(sqe, dsk.journal_fd, IORING_FSYNC_DATASYNC);
|
||||||
data->iov = { 0 };
|
data->iov = { 0 };
|
||||||
data->callback = [this, op](ring_data_t *data) { handle_write_event(data, op); };
|
data->callback = [this, op](ring_data_t *data) { handle_write_event(data, op); };
|
||||||
PRIV(op)->min_flushed_journal_sector = PRIV(op)->max_flushed_journal_sector = 0;
|
PRIV(op)->min_flushed_journal_sector = PRIV(op)->max_flushed_journal_sector = 0;
|
||||||
|
@ -195,14 +195,14 @@ void blockstore_impl_t::mark_stable(const obj_ver_id & v, bool forget_dirty)
|
||||||
}
|
}
|
||||||
if (!exists)
|
if (!exists)
|
||||||
{
|
{
|
||||||
inode_space_stats[dirty_it->first.oid.inode] += data_block_size;
|
inode_space_stats[dirty_it->first.oid.inode] += dsk.data_block_size;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (IS_DELETE(dirty_it->second.state))
|
else if (IS_DELETE(dirty_it->second.state))
|
||||||
{
|
{
|
||||||
auto & sp = inode_space_stats[dirty_it->first.oid.inode];
|
auto & sp = inode_space_stats[dirty_it->first.oid.inode];
|
||||||
if (sp > data_block_size)
|
if (sp > dsk.data_block_size)
|
||||||
sp -= data_block_size;
|
sp -= dsk.data_block_size;
|
||||||
else
|
else
|
||||||
inode_space_stats.erase(dirty_it->first.oid.inode);
|
inode_space_stats.erase(dirty_it->first.oid.inode);
|
||||||
}
|
}
|
||||||
|
|
|
@ -60,7 +60,7 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op, bool queue_has_in_prog
|
||||||
if (!disable_data_fsync)
|
if (!disable_data_fsync)
|
||||||
{
|
{
|
||||||
BS_SUBMIT_GET_SQE(sqe, data);
|
BS_SUBMIT_GET_SQE(sqe, data);
|
||||||
my_uring_prep_fsync(sqe, data_fd, IORING_FSYNC_DATASYNC);
|
my_uring_prep_fsync(sqe, dsk.data_fd, IORING_FSYNC_DATASYNC);
|
||||||
data->iov = { 0 };
|
data->iov = { 0 };
|
||||||
data->callback = [this, op](ring_data_t *data) { handle_write_event(data, op); };
|
data->callback = [this, op](ring_data_t *data) { handle_write_event(data, op); };
|
||||||
PRIV(op)->min_flushed_journal_sector = PRIV(op)->max_flushed_journal_sector = 0;
|
PRIV(op)->min_flushed_journal_sector = PRIV(op)->max_flushed_journal_sector = 0;
|
||||||
|
@ -79,7 +79,7 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op, bool queue_has_in_prog
|
||||||
// Check space in the journal and journal memory buffers
|
// Check space in the journal and journal memory buffers
|
||||||
blockstore_journal_check_t space_check(this);
|
blockstore_journal_check_t space_check(this);
|
||||||
if (!space_check.check_available(op, PRIV(op)->sync_big_writes.size(),
|
if (!space_check.check_available(op, PRIV(op)->sync_big_writes.size(),
|
||||||
sizeof(journal_entry_big_write) + clean_entry_bitmap_size, JOURNAL_STABILIZE_RESERVATION))
|
sizeof(journal_entry_big_write) + dsk.clean_entry_bitmap_size, JOURNAL_STABILIZE_RESERVATION))
|
||||||
{
|
{
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -90,7 +90,7 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op, bool queue_has_in_prog
|
||||||
int s = 0;
|
int s = 0;
|
||||||
while (it != PRIV(op)->sync_big_writes.end())
|
while (it != PRIV(op)->sync_big_writes.end())
|
||||||
{
|
{
|
||||||
if (!journal.entry_fits(sizeof(journal_entry_big_write) + clean_entry_bitmap_size) &&
|
if (!journal.entry_fits(sizeof(journal_entry_big_write) + dsk.clean_entry_bitmap_size) &&
|
||||||
journal.sector_info[journal.cur_sector].dirty)
|
journal.sector_info[journal.cur_sector].dirty)
|
||||||
{
|
{
|
||||||
prepare_journal_sector_write(journal.cur_sector, op);
|
prepare_journal_sector_write(journal.cur_sector, op);
|
||||||
|
@ -99,7 +99,7 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op, bool queue_has_in_prog
|
||||||
auto & dirty_entry = dirty_db.at(*it);
|
auto & dirty_entry = dirty_db.at(*it);
|
||||||
journal_entry_big_write *je = (journal_entry_big_write*)prefill_single_journal_entry(
|
journal_entry_big_write *je = (journal_entry_big_write*)prefill_single_journal_entry(
|
||||||
journal, (dirty_entry.state & BS_ST_INSTANT) ? JE_BIG_WRITE_INSTANT : JE_BIG_WRITE,
|
journal, (dirty_entry.state & BS_ST_INSTANT) ? JE_BIG_WRITE_INSTANT : JE_BIG_WRITE,
|
||||||
sizeof(journal_entry_big_write) + clean_entry_bitmap_size
|
sizeof(journal_entry_big_write) + dsk.clean_entry_bitmap_size
|
||||||
);
|
);
|
||||||
dirty_entry.journal_sector = journal.sector_info[journal.cur_sector].offset;
|
dirty_entry.journal_sector = journal.sector_info[journal.cur_sector].offset;
|
||||||
journal.used_sectors[journal.sector_info[journal.cur_sector].offset]++;
|
journal.used_sectors[journal.sector_info[journal.cur_sector].offset]++;
|
||||||
|
@ -115,8 +115,8 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op, bool queue_has_in_prog
|
||||||
je->offset = dirty_entry.offset;
|
je->offset = dirty_entry.offset;
|
||||||
je->len = dirty_entry.len;
|
je->len = dirty_entry.len;
|
||||||
je->location = dirty_entry.location;
|
je->location = dirty_entry.location;
|
||||||
memcpy((void*)(je+1), (clean_entry_bitmap_size > sizeof(void*)
|
memcpy((void*)(je+1), (dsk.clean_entry_bitmap_size > sizeof(void*)
|
||||||
? dirty_entry.bitmap : &dirty_entry.bitmap), clean_entry_bitmap_size);
|
? dirty_entry.bitmap : &dirty_entry.bitmap), dsk.clean_entry_bitmap_size);
|
||||||
je->crc32 = je_crc32((journal_entry*)je);
|
je->crc32 = je_crc32((journal_entry*)je);
|
||||||
journal.crc32_last = je->crc32;
|
journal.crc32_last = je->crc32;
|
||||||
it++;
|
it++;
|
||||||
|
@ -132,7 +132,7 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op, bool queue_has_in_prog
|
||||||
if (!disable_journal_fsync)
|
if (!disable_journal_fsync)
|
||||||
{
|
{
|
||||||
BS_SUBMIT_GET_SQE(sqe, data);
|
BS_SUBMIT_GET_SQE(sqe, data);
|
||||||
my_uring_prep_fsync(sqe, journal.fd, IORING_FSYNC_DATASYNC);
|
my_uring_prep_fsync(sqe, dsk.journal_fd, IORING_FSYNC_DATASYNC);
|
||||||
data->iov = { 0 };
|
data->iov = { 0 };
|
||||||
data->callback = [this, op](ring_data_t *data) { handle_write_event(data, op); };
|
data->callback = [this, op](ring_data_t *data) { handle_write_event(data, op); };
|
||||||
PRIV(op)->min_flushed_journal_sector = PRIV(op)->max_flushed_journal_sector = 0;
|
PRIV(op)->min_flushed_journal_sector = PRIV(op)->max_flushed_journal_sector = 0;
|
||||||
|
|
|
@ -10,9 +10,9 @@ bool blockstore_impl_t::enqueue_write(blockstore_op_t *op)
|
||||||
bool wait_big = false, wait_del = false;
|
bool wait_big = false, wait_del = false;
|
||||||
void *bmp = NULL;
|
void *bmp = NULL;
|
||||||
uint64_t version = 1;
|
uint64_t version = 1;
|
||||||
if (!is_del && clean_entry_bitmap_size > sizeof(void*))
|
if (!is_del && dsk.clean_entry_bitmap_size > sizeof(void*))
|
||||||
{
|
{
|
||||||
bmp = calloc_or_die(1, clean_entry_bitmap_size);
|
bmp = calloc_or_die(1, dsk.clean_entry_bitmap_size);
|
||||||
}
|
}
|
||||||
if (dirty_db.size() > 0)
|
if (dirty_db.size() > 0)
|
||||||
{
|
{
|
||||||
|
@ -32,8 +32,8 @@ bool blockstore_impl_t::enqueue_write(blockstore_op_t *op)
|
||||||
: ((dirty_it->second.state & BS_ST_WORKFLOW_MASK) == BS_ST_WAIT_BIG);
|
: ((dirty_it->second.state & BS_ST_WORKFLOW_MASK) == BS_ST_WAIT_BIG);
|
||||||
if (!is_del && !deleted)
|
if (!is_del && !deleted)
|
||||||
{
|
{
|
||||||
if (clean_entry_bitmap_size > sizeof(void*))
|
if (dsk.clean_entry_bitmap_size > sizeof(void*))
|
||||||
memcpy(bmp, dirty_it->second.bitmap, clean_entry_bitmap_size);
|
memcpy(bmp, dirty_it->second.bitmap, dsk.clean_entry_bitmap_size);
|
||||||
else
|
else
|
||||||
bmp = dirty_it->second.bitmap;
|
bmp = dirty_it->second.bitmap;
|
||||||
}
|
}
|
||||||
|
@ -48,8 +48,8 @@ bool blockstore_impl_t::enqueue_write(blockstore_op_t *op)
|
||||||
version = clean_it->second.version + 1;
|
version = clean_it->second.version + 1;
|
||||||
if (!is_del)
|
if (!is_del)
|
||||||
{
|
{
|
||||||
void *bmp_ptr = get_clean_entry_bitmap(clean_it->second.location, clean_entry_bitmap_size);
|
void *bmp_ptr = get_clean_entry_bitmap(clean_it->second.location, dsk.clean_entry_bitmap_size);
|
||||||
memcpy((clean_entry_bitmap_size > sizeof(void*) ? bmp : &bmp), bmp_ptr, clean_entry_bitmap_size);
|
memcpy((dsk.clean_entry_bitmap_size > sizeof(void*) ? bmp : &bmp), bmp_ptr, dsk.clean_entry_bitmap_size);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -90,14 +90,14 @@ bool blockstore_impl_t::enqueue_write(blockstore_op_t *op)
|
||||||
{
|
{
|
||||||
// Invalid version requested
|
// Invalid version requested
|
||||||
op->retval = -EEXIST;
|
op->retval = -EEXIST;
|
||||||
if (!is_del && clean_entry_bitmap_size > sizeof(void*))
|
if (!is_del && dsk.clean_entry_bitmap_size > sizeof(void*))
|
||||||
{
|
{
|
||||||
free(bmp);
|
free(bmp);
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (wait_big && !is_del && !deleted && op->len < data_block_size &&
|
if (wait_big && !is_del && !deleted && op->len < dsk.data_block_size &&
|
||||||
immediate_commit != IMMEDIATE_ALL)
|
immediate_commit != IMMEDIATE_ALL)
|
||||||
{
|
{
|
||||||
// Issue an additional sync so that the previous big write can reach the journal
|
// Issue an additional sync so that the previous big write can reach the journal
|
||||||
|
@ -122,7 +122,7 @@ bool blockstore_impl_t::enqueue_write(blockstore_op_t *op)
|
||||||
state = BS_ST_DELETE | BS_ST_IN_FLIGHT;
|
state = BS_ST_DELETE | BS_ST_IN_FLIGHT;
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
state = (op->len == data_block_size || deleted ? BS_ST_BIG_WRITE : BS_ST_SMALL_WRITE);
|
state = (op->len == dsk.data_block_size || deleted ? BS_ST_BIG_WRITE : BS_ST_SMALL_WRITE);
|
||||||
if (state == BS_ST_SMALL_WRITE && throttle_small_writes)
|
if (state == BS_ST_SMALL_WRITE && throttle_small_writes)
|
||||||
clock_gettime(CLOCK_REALTIME, &PRIV(op)->tv_begin);
|
clock_gettime(CLOCK_REALTIME, &PRIV(op)->tv_begin);
|
||||||
if (wait_del)
|
if (wait_del)
|
||||||
|
@ -136,9 +136,9 @@ bool blockstore_impl_t::enqueue_write(blockstore_op_t *op)
|
||||||
if (op->bitmap)
|
if (op->bitmap)
|
||||||
{
|
{
|
||||||
// Only allow to overwrite part of the object bitmap respective to the write's offset/len
|
// Only allow to overwrite part of the object bitmap respective to the write's offset/len
|
||||||
uint8_t *bmp_ptr = (uint8_t*)(clean_entry_bitmap_size > sizeof(void*) ? bmp : &bmp);
|
uint8_t *bmp_ptr = (uint8_t*)(dsk.clean_entry_bitmap_size > sizeof(void*) ? bmp : &bmp);
|
||||||
uint32_t bit = op->offset/bitmap_granularity;
|
uint32_t bit = op->offset/dsk.bitmap_granularity;
|
||||||
uint32_t bits_left = op->len/bitmap_granularity;
|
uint32_t bits_left = op->len/dsk.bitmap_granularity;
|
||||||
while (!(bit % 8) && bits_left > 8)
|
while (!(bit % 8) && bits_left > 8)
|
||||||
{
|
{
|
||||||
// Copy bytes
|
// Copy bytes
|
||||||
|
@ -175,7 +175,7 @@ void blockstore_impl_t::cancel_all_writes(blockstore_op_t *op, blockstore_dirty_
|
||||||
{
|
{
|
||||||
while (dirty_it != dirty_db.end() && dirty_it->first.oid == op->oid)
|
while (dirty_it != dirty_db.end() && dirty_it->first.oid == op->oid)
|
||||||
{
|
{
|
||||||
if (clean_entry_bitmap_size > sizeof(void*))
|
if (dsk.clean_entry_bitmap_size > sizeof(void*))
|
||||||
free(dirty_it->second.bitmap);
|
free(dirty_it->second.bitmap);
|
||||||
dirty_db.erase(dirty_it++);
|
dirty_db.erase(dirty_it++);
|
||||||
}
|
}
|
||||||
|
@ -251,7 +251,7 @@ int blockstore_impl_t::dequeue_write(blockstore_op_t *op)
|
||||||
{
|
{
|
||||||
blockstore_journal_check_t space_check(this);
|
blockstore_journal_check_t space_check(this);
|
||||||
if (!space_check.check_available(op, unsynced_big_write_count + 1,
|
if (!space_check.check_available(op, unsynced_big_write_count + 1,
|
||||||
sizeof(journal_entry_big_write) + clean_entry_bitmap_size, JOURNAL_STABILIZE_RESERVATION))
|
sizeof(journal_entry_big_write) + dsk.clean_entry_bitmap_size, JOURNAL_STABILIZE_RESERVATION))
|
||||||
{
|
{
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -271,7 +271,7 @@ int blockstore_impl_t::dequeue_write(blockstore_op_t *op)
|
||||||
}
|
}
|
||||||
BS_SUBMIT_GET_SQE(sqe, data);
|
BS_SUBMIT_GET_SQE(sqe, data);
|
||||||
write_iodepth++;
|
write_iodepth++;
|
||||||
dirty_it->second.location = loc << block_order;
|
dirty_it->second.location = loc << dsk.block_order;
|
||||||
dirty_it->second.state = (dirty_it->second.state & ~BS_ST_WORKFLOW_MASK) | BS_ST_SUBMITTED;
|
dirty_it->second.state = (dirty_it->second.state & ~BS_ST_WORKFLOW_MASK) | BS_ST_SUBMITTED;
|
||||||
#ifdef BLOCKSTORE_DEBUG
|
#ifdef BLOCKSTORE_DEBUG
|
||||||
printf(
|
printf(
|
||||||
|
@ -280,9 +280,9 @@ int blockstore_impl_t::dequeue_write(blockstore_op_t *op)
|
||||||
);
|
);
|
||||||
#endif
|
#endif
|
||||||
data_alloc->set(loc, true);
|
data_alloc->set(loc, true);
|
||||||
uint64_t stripe_offset = (op->offset % bitmap_granularity);
|
uint64_t stripe_offset = (op->offset % dsk.bitmap_granularity);
|
||||||
uint64_t stripe_end = (op->offset + op->len) % bitmap_granularity;
|
uint64_t stripe_end = (op->offset + op->len) % dsk.bitmap_granularity;
|
||||||
// Zero fill up to bitmap_granularity
|
// Zero fill up to dsk.bitmap_granularity
|
||||||
int vcnt = 0;
|
int vcnt = 0;
|
||||||
if (stripe_offset)
|
if (stripe_offset)
|
||||||
{
|
{
|
||||||
|
@ -291,13 +291,13 @@ int blockstore_impl_t::dequeue_write(blockstore_op_t *op)
|
||||||
PRIV(op)->iov_zerofill[vcnt++] = (struct iovec){ op->buf, op->len };
|
PRIV(op)->iov_zerofill[vcnt++] = (struct iovec){ op->buf, op->len };
|
||||||
if (stripe_end)
|
if (stripe_end)
|
||||||
{
|
{
|
||||||
stripe_end = bitmap_granularity - stripe_end;
|
stripe_end = dsk.bitmap_granularity - stripe_end;
|
||||||
PRIV(op)->iov_zerofill[vcnt++] = (struct iovec){ zero_object, stripe_end };
|
PRIV(op)->iov_zerofill[vcnt++] = (struct iovec){ zero_object, stripe_end };
|
||||||
}
|
}
|
||||||
data->iov.iov_len = op->len + stripe_offset + stripe_end; // to check it in the callback
|
data->iov.iov_len = op->len + stripe_offset + stripe_end; // to check it in the callback
|
||||||
data->callback = [this, op](ring_data_t *data) { handle_write_event(data, op); };
|
data->callback = [this, op](ring_data_t *data) { handle_write_event(data, op); };
|
||||||
my_uring_prep_writev(
|
my_uring_prep_writev(
|
||||||
sqe, data_fd, PRIV(op)->iov_zerofill, vcnt, data_offset + (loc << block_order) + op->offset - stripe_offset
|
sqe, dsk.data_fd, PRIV(op)->iov_zerofill, vcnt, dsk.data_offset + (loc << dsk.block_order) + op->offset - stripe_offset
|
||||||
);
|
);
|
||||||
PRIV(op)->pending_ops = 1;
|
PRIV(op)->pending_ops = 1;
|
||||||
PRIV(op)->min_flushed_journal_sector = PRIV(op)->max_flushed_journal_sector = 0;
|
PRIV(op)->min_flushed_journal_sector = PRIV(op)->max_flushed_journal_sector = 0;
|
||||||
|
@ -319,9 +319,9 @@ int blockstore_impl_t::dequeue_write(blockstore_op_t *op)
|
||||||
blockstore_journal_check_t space_check(this);
|
blockstore_journal_check_t space_check(this);
|
||||||
if (unsynced_big_write_count &&
|
if (unsynced_big_write_count &&
|
||||||
!space_check.check_available(op, unsynced_big_write_count,
|
!space_check.check_available(op, unsynced_big_write_count,
|
||||||
sizeof(journal_entry_big_write) + clean_entry_bitmap_size, 0)
|
sizeof(journal_entry_big_write) + dsk.clean_entry_bitmap_size, 0)
|
||||||
|| !space_check.check_available(op, 1,
|
|| !space_check.check_available(op, 1,
|
||||||
sizeof(journal_entry_small_write) + clean_entry_bitmap_size, op->len + JOURNAL_STABILIZE_RESERVATION))
|
sizeof(journal_entry_small_write) + dsk.clean_entry_bitmap_size, op->len + JOURNAL_STABILIZE_RESERVATION))
|
||||||
{
|
{
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -329,7 +329,7 @@ int blockstore_impl_t::dequeue_write(blockstore_op_t *op)
|
||||||
BS_SUBMIT_CHECK_SQES(
|
BS_SUBMIT_CHECK_SQES(
|
||||||
// Write current journal sector only if it's dirty and full, or in the immediate_commit mode
|
// Write current journal sector only if it's dirty and full, or in the immediate_commit mode
|
||||||
(immediate_commit != IMMEDIATE_NONE ||
|
(immediate_commit != IMMEDIATE_NONE ||
|
||||||
!journal.entry_fits(sizeof(journal_entry_small_write) + clean_entry_bitmap_size) ? 1 : 0) +
|
!journal.entry_fits(sizeof(journal_entry_small_write) + dsk.clean_entry_bitmap_size) ? 1 : 0) +
|
||||||
(op->len > 0 ? 1 : 0)
|
(op->len > 0 ? 1 : 0)
|
||||||
);
|
);
|
||||||
write_iodepth++;
|
write_iodepth++;
|
||||||
|
@ -337,7 +337,7 @@ int blockstore_impl_t::dequeue_write(blockstore_op_t *op)
|
||||||
auto cb = [this, op](ring_data_t *data) { handle_write_event(data, op); };
|
auto cb = [this, op](ring_data_t *data) { handle_write_event(data, op); };
|
||||||
if (immediate_commit == IMMEDIATE_NONE)
|
if (immediate_commit == IMMEDIATE_NONE)
|
||||||
{
|
{
|
||||||
if (!journal.entry_fits(sizeof(journal_entry_small_write) + clean_entry_bitmap_size))
|
if (!journal.entry_fits(sizeof(journal_entry_small_write) + dsk.clean_entry_bitmap_size))
|
||||||
{
|
{
|
||||||
prepare_journal_sector_write(journal.cur_sector, op);
|
prepare_journal_sector_write(journal.cur_sector, op);
|
||||||
}
|
}
|
||||||
|
@ -349,7 +349,7 @@ int blockstore_impl_t::dequeue_write(blockstore_op_t *op)
|
||||||
// Then pre-fill journal entry
|
// Then pre-fill journal entry
|
||||||
journal_entry_small_write *je = (journal_entry_small_write*)prefill_single_journal_entry(
|
journal_entry_small_write *je = (journal_entry_small_write*)prefill_single_journal_entry(
|
||||||
journal, op->opcode == BS_OP_WRITE_STABLE ? JE_SMALL_WRITE_INSTANT : JE_SMALL_WRITE,
|
journal, op->opcode == BS_OP_WRITE_STABLE ? JE_SMALL_WRITE_INSTANT : JE_SMALL_WRITE,
|
||||||
sizeof(journal_entry_small_write) + clean_entry_bitmap_size
|
sizeof(journal_entry_small_write) + dsk.clean_entry_bitmap_size
|
||||||
);
|
);
|
||||||
dirty_it->second.journal_sector = journal.sector_info[journal.cur_sector].offset;
|
dirty_it->second.journal_sector = journal.sector_info[journal.cur_sector].offset;
|
||||||
journal.used_sectors[journal.sector_info[journal.cur_sector].offset]++;
|
journal.used_sectors[journal.sector_info[journal.cur_sector].offset]++;
|
||||||
|
@ -361,14 +361,14 @@ int blockstore_impl_t::dequeue_write(blockstore_op_t *op)
|
||||||
);
|
);
|
||||||
#endif
|
#endif
|
||||||
// Figure out where data will be
|
// Figure out where data will be
|
||||||
journal.next_free = (journal.next_free + op->len) <= journal.len ? journal.next_free : journal_block_size;
|
journal.next_free = (journal.next_free + op->len) <= journal.len ? journal.next_free : dsk.journal_block_size;
|
||||||
je->oid = op->oid;
|
je->oid = op->oid;
|
||||||
je->version = op->version;
|
je->version = op->version;
|
||||||
je->offset = op->offset;
|
je->offset = op->offset;
|
||||||
je->len = op->len;
|
je->len = op->len;
|
||||||
je->data_offset = journal.next_free;
|
je->data_offset = journal.next_free;
|
||||||
je->crc32_data = crc32c(0, op->buf, op->len);
|
je->crc32_data = crc32c(0, op->buf, op->len);
|
||||||
memcpy((void*)(je+1), (clean_entry_bitmap_size > sizeof(void*) ? dirty_it->second.bitmap : &dirty_it->second.bitmap), clean_entry_bitmap_size);
|
memcpy((void*)(je+1), (dsk.clean_entry_bitmap_size > sizeof(void*) ? dirty_it->second.bitmap : &dirty_it->second.bitmap), dsk.clean_entry_bitmap_size);
|
||||||
je->crc32 = je_crc32((journal_entry*)je);
|
je->crc32 = je_crc32((journal_entry*)je);
|
||||||
journal.crc32_last = je->crc32;
|
journal.crc32_last = je->crc32;
|
||||||
if (immediate_commit != IMMEDIATE_NONE)
|
if (immediate_commit != IMMEDIATE_NONE)
|
||||||
|
@ -387,7 +387,7 @@ int blockstore_impl_t::dequeue_write(blockstore_op_t *op)
|
||||||
data2->iov = (struct iovec){ op->buf, op->len };
|
data2->iov = (struct iovec){ op->buf, op->len };
|
||||||
data2->callback = cb;
|
data2->callback = cb;
|
||||||
my_uring_prep_writev(
|
my_uring_prep_writev(
|
||||||
sqe2, journal.fd, &data2->iov, 1, journal.offset + journal.next_free
|
sqe2, dsk.journal_fd, &data2->iov, 1, journal.offset + journal.next_free
|
||||||
);
|
);
|
||||||
PRIV(op)->pending_ops++;
|
PRIV(op)->pending_ops++;
|
||||||
}
|
}
|
||||||
|
@ -400,7 +400,7 @@ int blockstore_impl_t::dequeue_write(blockstore_op_t *op)
|
||||||
journal.next_free += op->len;
|
journal.next_free += op->len;
|
||||||
if (journal.next_free >= journal.len)
|
if (journal.next_free >= journal.len)
|
||||||
{
|
{
|
||||||
journal.next_free = journal_block_size;
|
journal.next_free = dsk.journal_block_size;
|
||||||
}
|
}
|
||||||
if (!PRIV(op)->pending_ops)
|
if (!PRIV(op)->pending_ops)
|
||||||
{
|
{
|
||||||
|
@ -440,7 +440,7 @@ resume_2:
|
||||||
assert(dirty_it != dirty_db.end());
|
assert(dirty_it != dirty_db.end());
|
||||||
journal_entry_big_write *je = (journal_entry_big_write*)prefill_single_journal_entry(
|
journal_entry_big_write *je = (journal_entry_big_write*)prefill_single_journal_entry(
|
||||||
journal, op->opcode == BS_OP_WRITE_STABLE ? JE_BIG_WRITE_INSTANT : JE_BIG_WRITE,
|
journal, op->opcode == BS_OP_WRITE_STABLE ? JE_BIG_WRITE_INSTANT : JE_BIG_WRITE,
|
||||||
sizeof(journal_entry_big_write) + clean_entry_bitmap_size
|
sizeof(journal_entry_big_write) + dsk.clean_entry_bitmap_size
|
||||||
);
|
);
|
||||||
dirty_it->second.journal_sector = journal.sector_info[journal.cur_sector].offset;
|
dirty_it->second.journal_sector = journal.sector_info[journal.cur_sector].offset;
|
||||||
journal.used_sectors[journal.sector_info[journal.cur_sector].offset]++;
|
journal.used_sectors[journal.sector_info[journal.cur_sector].offset]++;
|
||||||
|
@ -456,7 +456,7 @@ resume_2:
|
||||||
je->offset = op->offset;
|
je->offset = op->offset;
|
||||||
je->len = op->len;
|
je->len = op->len;
|
||||||
je->location = dirty_it->second.location;
|
je->location = dirty_it->second.location;
|
||||||
memcpy((void*)(je+1), (clean_entry_bitmap_size > sizeof(void*) ? dirty_it->second.bitmap : &dirty_it->second.bitmap), clean_entry_bitmap_size);
|
memcpy((void*)(je+1), (dsk.clean_entry_bitmap_size > sizeof(void*) ? dirty_it->second.bitmap : &dirty_it->second.bitmap), dsk.clean_entry_bitmap_size);
|
||||||
je->crc32 = je_crc32((journal_entry*)je);
|
je->crc32 = je_crc32((journal_entry*)je);
|
||||||
journal.crc32_last = je->crc32;
|
journal.crc32_last = je->crc32;
|
||||||
prepare_journal_sector_write(journal.cur_sector, op);
|
prepare_journal_sector_write(journal.cur_sector, op);
|
||||||
|
@ -634,7 +634,7 @@ int blockstore_impl_t::dequeue_del(blockstore_op_t *op)
|
||||||
// Write current journal sector only if it's dirty and full, or in the immediate_commit mode
|
// Write current journal sector only if it's dirty and full, or in the immediate_commit mode
|
||||||
BS_SUBMIT_CHECK_SQES(
|
BS_SUBMIT_CHECK_SQES(
|
||||||
(immediate_commit != IMMEDIATE_NONE ||
|
(immediate_commit != IMMEDIATE_NONE ||
|
||||||
(journal_block_size - journal.in_sector_pos) < sizeof(journal_entry_del) &&
|
(dsk.journal_block_size - journal.in_sector_pos) < sizeof(journal_entry_del) &&
|
||||||
journal.sector_info[journal.cur_sector].dirty) ? 1 : 0
|
journal.sector_info[journal.cur_sector].dirty) ? 1 : 0
|
||||||
);
|
);
|
||||||
if (write_iodepth >= max_write_iodepth)
|
if (write_iodepth >= max_write_iodepth)
|
||||||
|
@ -645,7 +645,7 @@ int blockstore_impl_t::dequeue_del(blockstore_op_t *op)
|
||||||
// Prepare journal sector write
|
// Prepare journal sector write
|
||||||
if (immediate_commit == IMMEDIATE_NONE)
|
if (immediate_commit == IMMEDIATE_NONE)
|
||||||
{
|
{
|
||||||
if ((journal_block_size - journal.in_sector_pos) < sizeof(journal_entry_del) &&
|
if ((dsk.journal_block_size - journal.in_sector_pos) < sizeof(journal_entry_del) &&
|
||||||
journal.sector_info[journal.cur_sector].dirty)
|
journal.sector_info[journal.cur_sector].dirty)
|
||||||
{
|
{
|
||||||
prepare_journal_sector_write(journal.cur_sector, op);
|
prepare_journal_sector_write(journal.cur_sector, op);
|
||||||
|
|
Loading…
Reference in New Issue