forked from vitalif/vitastor
Add (empty) osd_primary.cpp, rename osd_read to osd_receive, add FIXMEs for fsync
parent
1447c44b68
commit
47663bd1dc
6
Makefile
6
Makefile
|
@ -24,10 +24,10 @@ libblockstore.so: $(BLOCKSTORE_OBJS)
|
|||
libfio_blockstore.so: ./libblockstore.so fio_engine.cpp json11.o
|
||||
g++ $(CXXFLAGS) -shared -o libfio_blockstore.so fio_engine.cpp json11.o ./libblockstore.so -ltcmalloc_minimal -luring
|
||||
|
||||
OSD_OBJS := osd.o osd_exec_secondary.o osd_read.o osd_send.o osd_peering.o osd_peering_pg.o json11.o
|
||||
OSD_OBJS := osd.o osd_exec_secondary.o osd_receive.o osd_send.o osd_peering.o osd_peering_pg.o osd_primary.o json11.o
|
||||
osd_exec_secondary.o: osd_exec_secondary.cpp osd.h osd_ops.h
|
||||
g++ $(CXXFLAGS) -c -o $@ $<
|
||||
osd_read.o: osd_read.cpp osd.h osd_ops.h
|
||||
osd_receive.o: osd_receive.cpp osd.h osd_ops.h
|
||||
g++ $(CXXFLAGS) -c -o $@ $<
|
||||
osd_send.o: osd_send.cpp osd.h osd_ops.h
|
||||
g++ $(CXXFLAGS) -c -o $@ $<
|
||||
|
@ -35,6 +35,8 @@ osd_peering.o: osd_peering.cpp osd.h osd_ops.h osd_peering_pg.h
|
|||
g++ $(CXXFLAGS) -c -o $@ $<
|
||||
osd_peering_pg.o: osd_peering_pg.cpp object_id.h osd_peering_pg.h
|
||||
g++ $(CXXFLAGS) -c -o $@ $<
|
||||
osd_primary.o: osd_primary.cpp osd.h osd_ops.h osd_peering_pg.h
|
||||
g++ $(CXXFLAGS) -c -o $@ $<
|
||||
osd.o: osd.cpp osd.h osd_ops.h osd_peering_pg.h
|
||||
g++ $(CXXFLAGS) -c -o $@ $<
|
||||
osd: ./libblockstore.so osd_main.cpp osd.h osd_ops.h $(OSD_OBJS)
|
||||
|
|
|
@ -110,7 +110,7 @@ void blockstore_impl_t::loop()
|
|||
auto op_ptr = cur;
|
||||
auto op = *(cur++);
|
||||
// FIXME: This needs some simplification
|
||||
// Writes should not block reads if the ring is not full and if reads don't depend on them
|
||||
// Writes should not block reads if the ring is not full and reads don't depend on them
|
||||
// In all other cases we should stop submission
|
||||
if (PRIV(op)->wait_for)
|
||||
{
|
||||
|
|
|
@ -254,6 +254,7 @@ resume_1:
|
|||
if (!bs->disable_journal_fsync)
|
||||
{
|
||||
GET_SQE();
|
||||
// FIXME Wait for completion of writes before issuing an fsync
|
||||
my_uring_prep_fsync(sqe, bs->journal.fd, IORING_FSYNC_DATASYNC);
|
||||
data->iov = { 0 };
|
||||
data->callback = simple_callback;
|
||||
|
@ -337,6 +338,7 @@ resume_1:
|
|||
data->iov = { 0 };
|
||||
data->callback = simple_callback;
|
||||
wait_count++;
|
||||
// FIXME Wait for completion of writes before issuing an fsync
|
||||
my_uring_prep_fsync(sqe, bs->journal.fd, IORING_FSYNC_DATASYNC);
|
||||
}
|
||||
bs->ringloop->submit();
|
||||
|
|
|
@ -63,6 +63,8 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op)
|
|||
}
|
||||
if (!disable_journal_fsync)
|
||||
{
|
||||
// FIXME: Wait for completion of writes before issuing an fsync
|
||||
// Fsync and write requests posted at the same time can be reordered
|
||||
ring_data_t *data = ((ring_data_t*)sqes[s]->user_data);
|
||||
my_uring_prep_fsync(sqes[s++], journal.fd, IORING_FSYNC_DATASYNC);
|
||||
data->iov = { 0 };
|
||||
|
@ -81,6 +83,7 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op)
|
|||
// 1st step: fsync data
|
||||
if (!disable_data_fsync)
|
||||
{
|
||||
// FIXME Wait for completion of writes before issuing an fsync
|
||||
BS_SUBMIT_GET_SQE(sqe, data);
|
||||
my_uring_prep_fsync(sqe, data_fd, IORING_FSYNC_DATASYNC);
|
||||
data->iov = { 0 };
|
||||
|
@ -150,6 +153,7 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op)
|
|||
// ... And a journal fsync
|
||||
if (!disable_journal_fsync)
|
||||
{
|
||||
// FIXME Wait for completion of writes before issuing an fsync
|
||||
my_uring_prep_fsync(sqe[s], journal.fd, IORING_FSYNC_DATASYNC);
|
||||
struct ring_data_t *data = ((ring_data_t*)sqe[s]->user_data);
|
||||
data->iov = { 0 };
|
||||
|
|
13
osd.cpp
13
osd.cpp
|
@ -278,18 +278,9 @@ void osd_t::exec_op(osd_op_t *cur_op)
|
|||
{
|
||||
exec_show_config(cur_op);
|
||||
}
|
||||
else if (cur_op->op.hdr.opcode == OSD_OP_READ)
|
||||
else if (cur_op->op.hdr.opcode == OSD_OP_READ || cur_op->op.hdr.opcode == OSD_OP_WRITE)
|
||||
{
|
||||
// Primary OSD also works with individual stripes, but they're twice the size of the blockstore's stripe
|
||||
// - convert offset & len to stripe number
|
||||
// - fail operation if offset & len span multiple stripes
|
||||
// - calc stripe hash and determine PG
|
||||
// - check if this is our PG
|
||||
// - redirect or fail operation if not
|
||||
// - determine whether we need to read A and B or just A or just B or A + parity or B + parity
|
||||
// and determine read ranges for both objects
|
||||
// - send read requests
|
||||
// - reconstruct result
|
||||
exec_primary(cur_op);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
6
osd.h
6
osd.h
|
@ -164,10 +164,16 @@ class osd_t
|
|||
|
||||
// op execution
|
||||
void exec_op(osd_op_t *cur_op);
|
||||
|
||||
// secondary ops
|
||||
void exec_sync_stab_all(osd_op_t *cur_op);
|
||||
void exec_show_config(osd_op_t *cur_op);
|
||||
void exec_secondary(osd_op_t *cur_op);
|
||||
void secondary_op_callback(osd_op_t *cur_op);
|
||||
|
||||
// primary ops
|
||||
void exec_primary(osd_op_t *cur_op);
|
||||
void make_primary_reply(osd_op_t *op);
|
||||
public:
|
||||
osd_t(blockstore_config_t & config, blockstore_t *bs, ring_loop_t *ringloop);
|
||||
~osd_t();
|
||||
|
|
|
@ -0,0 +1,21 @@
|
|||
#include "osd.h"
|
||||
|
||||
void osd_t::exec_primary(osd_op_t *cur_op)
|
||||
{
|
||||
// read: read directly or read paired stripe(s), reconstruct, return
|
||||
// write: read paired stripe(s), modify, write
|
||||
// nuance: take care to read the same version from paired stripes!
|
||||
// if there are no write requests in progress we're good (stripes must be in sync)
|
||||
// and... remember the last readable version during a write request
|
||||
// and... postpone other write requests to the same stripe until the completion of previous ones
|
||||
//
|
||||
// sync: sync peers, get unstable versions from somewhere, stabilize them
|
||||
|
||||
}
|
||||
|
||||
void osd_t::make_primary_reply(osd_op_t *op)
|
||||
{
|
||||
op->reply.hdr.magic = SECONDARY_OSD_REPLY_MAGIC;
|
||||
op->reply.hdr.id = op->op.hdr.id;
|
||||
op->reply.hdr.opcode = op->op.hdr.opcode;
|
||||
}
|
Loading…
Reference in New Issue