Prevent two parallel flushes of the same object

blocking-uring-test
Vitaliy Filippov 2019-11-15 13:58:35 +03:00
parent 71194f014a
commit c38c8ab029
4 changed files with 83 additions and 21 deletions

View File

@ -33,7 +33,6 @@
#define ST_J_WRITTEN 3 #define ST_J_WRITTEN 3
#define ST_J_SYNCED 4 #define ST_J_SYNCED 4
#define ST_J_STABLE 5 #define ST_J_STABLE 5
#define ST_J_MOVE_READ_SUBMITTED 6
#define ST_J_MOVE_WRITE_SUBMITTED 7 #define ST_J_MOVE_WRITE_SUBMITTED 7
#define ST_J_MOVE_SYNCED 8 #define ST_J_MOVE_SYNCED 8
@ -109,6 +108,11 @@ inline bool operator == (const object_id & a, const object_id & b)
return a.inode == b.inode && a.stripe == b.stripe; return a.inode == b.inode && a.stripe == b.stripe;
} }
// Inequality for object identifiers: two ids differ as soon as either the
// inode or the stripe differs.
inline bool operator != (const object_id & a, const object_id & b)
{
    return !(a.inode == b.inode && a.stripe == b.stripe);
}
inline bool operator < (const object_id & a, const object_id & b) inline bool operator < (const object_id & a, const object_id & b)
{ {
return a.inode < b.inode || a.inode == b.inode && a.stripe < b.stripe; return a.inode < b.inode || a.inode == b.inode && a.stripe < b.stripe;

View File

@ -46,6 +46,34 @@ void journal_flusher_t::loop()
} }
} }
// Schedule a flush of object ov.oid up to version ov.version.
//
// A not-yet-tracked object is appended to the BACK of flush_queue and its
// target version is recorded in flush_versions. An object that is already
// queued keeps its queue position; only its target version may be raised.
void journal_flusher_t::queue_flush(obj_ver_id ov)
{
    // One lookup instead of find() + operator[]: emplace() inserts only when
    // the object is not tracked yet and tells us which case happened.
    auto res = flush_versions.emplace(ov.oid, ov.version);
    if (res.second)
    {
        // Newly tracked object - it needs a slot in the queue as well
        flush_queue.push_back(ov.oid);
    }
    else if (res.first->second < ov.version)
    {
        // Already queued: never lower the pending target version, otherwise
        // a late request for an older version would drop the newer one
        // (assumes a larger version number means a newer version)
        res.first->second = ov.version;
    }
}
// Schedule a flush of object ov.oid up to version ov.version with priority.
//
// A not-yet-tracked object is inserted at the FRONT of flush_queue and its
// target version is recorded in flush_versions. An object that is already
// queued keeps its queue position; only its target version may be raised.
void journal_flusher_t::unshift_flush(obj_ver_id ov)
{
    // One lookup instead of find() + operator[]: emplace() inserts only when
    // the object is not tracked yet and tells us which case happened.
    auto res = flush_versions.emplace(ov.oid, ov.version);
    if (res.second)
    {
        // Newly tracked object - put it at the head so it is picked up next
        flush_queue.push_front(ov.oid);
    }
    else if (res.first->second < ov.version)
    {
        // Already queued: never lower the pending target version. A re-queued
        // version may be older than one that was queued in the meantime
        // (assumes a larger version number means a newer version)
        res.first->second = ov.version;
    }
}
#define await_sqe(label) \ #define await_sqe(label) \
resume_##label:\ resume_##label:\
sqe = bs->get_sqe();\ sqe = bs->get_sqe();\
@ -85,11 +113,26 @@ void journal_flusher_co::loop()
resume_0: resume_0:
if (!flusher->flush_queue.size()) if (!flusher->flush_queue.size())
return; return;
cur = flusher->flush_queue.front(); cur.oid = flusher->flush_queue.front();
cur.version = flusher->flush_versions[cur.oid];
flusher->flush_queue.pop_front(); flusher->flush_queue.pop_front();
flusher->flush_versions.erase(cur.oid);
dirty_end = bs->dirty_db.find(cur); dirty_end = bs->dirty_db.find(cur);
if (dirty_end != bs->dirty_db.end()) if (dirty_end != bs->dirty_db.end())
{ {
repeat_it = flusher->sync_to_repeat.find(cur.oid);
if (repeat_it != flusher->sync_to_repeat.end())
{
// We don't flush different parts of history of the same object in parallel
// So we check if someone is already flushing this object
// In that case we store our version in sync_to_repeat and pick another object
// Another coroutine will see this non-zero version and re-queue the object after it finishes
repeat_it->second = cur.version;
wait_state = 0;
goto resume_0;
}
else
repeat_it->second = 0;
dirty_it = dirty_end; dirty_it = dirty_end;
flusher->active_flushers++; flusher->active_flushers++;
flusher->active_until_sync++; flusher->active_until_sync++;
@ -99,7 +142,7 @@ resume_0:
skip_copy = false; skip_copy = false;
do do
{ {
if (dirty_it->second.state == ST_J_STABLE) if (dirty_it->second.state == ST_J_STABLE && !skip_copy)
{ {
// First we submit all reads // First we submit all reads
offset = dirty_it->second.offset; offset = dirty_it->second.offset;
@ -127,8 +170,6 @@ resume_0:
break; break;
} }
} }
// So subsequent stabilizers don't flush the entry again
dirty_it->second.state = ST_J_MOVE_READ_SUBMITTED;
} }
else if (dirty_it->second.state == ST_D_STABLE) else if (dirty_it->second.state == ST_D_STABLE)
{ {
@ -139,16 +180,10 @@ resume_0:
} }
skip_copy = true; skip_copy = true;
} }
else if (IS_STABLE(dirty_it->second.state)) else if (!IS_STABLE(dirty_it->second.state))
{
// Other coroutine is already flushing it, stop
break;
}
else
{ {
throw new std::runtime_error("BUG: Unexpected dirty_entry state during flush: " + std::to_string(dirty_it->second.state)); throw new std::runtime_error("BUG: Unexpected dirty_entry state during flush: " + std::to_string(dirty_it->second.state));
} }
dirty_start = dirty_it;
dirty_it--; dirty_it--;
} while (dirty_it != bs->dirty_db.begin() && dirty_it->first.oid == cur.oid); } while (dirty_it != bs->dirty_db.begin() && dirty_it->first.oid == cur.oid);
if (wait_count == 0 && clean_loc == UINT64_MAX) if (wait_count == 0 && clean_loc == UINT64_MAX)
@ -156,6 +191,13 @@ resume_0:
// Nothing to flush // Nothing to flush
flusher->active_flushers--; flusher->active_flushers--;
flusher->active_until_sync--; flusher->active_until_sync--;
repeat_it = flusher->sync_to_repeat.find(cur.oid);
if (repeat_it->second != 0)
{
// Requeue version
flusher->unshift_flush({ .oid = cur.oid, .version = repeat_it->second });
}
flusher->sync_to_repeat.erase(repeat_it);
wait_state = 0; wait_state = 0;
goto resume_0; goto resume_0;
} }
@ -313,24 +355,34 @@ resume_0:
.version = cur.version, .version = cur.version,
.location = clean_loc, .location = clean_loc,
}; };
for (dirty_it = dirty_start; dirty_it != dirty_end; dirty_it++) dirty_it = dirty_end;
do
{ {
if (IS_BIG_WRITE(dirty_it->second.state) && dirty_it->second.location != clean_loc) if (IS_BIG_WRITE(dirty_it->second.state) && dirty_it->second.location != clean_loc)
{ {
allocator_set(bs->data_alloc, dirty_it->second.location >> bs->block_order, false); allocator_set(bs->data_alloc, dirty_it->second.location >> bs->block_order, false);
} }
int used = --bs->journal.used_sectors[dirty_it->second.journal_sector]; int used = --bs->journal.used_sectors[dirty_it->second.journal_sector];
if (used == 1) if (used == 0)
{ {
bs->journal.used_sectors.erase(dirty_it->second.journal_sector); bs->journal.used_sectors.erase(dirty_it->second.journal_sector);
} }
} dirty_it--;
// Then, basically, remove the whole version range from dirty_db... } while (dirty_it != bs->dirty_db.begin() && dirty_it->first.oid == cur.oid);
// FIXME not until dirty_start, until other object. And wait for previous flushes. // Then, basically, remove everything up to the current version from dirty_db...
bs->dirty_db.erase(dirty_start, std::next(dirty_end)); if (dirty_it->first.oid != cur.oid)
dirty_it++;
bs->dirty_db.erase(dirty_it, std::next(dirty_end));
// FIXME: ...and clear unused part of the journal (with some interval, not for every flushed op) // FIXME: ...and clear unused part of the journal (with some interval, not for every flushed op)
wait_state = 0; wait_state = 0;
flusher->active_flushers--; flusher->active_flushers--;
repeat_it = flusher->sync_to_repeat.find(cur.oid);
if (repeat_it->second != 0)
{
// Requeue version
flusher->unshift_flush({ .oid = cur.oid, .version = repeat_it->second });
}
flusher->sync_to_repeat.erase(repeat_it);
goto resume_0; goto resume_0;
} }
} }

View File

@ -36,6 +36,7 @@ class journal_flusher_co
std::vector<copy_buffer_t>::iterator it; std::vector<copy_buffer_t>::iterator it;
uint64_t offset, len, submit_len, clean_loc, meta_sector, meta_pos; uint64_t offset, len, submit_len, clean_loc, meta_sector, meta_pos;
std::map<uint64_t, meta_sector_t>::iterator meta_it; std::map<uint64_t, meta_sector_t>::iterator meta_it;
std::map<object_id, uint64_t>::iterator repeat_it;
std::function<void(ring_data_t*)> simple_callback; std::function<void(ring_data_t*)> simple_callback;
std::list<flusher_sync_t>::iterator cur_sync; std::list<flusher_sync_t>::iterator cur_sync;
friend class journal_flusher_t; friend class journal_flusher_t;
@ -56,10 +57,15 @@ class journal_flusher_t
int active_flushers, active_until_sync; int active_flushers, active_until_sync;
std::list<flusher_sync_t> syncs; std::list<flusher_sync_t> syncs;
public: std::map<object_id, uint64_t> sync_to_repeat;
std::map<uint64_t, meta_sector_t> meta_sectors; std::map<uint64_t, meta_sector_t> meta_sectors;
std::deque<obj_ver_id> flush_queue; std::deque<object_id> flush_queue;
std::map<object_id, uint64_t> flush_versions;
public:
journal_flusher_t(int flusher_count, blockstore *bs); journal_flusher_t(int flusher_count, blockstore *bs);
~journal_flusher_t(); ~journal_flusher_t();
void loop(); void loop();
void queue_flush(obj_ver_id oid);
void unshift_flush(obj_ver_id oid);
}; };

View File

@ -139,7 +139,7 @@ void blockstore::handle_stable_event(ring_data_t *data, blockstore_operation *op
} }
dirty_it--; dirty_it--;
} while (dirty_it != dirty_db.begin() && dirty_it->first.oid == v->oid); } while (dirty_it != dirty_db.begin() && dirty_it->first.oid == v->oid);
flusher->flush_queue.push_back(*v); flusher->queue_flush(*v);
} }
} }
// Acknowledge op // Acknowledge op