Browse Source

Implement PG state locking and PG moving in response to etcd events

trace-sqes
Vitaliy Filippov 1 year ago
parent
commit
7b57eeeeb3
  1. 2
      Makefile
  2. 18
      osd.cpp
  3. 70
      osd.h
  4. 742
      osd_cluster.cpp
  5. 6
      osd_flush.cpp
  6. 37
      osd_http.cpp
  7. 6
      osd_http.h
  8. 2
      osd_main.cpp
  9. 94
      osd_peering.cpp
  10. 41
      osd_peering_pg.cpp
  11. 22
      osd_peering_pg.h
  12. 3
      osd_peering_pg_test.cpp
  13. 8
      osd_primary.cpp

2
Makefile

@ -67,6 +67,8 @@ rw_blocking.o: rw_blocking.cpp rw_blocking.h
g++ $(CXXFLAGS) -c -o $@ $<
osd_test: osd_test.cpp osd_ops.h rw_blocking.o
g++ $(CXXFLAGS) -o osd_test osd_test.cpp rw_blocking.o -ltcmalloc_minimal
osd_peering_pg_test: osd_peering_pg_test.cpp osd_peering_pg.o
g++ $(CXXFLAGS) -o $@ $< osd_peering_pg.o -ltcmalloc_minimal
libfio_sec_osd.so: fio_sec_osd.cpp osd_ops.h rw_blocking.o
g++ $(CXXFLAGS) -ltcmalloc_minimal -shared -o libfio_sec_osd.so fio_sec_osd.cpp rw_blocking.o -luring

18
osd.cpp

@ -106,10 +106,8 @@ void osd_t::parse_config(blockstore_config_t & config)
int major, minor;
if (sscanf(config["etcd_version"].c_str(), "%d.%d", &major, &minor) < 2)
throw std::runtime_error("etcd_version should be in the form MAJOR.MINOR (for example, 3.2)");
if (major < 3 || major == 3 && minor < 2)
throw std::runtime_error("Your etcd is too old, minimum required version is 3.2");
else if (major == 3 && minor == 2)
etcd_api_path = "/v3alpha";
if (major < 3 || major == 3 && minor < 3)
throw std::runtime_error("Your etcd is too old, minimum required version is 3.3");
else if (major == 3 && minor == 3)
etcd_api_path = "/v3beta";
else
@ -117,10 +115,6 @@ void osd_t::parse_config(blockstore_config_t & config)
}
else
etcd_api_path = "/v3";
if ((pos = etcd_address.find(':')) >= 0)
etcd_host = etcd_address.substr(0, pos);
else
etcd_host = etcd_address;
etcd_report_interval = strtoull(config["etcd_report_interval"].c_str(), NULL, 10);
if (etcd_report_interval <= 0)
etcd_report_interval = 30;
@ -153,12 +147,10 @@ void osd_t::parse_config(blockstore_config_t & config)
peer_connect_interval = strtoull(config["peer_connect_interval"].c_str(), NULL, 10);
if (!peer_connect_interval)
peer_connect_interval = 5;
http_request_timeout = strtoull(config["http_request_timeout"].c_str(), NULL, 10);
if (!http_request_timeout)
http_request_timeout = 5;
peer_connect_timeout = strtoull(config["peer_connect_timeout"].c_str(), NULL, 10);
if (!peer_connect_timeout)
peer_connect_timeout = 5;
log_level = strtoull(config["log_level"].c_str(), NULL, 10);
}
void osd_t::bind_socket()
@ -394,13 +386,13 @@ void osd_t::stop_client(int peer_fd)
if (cl.osd_num)
{
// Reload configuration from etcd when the connection is dropped
printf("[%lu] Stopping client %d (OSD peer %lu)\n", osd_num, peer_fd, cl.osd_num);
printf("[OSD %lu] Stopping client %d (OSD peer %lu)\n", osd_num, peer_fd, cl.osd_num);
peer_states.erase(cl.osd_num);
repeer_pgs(cl.osd_num);
}
else
{
printf("[%lu] Stopping client %d (regular client)\n", osd_num, peer_fd);
printf("[OSD %lu] Stopping client %d (regular client)\n", osd_num, peer_fd);
}
}
clients.erase(it);

70
osd.h

@ -18,6 +18,7 @@
#include "timerfd_manager.h"
#include "osd_ops.h"
#include "osd_peering_pg.h"
#include "osd_http.h"
#include "json11/json11.hpp"
#define OSD_OP_IN 0
@ -49,10 +50,6 @@
#define MAX_RECOVERY_QUEUE 2048
#define DEFAULT_RECOVERY_QUEUE 4
#define MAX_ETCD_ATTEMPTS 5
#define ETCD_START_INTERVAL 5000
#define ETCD_RETRY_INTERVAL 1000
//#define OSD_STUB
extern const char* osd_op_names[];
@ -189,16 +186,30 @@ struct osd_wanted_peer_t
int address_index;
};
struct http_response_t;
struct pg_config_t
{
bool exists;
osd_num_t primary;
std::vector<osd_num_t> target_set;
std::vector<std::vector<osd_num_t>> target_history;
bool pause;
osd_num_t cur_primary;
int cur_state;
};
struct websocket_t;
struct json_kv_t
{
std::string key;
json11::Json value;
};
class osd_t
{
// config
blockstore_config_t config;
std::string etcd_address, etcd_host, etcd_prefix, etcd_api_path;
// FIXME Allow multiple etcd addresses and select random address
std::string etcd_address, etcd_prefix, etcd_api_path;
int etcd_report_interval = 30;
bool readonly = false;
@ -214,16 +225,26 @@ class osd_t
int autosync_interval = DEFAULT_AUTOSYNC_INTERVAL; // sync every 5 seconds
int recovery_queue_depth = DEFAULT_RECOVERY_QUEUE;
int peer_connect_interval = 5;
int http_request_timeout = 5;
int peer_connect_timeout = 5;
int log_level = 0;
// peer OSDs
// cluster state
std::string etcd_lease_id, etcd_watch_revision;
std::string etcd_lease_id;
int etcd_watches_initialised = 0;
uint64_t etcd_watch_revision = 0;
websocket_t *etcd_watch_ws = NULL;
std::map<osd_num_t, json11::Json> peer_states;
std::map<osd_num_t, osd_wanted_peer_t> wanted_peers;
bool loading_peer_config = false;
int etcd_failed_attempts = 0;
std::map<pg_num_t, pg_config_t> pg_config;
std::set<pg_num_t> pg_state_dirty;
bool pg_config_applied = false;
bool etcd_reporting_pg_state = false;
bool etcd_reporting_stats = false;
// peers and PGs
std::map<uint64_t, int> osd_peer_fds;
std::map<pg_num_t, pg_t> pgs;
@ -267,26 +288,36 @@ class osd_t
uint64_t subop_stat_count[2][OSD_OP_MAX+1] = { 0 };
// cluster connection
void http_request(std::string host, std::string request, bool streaming, std::function<void(const http_response_t *response)> callback);
void http_request_json(std::string host, std::string request, std::function<void(std::string, json11::Json data)> callback);
websocket_t* open_websocket(std::string host, std::string path, std::function<void(const http_response_t *msg)> callback);
void etcd_call(std::string api, json11::Json payload, std::function<void(std::string, json11::Json)> callback);
void etcd_txn(json11::Json txn, std::function<void(std::string, json11::Json)> callback);
void http_request(const std::string & host, const std::string & request,
const http_options_t & options, std::function<void(const http_response_t *response)> callback);
void http_request_json(const std::string & host, const std::string & request, int timeout,
std::function<void(std::string, json11::Json data)> callback);
websocket_t* open_websocket(const std::string & host, const std::string & path, int timeout,
std::function<void(const http_response_t *msg)> callback);
void etcd_call(std::string api, json11::Json payload, int timeout, std::function<void(std::string, json11::Json)> callback);
void etcd_txn(json11::Json txn, int timeout, std::function<void(std::string, json11::Json)> callback);
json_kv_t parse_etcd_kv(const json11::Json & kv_json);
void parse_config(blockstore_config_t & config);
void init_cluster();
void start_etcd_watcher();
void load_global_config();
void bind_socket();
void acquire_lease();
void create_state();
json11::Json get_osd_state();
void create_osd_state();
void renew_lease();
void print_stats();
void reset_stats();
json11::Json get_status();
json11::Json get_statistics();
void report_statistics();
void report_pg_state(pg_t & pg);
void report_pg_states();
void load_pgs();
void parse_pgs(const json11::Json & pg_config, const std::map<pg_num_t, json11::Json> & pg_history);
void parse_pg_state(const std::string & key, const json11::Json & value);
void apply_pg_count();
void apply_pg_config();
void load_and_connect_peers();
void parse_etcd_osd_state(const std::string & key, const json11::Json & value);
// event loop, socket read/write
void loop();
@ -313,6 +344,7 @@ class osd_t
void repeer_pgs(osd_num_t osd_num);
void start_pg_peering(pg_num_t pg_num);
void submit_list_subop(osd_num_t role_osd, pg_peering_state_t *ps);
void discard_list_subop(osd_op_t *list_op);
bool stop_pg(pg_num_t pg_num);
void finish_stop_pg(pg_t & pg);
@ -356,7 +388,7 @@ class osd_t
public:
osd_t(blockstore_config_t & config, blockstore_t *bs, ring_loop_t *ringloop);
~osd_t();
void force_stop();
void force_stop(int exitcode);
bool shutdown();
};

742
osd_cluster.cpp
File diff suppressed because it is too large
View File

6
osd_flush.cpp

@ -129,12 +129,16 @@ void osd_t::handle_flush_op(pg_num_t pg_num, pg_flush_batch_t *fb, osd_num_t osd
if (!pg.flush_actions.size())
{
pg.state = pg.state & ~PG_HAS_UNCLEAN;
pg.print_state();
report_pg_state(pg);
}
for (osd_op_t *op: continue_ops)
{
continue_primary_write(op);
}
if (pg.inflight == 0 && (pg.state & PG_STOPPING))
{
finish_stop_pg(pg);
}
}
}

37
osd_http.cpp

@ -18,6 +18,7 @@ static std::string trim(const std::string & in);
static std::string ws_format_frame(int type, uint64_t size);
static bool ws_parse_frame(std::string & buf, int & type, std::string & res);
// FIXME: Use keepalive
struct http_co_t
{
ring_loop_t *ringloop;
@ -66,15 +67,19 @@ struct http_co_t
#define HTTP_CO_WEBSOCKET 5
#define HTTP_CO_CHUNKED 6
void osd_t::http_request(std::string host, std::string request, bool streaming, std::function<void(const http_response_t *response)> callback)
#define DEFAULT_TIMEOUT 5000
// FIXME: Remove osd_t dependency from here
void osd_t::http_request(const std::string & host, const std::string & request,
const http_options_t & options, std::function<void(const http_response_t *response)> callback)
{
http_co_t *handler = new http_co_t();
handler->ringloop = ringloop;
handler->epoll_fd = epoll_fd;
handler->epoll_handlers = &epoll_handlers;
handler->request_timeout = http_request_timeout;
handler->request_timeout = options.timeout < 0 ? 0 : (options.timeout == 0 ? DEFAULT_TIMEOUT : options.timeout);
handler->want_streaming = options.want_streaming;
handler->tfd = tfd;
handler->want_streaming = streaming;
handler->host = host;
handler->request = request;
handler->callback = callback;
@ -82,10 +87,10 @@ void osd_t::http_request(std::string host, std::string request, bool streaming,
handler->start_connection();
}
void osd_t::http_request_json(std::string host, std::string request,
std::function<void(std::string, json11::Json r)> callback)
void osd_t::http_request_json(const std::string & host, const std::string & request,
int timeout, std::function<void(std::string, json11::Json r)> callback)
{
http_request(host, request, false, [this, callback](const http_response_t* res)
http_request(host, request, { .timeout = timeout }, [this, callback](const http_response_t* res)
{
if (res->error_code != 0)
{
@ -108,7 +113,8 @@ void osd_t::http_request_json(std::string host, std::string request,
});
}
websocket_t* osd_t::open_websocket(std::string host, std::string path, std::function<void(const http_response_t *msg)> callback)
websocket_t* osd_t::open_websocket(const std::string & host, const std::string & path,
int timeout, std::function<void(const http_response_t *msg)> callback)
{
std::string request = "GET "+path+" HTTP/1.1\r\n"
"Host: "+host+"\r\n"
@ -121,9 +127,9 @@ websocket_t* osd_t::open_websocket(std::string host, std::string path, std::func
handler->ringloop = ringloop;
handler->epoll_fd = epoll_fd;
handler->epoll_handlers = &epoll_handlers;
handler->request_timeout = http_request_timeout;
handler->tfd = tfd;
handler->request_timeout = timeout < 0 ? -1 : (timeout == 0 ? DEFAULT_TIMEOUT : timeout);
handler->want_streaming = false;
handler->tfd = tfd;
handler->host = host;
handler->request = request;
handler->callback = callback;
@ -215,11 +221,11 @@ void http_co_t::start_connection()
fcntl(peer_fd, F_SETFL, fcntl(peer_fd, F_GETFL, 0) | O_NONBLOCK);
if (request_timeout > 0)
{
timeout_id = tfd->set_timer(1000*request_timeout, false, [this](int timer_id)
timeout_id = tfd->set_timer(request_timeout, false, [this](int timer_id)
{
if (response.length() == 0)
{
parsed.error_code = EIO;
parsed.error_code = ETIME;
}
delete this;
});
@ -440,8 +446,8 @@ void http_co_t::handle_read()
if (!len)
{
// Zero length chunk indicates EOF
delete this;
return;
parsed.eof = true;
break;
}
if (response.size() < pos+2+len+2)
{
@ -454,6 +460,11 @@ void http_co_t::handle_read()
{
response = response.substr(prev);
}
if (parsed.eof)
{
delete this;
return;
}
if (want_streaming && parsed.body.size() > 0)
{
callback(&parsed);

6
osd_http.h

@ -10,6 +10,12 @@
#define WS_PING 9
#define WS_PONG 10
struct http_options_t
{
int timeout;
bool want_streaming;
};
struct http_response_t
{
bool eof = false;

2
osd_main.cpp

@ -10,7 +10,7 @@ static void handle_sigint(int sig)
if (osd && !force_stopping)
{
force_stopping = true;
osd->force_stop();
osd->force_stop(0);
return;
}
exit(0);

94
osd_peering.cpp

@ -168,12 +168,13 @@ void osd_t::handle_peers()
{
if (!p.second.peering_state->list_ops.size())
{
p.second.calc_object_states();
p.second.calc_object_states(log_level);
report_pg_state(p.second);
incomplete_objects += p.second.incomplete_objects.size();
misplaced_objects += p.second.misplaced_objects.size();
// FIXME: degraded objects may currently include misplaced, too! Report them separately?
degraded_objects += p.second.degraded_objects.size();
if (p.second.state & PG_HAS_UNCLEAN)
if (p.second.state & (PG_ACTIVE | PG_HAS_UNCLEAN) == (PG_ACTIVE | PG_HAS_UNCLEAN))
peering_state = peering_state | OSD_FLUSHING_PGS;
else
peering_state = peering_state | OSD_RECOVERING;
@ -195,7 +196,7 @@ void osd_t::handle_peers()
bool still = false;
for (auto & p: pgs)
{
if (p.second.state & PG_HAS_UNCLEAN)
if (p.second.state & (PG_ACTIVE | PG_HAS_UNCLEAN) == (PG_ACTIVE | PG_HAS_UNCLEAN))
{
if (!p.second.flush_batch)
{
@ -224,7 +225,7 @@ void osd_t::repeer_pgs(osd_num_t peer_osd)
for (auto & p: pgs)
{
bool repeer = false;
if (p.second.state != PG_OFFLINE)
if (p.second.state & (PG_PEERING | PG_ACTIVE | PG_INCOMPLETE))
{
for (osd_num_t pg_osd: p.second.all_peers)
{
@ -239,7 +240,6 @@ void osd_t::repeer_pgs(osd_num_t peer_osd)
// Repeer this pg
printf("[PG %u] Repeer because of OSD %lu\n", p.second.pg_num, peer_osd);
start_pg_peering(p.second.pg_num);
peering_state |= OSD_PEERING_PGS;
}
}
}
@ -250,7 +250,8 @@ void osd_t::start_pg_peering(pg_num_t pg_num)
{
auto & pg = pgs[pg_num];
pg.state = PG_PEERING;
pg.print_state();
this->peering_state |= OSD_PEERING_PGS;
report_pg_state(pg);
// Reset PG state
pg.state_dict.clear();
incomplete_objects -= pg.incomplete_objects.size();
@ -312,14 +313,14 @@ void osd_t::start_pg_peering(pg_num_t pg_num)
if (!found)
{
pg.state = PG_INCOMPLETE;
pg.print_state();
report_pg_state(pg);
}
}
}
if (pg.pg_cursize < pg.pg_minsize)
{
pg.state = PG_INCOMPLETE;
pg.print_state();
report_pg_state(pg);
}
std::set<osd_num_t> cur_peers;
for (auto peer_osd: pg.all_peers)
@ -342,25 +343,7 @@ void osd_t::start_pg_peering(pg_num_t pg_num)
if (pg.state == PG_INCOMPLETE || cur_peers.find(it->first) == cur_peers.end())
{
// Discard the result after completion, which, chances are, will be unsuccessful
auto list_op = it->second;
if (list_op->peer_fd == 0)
{
// Self
list_op->bs_op->callback = [list_op](blockstore_op_t *bs_op)
{
if (list_op->bs_op->buf)
free(list_op->bs_op->buf);
delete list_op;
};
}
else
{
// Peer
list_op->callback = [](osd_op_t *list_op)
{
delete list_op;
};
}
discard_list_subop(it->second);
pg.peering_state->list_ops.erase(it);
it = pg.peering_state->list_ops.begin();
}
@ -491,6 +474,28 @@ void osd_t::submit_list_subop(osd_num_t role_osd, pg_peering_state_t *ps)
}
}
void osd_t::discard_list_subop(osd_op_t *list_op)
{
if (list_op->peer_fd == 0)
{
// Self
list_op->bs_op->callback = [list_op](blockstore_op_t *bs_op)
{
if (list_op->bs_op->buf)
free(list_op->bs_op->buf);
delete list_op;
};
}
else
{
// Peer
list_op->callback = [](osd_op_t *list_op)
{
delete list_op;
};
}
}
bool osd_t::stop_pg(pg_num_t pg_num)
{
auto pg_it = pgs.find(pg_num);
@ -499,19 +504,52 @@ bool osd_t::stop_pg(pg_num_t pg_num)
return false;
}
auto & pg = pg_it->second;
if (pg.peering_state)
{
// Stop peering
for (auto it = pg.peering_state->list_ops.begin(); it != pg.peering_state->list_ops.end();)
{
discard_list_subop(it->second);
}
for (auto it = pg.peering_state->list_results.begin(); it != pg.peering_state->list_results.end();)
{
if (it->second.buf)
{
free(it->second.buf);
}
}
delete pg.peering_state;
pg.peering_state = NULL;
}
if (!(pg.state & PG_ACTIVE))
{
return false;
}
pg.state = pg.state & ~PG_ACTIVE | PG_STOPPING;
if (pg.inflight == 0)
if (pg.inflight == 0 && !pg.flush_batch)
{
finish_stop_pg(pg);
}
else
{
report_pg_state(pg);
}
return true;
}
void osd_t::finish_stop_pg(pg_t & pg)
{
pg.state = PG_OFFLINE;
report_pg_state(pg);
}
void osd_t::report_pg_state(pg_t & pg)
{
pg.print_state();
this->pg_state_dirty.insert(pg.pg_num);
if (pg.state == PG_OFFLINE && !this->pg_config_applied)
{
apply_pg_config();
}
report_pg_states();
}

41
osd_peering_pg.cpp

@ -43,6 +43,7 @@ struct pg_obj_state_check_t
uint64_t n_copies = 0, has_roles = 0, n_roles = 0, n_stable = 0, n_mismatched = 0;
uint64_t n_unstable = 0, n_buggy = 0;
pg_osd_set_t osd_set;
int log_level;
void walk();
void start_object();
@ -198,14 +199,16 @@ void pg_obj_state_check_t::finish_object()
}
if (n_roles < pg->pg_minsize)
{
printf("Object is incomplete: inode=%lu stripe=%lu version=%lu/%lu\n", oid.inode, oid.stripe, target_ver, max_ver);
for (int i = ver_start; i < ver_end; i++)
if (log_level > 1)
{
printf("Present on: osd %lu, role %ld%s\n", list[i].osd_num, (list[i].oid.stripe & STRIPE_MASK), list[i].is_stable ? " (stable)" : "");
printf("Object is incomplete: inode=%lu stripe=%lu version=%lu/%lu\n", oid.inode, oid.stripe, target_ver, max_ver);
for (int i = ver_start; i < ver_end; i++)
{
printf("Present on: osd %lu, role %ld%s\n", list[i].osd_num, (list[i].oid.stripe & STRIPE_MASK), list[i].is_stable ? " (stable)" : "");
}
}
if (0)
if (log_level > 2)
{
// For future debug level
for (int i = obj_start; i < obj_end; i++)
{
printf("v%lu present on: osd %lu, role %ld%s\n", list[i].version, list[i].osd_num, (list[i].oid.stripe & STRIPE_MASK), list[i].is_stable ? " (stable)" : "");
@ -216,10 +219,13 @@ void pg_obj_state_check_t::finish_object()
}
else if (n_roles < pg->pg_cursize)
{
printf("Object is degraded: inode=%lu stripe=%lu version=%lu/%lu\n", oid.inode, oid.stripe, target_ver, max_ver);
for (int i = ver_start; i < ver_end; i++)
if (log_level > 1)
{
printf("Present on: osd %lu, role %ld%s\n", list[i].osd_num, (list[i].oid.stripe & STRIPE_MASK), list[i].is_stable ? " (stable)" : "");
printf("Object is degraded: inode=%lu stripe=%lu version=%lu/%lu\n", oid.inode, oid.stripe, target_ver, max_ver);
for (int i = ver_start; i < ver_end; i++)
{
printf("Present on: osd %lu, role %ld%s\n", list[i].osd_num, (list[i].oid.stripe & STRIPE_MASK), list[i].is_stable ? " (stable)" : "");
}
}
state = OBJ_DEGRADED;
pg->state = pg->state | PG_HAS_DEGRADED;
@ -321,10 +327,11 @@ void pg_obj_state_check_t::finish_object()
}
// FIXME: Write at least some tests for this function
void pg_t::calc_object_states()
void pg_t::calc_object_states(int log_level)
{
// Copy all object lists into one array
pg_obj_state_check_t st;
st.log_level = log_level;
st.pg = this;
auto ps = peering_state;
for (auto it: ps->list_results)
@ -352,12 +359,10 @@ void pg_t::calc_object_states()
std::sort(st.list.begin(), st.list.end());
// Walk over it and check object states
st.walk();
print_state();
}
void pg_t::print_state()
{
// FIXME Immediately report state on each change
printf(
"[PG %u] is %s%s%s%s%s%s%s%s%s (%lu objects)\n", pg_num,
(state & PG_OFFLINE) ? "offline" : "",
@ -373,13 +378,15 @@ void pg_t::print_state()
);
}
const int pg_state_bit_count = 10;
const int pg_state_bit_count = 12;
const int pg_state_bits[10] = {
PG_OFFLINE,
const int pg_state_bits[12] = {
PG_STARTING,
PG_PEERING,
PG_INCOMPLETE,
PG_ACTIVE,
PG_STOPPING,
PG_OFFLINE,
PG_DEGRADED,
PG_HAS_INCOMPLETE,
PG_HAS_DEGRADED,
@ -387,11 +394,13 @@ const int pg_state_bits[10] = {
PG_HAS_UNCLEAN,
};
const char *pg_state_names[10] = {
"offline",
const char *pg_state_names[12] = {
"starting",
"peering",
"incomplete",
"active",
"stopping",
"offline",
"degraded",
"has_incomplete",
"has_degraded",

22
osd_peering_pg.h

@ -9,18 +9,20 @@
#include "osd_ops.h"
// Placement group states
// STARTING -> [acquire lock] -> PEERING -> INCOMPLETE|ACTIVE -> STOPPING -> OFFLINE -> [release lock]
// Exactly one of these:
#define PG_OFFLINE (1<<0)
#define PG_STARTING (1<<0)
#define PG_PEERING (1<<1)
#define PG_INCOMPLETE (1<<2)
#define PG_ACTIVE (1<<3)
#define PG_STOPPING (1<<4)
#define PG_OFFLINE (1<<5)
// Plus any of these:
#define PG_DEGRADED (1<<5)
#define PG_HAS_INCOMPLETE (1<<6)
#define PG_HAS_DEGRADED (1<<7)
#define PG_HAS_MISPLACED (1<<8)
#define PG_HAS_UNCLEAN (1<<9)
#define PG_DEGRADED (1<<6)
#define PG_HAS_INCOMPLETE (1<<7)
#define PG_HAS_DEGRADED (1<<8)
#define PG_HAS_MISPLACED (1<<9)
#define PG_HAS_UNCLEAN (1<<10)
// FIXME: Safe default that doesn't depend on pg_stripe_size or pg_block_size
#define STRIPE_MASK ((uint64_t)4096 - 1)
@ -33,8 +35,8 @@
#define OBJ_NEEDS_ROLLBACK 0x20000
#define OBJ_BUGGY 0x80000
extern const int pg_state_bits[10];
extern const char *pg_state_names[10];
extern const int pg_state_bits[];
extern const char *pg_state_names[];
extern const int pg_state_bit_count;
struct pg_obj_loc_t
@ -96,7 +98,7 @@ struct pg_flush_batch_t
struct pg_t
{
int state = PG_OFFLINE;
int state = 0;
uint64_t pg_cursize = 3, pg_size = 3, pg_minsize = 2;
pg_num_t pg_num;
uint64_t clean_count = 0, total_count = 0;
@ -122,7 +124,7 @@ struct pg_t
int inflight = 0; // including write_queue
std::multimap<object_id, osd_op_t*> write_queue;
void calc_object_states();
void calc_object_states(int log_level);
void print_state();
};

3
osd_peering_pg_test.cpp

@ -1,6 +1,7 @@
#define _LARGEFILE64_SOURCE
#include "osd_peering_pg.h"
#define STRIPE_SHIFT 12
/**
* TODO tests for object & pg state calculation.
@ -43,7 +44,7 @@ int main(int argc, char *argv[])
}
pg.peering_state->list_results[osd_num] = r;
}
pg.calc_object_states();
pg.calc_object_states(0);
printf("deviation variants=%ld clean=%lu\n", pg.state_dict.size(), pg.clean_count);
for (auto it: pg.state_dict)
{

8
osd_primary.cpp

@ -51,7 +51,7 @@ void osd_t::finish_op(osd_op_t *cur_op, int retval)
auto & pg = pgs[cur_op->op_data->pg_num];
int n = --pg.inflight;
assert(n >= 0);
if (n == 0 && (pg.state & PG_STOPPING))
if ((pg.state & PG_STOPPING) && n == 0 && !pg.flush_batch)
{
finish_stop_pg(pg);
}
@ -513,7 +513,7 @@ resume_8:
if (!pg.incomplete_objects.size())
{
pg.state = pg.state & ~PG_HAS_INCOMPLETE;
pg.print_state();
report_pg_state(pg);
}
}
else if (op_data->object_state->state & OBJ_DEGRADED)
@ -523,7 +523,7 @@ resume_8:
if (!pg.degraded_objects.size())
{
pg.state = pg.state & ~PG_HAS_DEGRADED;
pg.print_state();
report_pg_state(pg);
}
}
else if (op_data->object_state->state & OBJ_MISPLACED)
@ -533,7 +533,7 @@ resume_8:
if (!pg.misplaced_objects.size())
{
pg.state = pg.state & ~PG_HAS_MISPLACED;
pg.print_state();
report_pg_state(pg);
}
}
else

Loading…
Cancel
Save