Fix vitastor-disk prepare bugs

Vitaliy Filippov 2022-08-18 02:28:43 +03:00
parent c0d5e83fb8
commit 1407db9c08
5 changed files with 52 additions and 34 deletions

View File

@ -219,7 +219,7 @@ void blockstore_disk_t::open_data()
data_fd = open(data_device.c_str(), O_DIRECT|O_RDWR); data_fd = open(data_device.c_str(), O_DIRECT|O_RDWR);
if (data_fd == -1) if (data_fd == -1)
{ {
throw std::runtime_error("Failed to open data device"); throw std::runtime_error("Failed to open data device "+data_device+": "+std::string(strerror(errno)));
} }
check_size(data_fd, &data_device_size, &data_device_sect, "data device"); check_size(data_fd, &data_device_size, &data_device_sect, "data device");
if (disk_alignment % data_device_sect) if (disk_alignment % data_device_sect)
@ -243,11 +243,10 @@ void blockstore_disk_t::open_meta()
{ {
if (meta_device != data_device) if (meta_device != data_device)
{ {
meta_offset = 0;
meta_fd = open(meta_device.c_str(), O_DIRECT|O_RDWR); meta_fd = open(meta_device.c_str(), O_DIRECT|O_RDWR);
if (meta_fd == -1) if (meta_fd == -1)
{ {
throw std::runtime_error("Failed to open metadata device"); throw std::runtime_error("Failed to open metadata device "+meta_device+": "+std::string(strerror(errno)));
} }
check_size(meta_fd, &meta_device_size, &meta_device_sect, "metadata device"); check_size(meta_fd, &meta_device_size, &meta_device_sect, "metadata device");
if (meta_offset >= meta_device_size) if (meta_offset >= meta_device_size)
@ -285,7 +284,7 @@ void blockstore_disk_t::open_journal()
journal_fd = open(journal_device.c_str(), O_DIRECT|O_RDWR); journal_fd = open(journal_device.c_str(), O_DIRECT|O_RDWR);
if (journal_fd == -1) if (journal_fd == -1)
{ {
throw std::runtime_error("Failed to open journal device"); throw std::runtime_error("Failed to open journal device "+journal_device+": "+std::string(strerror(errno)));
} }
check_size(journal_fd, &journal_device_size, &journal_device_sect, "journal device"); check_size(journal_fd, &journal_device_size, &journal_device_sect, "journal device");
if (!disable_flock && flock(journal_fd, LOCK_EX|LOCK_NB) != 0) if (!disable_flock && flock(journal_fd, LOCK_EX|LOCK_NB) != 0)

View File

@ -12,13 +12,13 @@ static const char *help_text =
"\n" "\n"
"vitastor-disk prepare [OPTIONS] [devices...]\n" "vitastor-disk prepare [OPTIONS] [devices...]\n"
" Initialize disk(s) for Vitastor OSD(s).\n" " Initialize disk(s) for Vitastor OSD(s).\n"
" There are two forms of this command. In the first form, you pass <devices> which\n" " There are two modes of this command. In the first mode, you pass <devices> which\n"
" must be raw disks (not partitions). They are partitioned automatically and OSDs\n" " must be raw disks (not partitions). They are partitioned automatically and OSDs\n"
" are initialized on all of them.\n" " are initialized on all of them.\n"
" In the second form, you omit <devices> and pass --data_device, --journal_device\n" " In the second mode, you omit <devices> and pass --data_device, --journal_device\n"
" and/or --meta_device which must be already existing partitions. In this case\n" " and/or --meta_device which must be already existing partitions identified by their\n"
" a single OSD is created.\n" " GPT partition UUIDs. In this case a single OSD is created.\n"
" Requires `vitastor-cli`, `blkid`, `sfdisk` and `partprobe` (from parted) utilities.\n" " Requires `vitastor-cli`, `wipefs`, `sfdisk` and `partprobe` (from parted) utilities.\n"
" OPTIONS may include:\n" " OPTIONS may include:\n"
" --hybrid\n" " --hybrid\n"
" Prepare hybrid (HDD+SSD) OSDs using provided devices. SSDs will be used for\n" " Prepare hybrid (HDD+SSD) OSDs using provided devices. SSDs will be used for\n"
@ -30,6 +30,7 @@ static const char *help_text =
" --data_device <DEV> Create a single OSD using partition <DEV> for data\n" " --data_device <DEV> Create a single OSD using partition <DEV> for data\n"
" --meta_device <DEV> Create a single OSD using partition <DEV> for metadata\n" " --meta_device <DEV> Create a single OSD using partition <DEV> for metadata\n"
" --journal_device <DEV> Create a single OSD using partition <DEV> for journal\n" " --journal_device <DEV> Create a single OSD using partition <DEV> for journal\n"
" --force Bypass checks on data/meta/journal partitions\n"
" --journal_size 1G/32M Set journal size\n" " --journal_size 1G/32M Set journal size\n"
" --object_size 1M/128k Set blockstore object size\n" " --object_size 1M/128k Set blockstore object size\n"
" --disable_ssd_cache 1 Disable cache and fsyncs for SSD journal and metadata\n" " --disable_ssd_cache 1 Disable cache and fsyncs for SSD journal and metadata\n"
@ -43,8 +44,8 @@ static const char *help_text =
" metadata size to ease possible future extension. The default is to allocate\n" " metadata size to ease possible future extension. The default is to allocate\n"
" 2 times more space and at least 1G. Use this option to override.\n" " 2 times more space and at least 1G. Use this option to override.\n"
" --max_other 10%\n" " --max_other 10%\n"
" Use disks for OSD data even if they already have non-Vitastor partitions,\n" " In the automatic mode, use disks for OSD data even if they already have non-Vitastor\n"
" but only if these take up no more than this percent of disk space.\n" " partitions, but only if these take up no more than this percent of disk space.\n"
"\n" "\n"
"vitastor-disk upgrade-simple <UNIT_FILE|OSD_NUMBER>\n" "vitastor-disk upgrade-simple <UNIT_FILE|OSD_NUMBER>\n"
" Upgrade an OSD created by old (0.7.1 and older) make-osd.sh or make-osd-hybrid.js scripts.\n" " Upgrade an OSD created by old (0.7.1 and older) make-osd.sh or make-osd-hybrid.js scripts.\n"
@ -149,7 +150,7 @@ int main(int argc, char *argv[])
{ {
self.options["hybrid"] = "1"; self.options["hybrid"] = "1";
} }
else if (!strcmp(argv[i], "--help")) else if (!strcmp(argv[i], "--help") || !strcmp(argv[i], "-h"))
{ {
cmd.insert(cmd.begin(), (char*)"help"); cmd.insert(cmd.begin(), (char*)"help");
} }

View File

@ -30,6 +30,12 @@ int disk_tool_t::prepare_one(std::map<std::string, std::string> options, int is_
const auto & dev = all_devs[i]; const auto & dev = all_devs[i];
if (dev == "") if (dev == "")
continue; continue;
if (dev.substr(0, 22) != "/dev/disk/by-partuuid/")
{
// Partitions should be identified by GPT partition UUID
fprintf(stderr, "%s does not start with /dev/disk/by-partuuid/. Partitions should be identified by GPT partition UUIDs\n", dev.c_str());
return 1;
}
std::string real_dev = realpath_str(dev, false); std::string real_dev = realpath_str(dev, false);
if (real_dev == "") if (real_dev == "")
return 1; return 1;
@ -42,11 +48,11 @@ int disk_tool_t::prepare_one(std::map<std::string, std::string> options, int is_
return 1; return 1;
} }
if (i == 0 && is_hdd == -1) if (i == 0 && is_hdd == -1)
is_hdd = read_file("/sys/block/"+parent_dev+"/queue/rotational") == "1"; is_hdd = trim(read_file("/sys/block/"+parent_dev+"/queue/rotational")) == "1";
std::string out; std::string out;
if (shell_exec({ "blkid", "-D", "-p", dev }, "", &out, NULL) == 0) if (shell_exec({ "wipefs", dev }, "", &out, NULL) != 0 || out != "")
{ {
fprintf(stderr, "%s contains data, not creating OSD without --force. blkid -D -p says:\n%s", dev.c_str(), out.c_str()); fprintf(stderr, "%s contains data, not creating OSD without --force. wipefs shows:\n%s", dev.c_str(), out.c_str());
return 1; return 1;
} }
json11::Json sb = read_osd_superblock(dev, false); json11::Json sb = read_osd_superblock(dev, false);
@ -143,17 +149,28 @@ int disk_tool_t::prepare_one(std::map<std::string, std::string> options, int is_
} }
dsk.close_all(); dsk.close_all();
// Write superblocks // Write superblocks
if (!write_osd_superblock(options["data_device"], sb) || bool sep_m = options["meta_device"] != "" &&
options["meta_device"] != "" && options["meta_device"] != options["data_device"];
options["meta_device"] != options["data_device"] && bool sep_j = options["journal_device"] != "" &&
write_osd_superblock(options["meta_device"], sb) ||
options["journal_device"] != "" &&
options["journal_device"] != options["data_device"] && options["journal_device"] != options["data_device"] &&
options["journal_device"] != options["meta_device"] && options["journal_device"] != options["meta_device"];
!write_osd_superblock(options["journal_device"], sb)) if (!write_osd_superblock(options["data_device"], sb) ||
sep_m && !write_osd_superblock(options["meta_device"], sb) ||
sep_j && !write_osd_superblock(options["journal_device"], sb))
{ {
return 1; return 1;
} }
auto desc = realpath_str(options["data_device"]);
if (sep_m)
desc += " with metadata on "+realpath_str(options["meta_device"]);
if (sep_j)
desc += (sep_m ? " and journal on " : " with journal on ") + realpath_str(options["journal_device"]);
fprintf(stderr, "Initialized OSD %lu on %s\n", osd_num, desc.c_str());
if (shell_exec({ "systemctl", "enable", "--now", "vitastor-osd@"+std::to_string(osd_num) }, "", NULL, NULL) != 0)
{
fprintf(stderr, "Failed to enable systemd unit vitastor-osd@%lu\n", osd_num);
return 1;
}
return 0; return 0;
} }
@ -190,7 +207,7 @@ std::vector<vitastor_dev_info_t> disk_tool_t::collect_devices(const std::vector<
return {}; return {};
} }
// Check if the device is an SSD // Check if the device is an SSD
bool is_hdd = read_file("/sys/block/"+dev.substr(5)+"/queue/rotational") == "1"; bool is_hdd = trim(read_file("/sys/block/"+dev.substr(5)+"/queue/rotational")) == "1";
// Check if it has a partition table // Check if it has a partition table
json11::Json pt = read_parttable(dev); json11::Json pt = read_parttable(dev);
if (pt.is_bool() && !pt.bool_value()) if (pt.is_bool() && !pt.bool_value())
@ -202,8 +219,8 @@ std::vector<vitastor_dev_info_t> disk_tool_t::collect_devices(const std::vector<
{ {
// No partition table // No partition table
std::string out; std::string out;
int r = shell_exec({ "blkid", "-p", dev }, "", &out, NULL); int r = shell_exec({ "wipefs", dev }, "", &out, NULL);
if (r == 0) if (r != 0 || out != "")
{ {
fprintf(stderr, "%s contains data, skipping:\n %s\n", dev.c_str(), str_replace(trim(out), "\n", "\n ").c_str()); fprintf(stderr, "%s contains data, skipping:\n %s\n", dev.c_str(), str_replace(trim(out), "\n", "\n ").c_str());
continue; continue;
@ -244,9 +261,9 @@ json11::Json disk_tool_t::add_partitions(vitastor_dev_info_t & devinfo, std::vec
{ {
if (kv.first != "node") if (kv.first != "node")
{ {
script += kv.first+"="+(kv.second.is_string() ? kv.second.string_value() : kv.second.dump());
if (n++) if (n++)
script += ", "; script += ", ";
script += kv.first+"="+(kv.second.is_string() ? kv.second.string_value() : kv.second.dump());
} }
} }
script += "\n"; script += "\n";
@ -510,6 +527,11 @@ int disk_tool_t::prepare(std::vector<std::string> devices)
fprintf(stderr, "No SSDs found\n"); fprintf(stderr, "No SSDs found\n");
return 1; return 1;
} }
else if (ssds.size() == devinfo.size())
{
fprintf(stderr, "No HDDs found\n");
return 1;
}
if (options["journal_size"] == "") if (options["journal_size"] == "")
options["journal_size"] = DEFAULT_HYBRID_JOURNAL; options["journal_size"] = DEFAULT_HYBRID_JOURNAL;
} }
@ -521,7 +543,7 @@ int disk_tool_t::prepare(std::vector<std::string> devices)
for (const auto & uuid: get_new_data_parts(dev, osd_per_disk, max_other_percent)) for (const auto & uuid: get_new_data_parts(dev, osd_per_disk, max_other_percent))
{ {
options["force"] = true; options["force"] = true;
options["data_device"] = "/dev/disk/by-uuid/"+strtolower(uuid); options["data_device"] = "/dev/disk/by-partuuid/"+strtolower(uuid);
if (hybrid) if (hybrid)
{ {
// Select/create journal and metadata partitions // Select/create journal and metadata partitions

View File

@ -110,11 +110,6 @@ uint32_t disk_tool_t::write_osd_superblock(std::string device, json11::Json para
free(buf); free(buf);
return 0; return 0;
} }
// Lock the file
if (flock(fd, LOCK_EX|LOCK_NB) < 0)
{
fprintf(stderr, "Warning: Failed to lock %s with flock - udev autodetection may fail. Error: %s\n", device.c_str(), strerror(errno));
}
int r = write_blocking(fd, buf, buf_len); int r = write_blocking(fd, buf, buf_len);
if (r < 0) if (r < 0)
{ {
@ -125,6 +120,7 @@ uint32_t disk_tool_t::write_osd_superblock(std::string device, json11::Json para
} }
close(fd); close(fd);
free(buf); free(buf);
shell_exec({ "udevadm", "trigger", "--settle", device }, "", NULL, NULL);
return sb_size; return sb_size;
} }

View File

@ -663,9 +663,9 @@ void osd_t::apply_pg_config()
{ {
printf( printf(
"[OSD %lu] My block_size and bitmap_granularity are %u/%u" "[OSD %lu] My block_size and bitmap_granularity are %u/%u"
", but pool has %u/%u. Refusing to start PGs of this pool\n", ", but pool %u has %u/%u. Refusing to start PGs of this pool\n",
this->osd_num, bs_block_size, bs_bitmap_granularity, this->osd_num, bs_block_size, bs_bitmap_granularity,
pool_item.second.data_block_size, pool_item.second.bitmap_granularity pool_id, pool_item.second.data_block_size, pool_item.second.bitmap_granularity
); );
} }
warned_block_size = true; warned_block_size = true;