Fix vitastor-disk prepare bugs

Vitaliy Filippov 2022-08-18 02:28:43 +03:00
parent c0d5e83fb8
commit 1407db9c08
5 changed files with 52 additions and 34 deletions

View File

@ -219,7 +219,7 @@ void blockstore_disk_t::open_data()
data_fd = open(data_device.c_str(), O_DIRECT|O_RDWR);
if (data_fd == -1)
{
throw std::runtime_error("Failed to open data device");
throw std::runtime_error("Failed to open data device "+data_device+": "+std::string(strerror(errno)));
}
check_size(data_fd, &data_device_size, &data_device_sect, "data device");
if (disk_alignment % data_device_sect)
@ -243,11 +243,10 @@ void blockstore_disk_t::open_meta()
{
if (meta_device != data_device)
{
meta_offset = 0;
meta_fd = open(meta_device.c_str(), O_DIRECT|O_RDWR);
if (meta_fd == -1)
{
throw std::runtime_error("Failed to open metadata device");
throw std::runtime_error("Failed to open metadata device "+meta_device+": "+std::string(strerror(errno)));
}
check_size(meta_fd, &meta_device_size, &meta_device_sect, "metadata device");
if (meta_offset >= meta_device_size)
@ -285,7 +284,7 @@ void blockstore_disk_t::open_journal()
journal_fd = open(journal_device.c_str(), O_DIRECT|O_RDWR);
if (journal_fd == -1)
{
throw std::runtime_error("Failed to open journal device");
throw std::runtime_error("Failed to open journal device "+journal_device+": "+std::string(strerror(errno)));
}
check_size(journal_fd, &journal_device_size, &journal_device_sect, "journal device");
if (!disable_flock && flock(journal_fd, LOCK_EX|LOCK_NB) != 0)

View File

@ -12,13 +12,13 @@ static const char *help_text =
"\n"
"vitastor-disk prepare [OPTIONS] [devices...]\n"
" Initialize disk(s) for Vitastor OSD(s).\n"
" There are two forms of this command. In the first form, you pass <devices> which\n"
" There are two modes of this command. In the first mode, you pass <devices> which\n"
" must be raw disks (not partitions). They are partitioned automatically and OSDs\n"
" are initialized on all of them.\n"
" In the second form, you omit <devices> and pass --data_device, --journal_device\n"
" and/or --meta_device which must be already existing partitions. In this case\n"
" a single OSD is created.\n"
" Requires `vitastor-cli`, `blkid`, `sfdisk` and `partprobe` (from parted) utilities.\n"
" In the second mode, you omit <devices> and pass --data_device, --journal_device\n"
" and/or --meta_device which must be already existing partitions identified by their\n"
" GPT partition UUIDs. In this case a single OSD is created.\n"
" Requires `vitastor-cli`, `wipefs`, `sfdisk` and `partprobe` (from parted) utilities.\n"
" OPTIONS may include:\n"
" --hybrid\n"
" Prepare hybrid (HDD+SSD) OSDs using provided devices. SSDs will be used for\n"
@ -30,6 +30,7 @@ static const char *help_text =
" --data_device <DEV> Create a single OSD using partition <DEV> for data\n"
" --meta_device <DEV> Create a single OSD using partition <DEV> for metadata\n"
" --journal_device <DEV> Create a single OSD using partition <DEV> for journal\n"
" --force Bypass checks on data/meta/journal partitions\n"
" --journal_size 1G/32M Set journal size\n"
" --object_size 1M/128k Set blockstore object size\n"
" --disable_ssd_cache 1 Disable cache and fsyncs for SSD journal and metadata\n"
@ -43,8 +44,8 @@ static const char *help_text =
" metadata size to ease possible future extension. The default is to allocate\n"
" 2 times more space and at least 1G. Use this option to override.\n"
" --max_other 10%\n"
" Use disks for OSD data even if they already have non-Vitastor partitions,\n"
" but only if these take up no more than this percent of disk space.\n"
" In the automatic mode, use disks for OSD data even if they already have non-Vitastor\n"
" partitions, but only if these take up no more than this percent of disk space.\n"
"\n"
"vitastor-disk upgrade-simple <UNIT_FILE|OSD_NUMBER>\n"
" Upgrade an OSD created by old (0.7.1 and older) make-osd.sh or make-osd-hybrid.js scripts.\n"
@ -149,7 +150,7 @@ int main(int argc, char *argv[])
{
self.options["hybrid"] = "1";
}
else if (!strcmp(argv[i], "--help"))
else if (!strcmp(argv[i], "--help") || !strcmp(argv[i], "-h"))
{
cmd.insert(cmd.begin(), (char*)"help");
}

View File

@ -30,6 +30,12 @@ int disk_tool_t::prepare_one(std::map<std::string, std::string> options, int is_
const auto & dev = all_devs[i];
if (dev == "")
continue;
if (dev.substr(0, 22) != "/dev/disk/by-partuuid/")
{
// Partitions should be identified by GPT partition UUID
fprintf(stderr, "%s does not start with /dev/disk/by-partuuid/. Partitions should be identified by GPT partition UUIDs\n", dev.c_str());
return 1;
}
std::string real_dev = realpath_str(dev, false);
if (real_dev == "")
return 1;
@ -42,11 +48,11 @@ int disk_tool_t::prepare_one(std::map<std::string, std::string> options, int is_
return 1;
}
if (i == 0 && is_hdd == -1)
is_hdd = read_file("/sys/block/"+parent_dev+"/queue/rotational") == "1";
is_hdd = trim(read_file("/sys/block/"+parent_dev+"/queue/rotational")) == "1";
std::string out;
if (shell_exec({ "blkid", "-D", "-p", dev }, "", &out, NULL) == 0)
if (shell_exec({ "wipefs", dev }, "", &out, NULL) != 0 || out != "")
{
fprintf(stderr, "%s contains data, not creating OSD without --force. blkid -D -p says:\n%s", dev.c_str(), out.c_str());
fprintf(stderr, "%s contains data, not creating OSD without --force. wipefs shows:\n%s", dev.c_str(), out.c_str());
return 1;
}
json11::Json sb = read_osd_superblock(dev, false);
@ -143,17 +149,28 @@ int disk_tool_t::prepare_one(std::map<std::string, std::string> options, int is_
}
dsk.close_all();
// Write superblocks
if (!write_osd_superblock(options["data_device"], sb) ||
options["meta_device"] != "" &&
options["meta_device"] != options["data_device"] &&
write_osd_superblock(options["meta_device"], sb) ||
options["journal_device"] != "" &&
bool sep_m = options["meta_device"] != "" &&
options["meta_device"] != options["data_device"];
bool sep_j = options["journal_device"] != "" &&
options["journal_device"] != options["data_device"] &&
options["journal_device"] != options["meta_device"] &&
!write_osd_superblock(options["journal_device"], sb))
options["journal_device"] != options["meta_device"];
if (!write_osd_superblock(options["data_device"], sb) ||
sep_m && !write_osd_superblock(options["meta_device"], sb) ||
sep_j && !write_osd_superblock(options["journal_device"], sb))
{
return 1;
}
auto desc = realpath_str(options["data_device"]);
if (sep_m)
desc += " with metadata on "+realpath_str(options["meta_device"]);
if (sep_j)
desc += (sep_m ? " and journal on " : " with journal on ") + realpath_str(options["journal_device"]);
fprintf(stderr, "Initialized OSD %lu on %s\n", osd_num, desc.c_str());
if (shell_exec({ "systemctl", "enable", "--now", "vitastor-osd@"+std::to_string(osd_num) }, "", NULL, NULL) != 0)
{
fprintf(stderr, "Failed to enable systemd unit vitastor-osd@%lu\n", osd_num);
return 1;
}
return 0;
}
@ -190,7 +207,7 @@ std::vector<vitastor_dev_info_t> disk_tool_t::collect_devices(const std::vector<
return {};
}
// Check if the device is an SSD
bool is_hdd = read_file("/sys/block/"+dev.substr(5)+"/queue/rotational") == "1";
bool is_hdd = trim(read_file("/sys/block/"+dev.substr(5)+"/queue/rotational")) == "1";
// Check if it has a partition table
json11::Json pt = read_parttable(dev);
if (pt.is_bool() && !pt.bool_value())
@ -202,8 +219,8 @@ std::vector<vitastor_dev_info_t> disk_tool_t::collect_devices(const std::vector<
{
// No partition table
std::string out;
int r = shell_exec({ "blkid", "-p", dev }, "", &out, NULL);
if (r == 0)
int r = shell_exec({ "wipefs", dev }, "", &out, NULL);
if (r != 0 || out != "")
{
fprintf(stderr, "%s contains data, skipping:\n %s\n", dev.c_str(), str_replace(trim(out), "\n", "\n ").c_str());
continue;
@ -244,9 +261,9 @@ json11::Json disk_tool_t::add_partitions(vitastor_dev_info_t & devinfo, std::vec
{
if (kv.first != "node")
{
script += kv.first+"="+(kv.second.is_string() ? kv.second.string_value() : kv.second.dump());
if (n++)
script += ", ";
script += kv.first+"="+(kv.second.is_string() ? kv.second.string_value() : kv.second.dump());
}
}
script += "\n";
@ -510,6 +527,11 @@ int disk_tool_t::prepare(std::vector<std::string> devices)
fprintf(stderr, "No SSDs found\n");
return 1;
}
else if (ssds.size() == devinfo.size())
{
fprintf(stderr, "No HDDs found\n");
return 1;
}
if (options["journal_size"] == "")
options["journal_size"] = DEFAULT_HYBRID_JOURNAL;
}
@ -521,7 +543,7 @@ int disk_tool_t::prepare(std::vector<std::string> devices)
for (const auto & uuid: get_new_data_parts(dev, osd_per_disk, max_other_percent))
{
options["force"] = true;
options["data_device"] = "/dev/disk/by-uuid/"+strtolower(uuid);
options["data_device"] = "/dev/disk/by-partuuid/"+strtolower(uuid);
if (hybrid)
{
// Select/create journal and metadata partitions

View File

@ -110,11 +110,6 @@ uint32_t disk_tool_t::write_osd_superblock(std::string device, json11::Json para
free(buf);
return 0;
}
// Lock the file
if (flock(fd, LOCK_EX|LOCK_NB) < 0)
{
fprintf(stderr, "Warning: Failed to lock %s with flock - udev autodetection may fail. Error: %s\n", device.c_str(), strerror(errno));
}
int r = write_blocking(fd, buf, buf_len);
if (r < 0)
{
@ -125,6 +120,7 @@ uint32_t disk_tool_t::write_osd_superblock(std::string device, json11::Json para
}
close(fd);
free(buf);
shell_exec({ "udevadm", "trigger", "--settle", device }, "", NULL, NULL);
return sb_size;
}

View File

@ -663,9 +663,9 @@ void osd_t::apply_pg_config()
{
printf(
"[OSD %lu] My block_size and bitmap_granularity are %u/%u"
", but pool has %u/%u. Refusing to start PGs of this pool\n",
", but pool %u has %u/%u. Refusing to start PGs of this pool\n",
this->osd_num, bs_block_size, bs_bitmap_granularity,
pool_item.second.data_block_size, pool_item.second.bitmap_granularity
pool_id, pool_item.second.data_block_size, pool_item.second.bitmap_granularity
);
}
warned_block_size = true;