forked from vitalif/vitastor
Fix vitastor-disk prepare bugs
parent
c0d5e83fb8
commit
1407db9c08
|
@ -219,7 +219,7 @@ void blockstore_disk_t::open_data()
|
||||||
data_fd = open(data_device.c_str(), O_DIRECT|O_RDWR);
|
data_fd = open(data_device.c_str(), O_DIRECT|O_RDWR);
|
||||||
if (data_fd == -1)
|
if (data_fd == -1)
|
||||||
{
|
{
|
||||||
throw std::runtime_error("Failed to open data device");
|
throw std::runtime_error("Failed to open data device "+data_device+": "+std::string(strerror(errno)));
|
||||||
}
|
}
|
||||||
check_size(data_fd, &data_device_size, &data_device_sect, "data device");
|
check_size(data_fd, &data_device_size, &data_device_sect, "data device");
|
||||||
if (disk_alignment % data_device_sect)
|
if (disk_alignment % data_device_sect)
|
||||||
|
@ -243,11 +243,10 @@ void blockstore_disk_t::open_meta()
|
||||||
{
|
{
|
||||||
if (meta_device != data_device)
|
if (meta_device != data_device)
|
||||||
{
|
{
|
||||||
meta_offset = 0;
|
|
||||||
meta_fd = open(meta_device.c_str(), O_DIRECT|O_RDWR);
|
meta_fd = open(meta_device.c_str(), O_DIRECT|O_RDWR);
|
||||||
if (meta_fd == -1)
|
if (meta_fd == -1)
|
||||||
{
|
{
|
||||||
throw std::runtime_error("Failed to open metadata device");
|
throw std::runtime_error("Failed to open metadata device "+meta_device+": "+std::string(strerror(errno)));
|
||||||
}
|
}
|
||||||
check_size(meta_fd, &meta_device_size, &meta_device_sect, "metadata device");
|
check_size(meta_fd, &meta_device_size, &meta_device_sect, "metadata device");
|
||||||
if (meta_offset >= meta_device_size)
|
if (meta_offset >= meta_device_size)
|
||||||
|
@ -285,7 +284,7 @@ void blockstore_disk_t::open_journal()
|
||||||
journal_fd = open(journal_device.c_str(), O_DIRECT|O_RDWR);
|
journal_fd = open(journal_device.c_str(), O_DIRECT|O_RDWR);
|
||||||
if (journal_fd == -1)
|
if (journal_fd == -1)
|
||||||
{
|
{
|
||||||
throw std::runtime_error("Failed to open journal device");
|
throw std::runtime_error("Failed to open journal device "+journal_device+": "+std::string(strerror(errno)));
|
||||||
}
|
}
|
||||||
check_size(journal_fd, &journal_device_size, &journal_device_sect, "journal device");
|
check_size(journal_fd, &journal_device_size, &journal_device_sect, "journal device");
|
||||||
if (!disable_flock && flock(journal_fd, LOCK_EX|LOCK_NB) != 0)
|
if (!disable_flock && flock(journal_fd, LOCK_EX|LOCK_NB) != 0)
|
||||||
|
|
|
@ -12,13 +12,13 @@ static const char *help_text =
|
||||||
"\n"
|
"\n"
|
||||||
"vitastor-disk prepare [OPTIONS] [devices...]\n"
|
"vitastor-disk prepare [OPTIONS] [devices...]\n"
|
||||||
" Initialize disk(s) for Vitastor OSD(s).\n"
|
" Initialize disk(s) for Vitastor OSD(s).\n"
|
||||||
" There are two forms of this command. In the first form, you pass <devices> which\n"
|
" There are two modes of this command. In the first mode, you pass <devices> which\n"
|
||||||
" must be raw disks (not partitions). They are partitioned automatically and OSDs\n"
|
" must be raw disks (not partitions). They are partitioned automatically and OSDs\n"
|
||||||
" are initialized on all of them.\n"
|
" are initialized on all of them.\n"
|
||||||
" In the second form, you omit <devices> and pass --data_device, --journal_device\n"
|
" In the second mode, you omit <devices> and pass --data_device, --journal_device\n"
|
||||||
" and/or --meta_device which must be already existing partitions. In this case\n"
|
" and/or --meta_device which must be already existing partitions identified by their\n"
|
||||||
" a single OSD is created.\n"
|
" GPT partition UUIDs. In this case a single OSD is created.\n"
|
||||||
" Requires `vitastor-cli`, `blkid`, `sfdisk` and `partprobe` (from parted) utilities.\n"
|
" Requires `vitastor-cli`, `wipefs`, `sfdisk` and `partprobe` (from parted) utilities.\n"
|
||||||
" OPTIONS may include:\n"
|
" OPTIONS may include:\n"
|
||||||
" --hybrid\n"
|
" --hybrid\n"
|
||||||
" Prepare hybrid (HDD+SSD) OSDs using provided devices. SSDs will be used for\n"
|
" Prepare hybrid (HDD+SSD) OSDs using provided devices. SSDs will be used for\n"
|
||||||
|
@ -30,6 +30,7 @@ static const char *help_text =
|
||||||
" --data_device <DEV> Create a single OSD using partition <DEV> for data\n"
|
" --data_device <DEV> Create a single OSD using partition <DEV> for data\n"
|
||||||
" --meta_device <DEV> Create a single OSD using partition <DEV> for metadata\n"
|
" --meta_device <DEV> Create a single OSD using partition <DEV> for metadata\n"
|
||||||
" --journal_device <DEV> Create a single OSD using partition <DEV> for journal\n"
|
" --journal_device <DEV> Create a single OSD using partition <DEV> for journal\n"
|
||||||
|
" --force Bypass checks on data/meta/journal partitions\n"
|
||||||
" --journal_size 1G/32M Set journal size\n"
|
" --journal_size 1G/32M Set journal size\n"
|
||||||
" --object_size 1M/128k Set blockstore object size\n"
|
" --object_size 1M/128k Set blockstore object size\n"
|
||||||
" --disable_ssd_cache 1 Disable cache and fsyncs for SSD journal and metadata\n"
|
" --disable_ssd_cache 1 Disable cache and fsyncs for SSD journal and metadata\n"
|
||||||
|
@ -43,8 +44,8 @@ static const char *help_text =
|
||||||
" metadata size to ease possible future extension. The default is to allocate\n"
|
" metadata size to ease possible future extension. The default is to allocate\n"
|
||||||
" 2 times more space and at least 1G. Use this option to override.\n"
|
" 2 times more space and at least 1G. Use this option to override.\n"
|
||||||
" --max_other 10%\n"
|
" --max_other 10%\n"
|
||||||
" Use disks for OSD data even if they already have non-Vitastor partitions,\n"
|
" In the automatic mode, use disks for OSD data even if they already have non-Vitastor\n"
|
||||||
" but only if these take up no more than this percent of disk space.\n"
|
" partitions, but only if these take up no more than this percent of disk space.\n"
|
||||||
"\n"
|
"\n"
|
||||||
"vitastor-disk upgrade-simple <UNIT_FILE|OSD_NUMBER>\n"
|
"vitastor-disk upgrade-simple <UNIT_FILE|OSD_NUMBER>\n"
|
||||||
" Upgrade an OSD created by old (0.7.1 and older) make-osd.sh or make-osd-hybrid.js scripts.\n"
|
" Upgrade an OSD created by old (0.7.1 and older) make-osd.sh or make-osd-hybrid.js scripts.\n"
|
||||||
|
@ -149,7 +150,7 @@ int main(int argc, char *argv[])
|
||||||
{
|
{
|
||||||
self.options["hybrid"] = "1";
|
self.options["hybrid"] = "1";
|
||||||
}
|
}
|
||||||
else if (!strcmp(argv[i], "--help"))
|
else if (!strcmp(argv[i], "--help") || !strcmp(argv[i], "-h"))
|
||||||
{
|
{
|
||||||
cmd.insert(cmd.begin(), (char*)"help");
|
cmd.insert(cmd.begin(), (char*)"help");
|
||||||
}
|
}
|
||||||
|
|
|
@ -30,6 +30,12 @@ int disk_tool_t::prepare_one(std::map<std::string, std::string> options, int is_
|
||||||
const auto & dev = all_devs[i];
|
const auto & dev = all_devs[i];
|
||||||
if (dev == "")
|
if (dev == "")
|
||||||
continue;
|
continue;
|
||||||
|
if (dev.substr(0, 22) != "/dev/disk/by-partuuid/")
|
||||||
|
{
|
||||||
|
// Partitions should be identified by GPT partition UUID
|
||||||
|
fprintf(stderr, "%s does not start with /dev/disk/by-partuuid/. Partitions should be identified by GPT partition UUIDs\n", dev.c_str());
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
std::string real_dev = realpath_str(dev, false);
|
std::string real_dev = realpath_str(dev, false);
|
||||||
if (real_dev == "")
|
if (real_dev == "")
|
||||||
return 1;
|
return 1;
|
||||||
|
@ -42,11 +48,11 @@ int disk_tool_t::prepare_one(std::map<std::string, std::string> options, int is_
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
if (i == 0 && is_hdd == -1)
|
if (i == 0 && is_hdd == -1)
|
||||||
is_hdd = read_file("/sys/block/"+parent_dev+"/queue/rotational") == "1";
|
is_hdd = trim(read_file("/sys/block/"+parent_dev+"/queue/rotational")) == "1";
|
||||||
std::string out;
|
std::string out;
|
||||||
if (shell_exec({ "blkid", "-D", "-p", dev }, "", &out, NULL) == 0)
|
if (shell_exec({ "wipefs", dev }, "", &out, NULL) != 0 || out != "")
|
||||||
{
|
{
|
||||||
fprintf(stderr, "%s contains data, not creating OSD without --force. blkid -D -p says:\n%s", dev.c_str(), out.c_str());
|
fprintf(stderr, "%s contains data, not creating OSD without --force. wipefs shows:\n%s", dev.c_str(), out.c_str());
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
json11::Json sb = read_osd_superblock(dev, false);
|
json11::Json sb = read_osd_superblock(dev, false);
|
||||||
|
@ -143,17 +149,28 @@ int disk_tool_t::prepare_one(std::map<std::string, std::string> options, int is_
|
||||||
}
|
}
|
||||||
dsk.close_all();
|
dsk.close_all();
|
||||||
// Write superblocks
|
// Write superblocks
|
||||||
if (!write_osd_superblock(options["data_device"], sb) ||
|
bool sep_m = options["meta_device"] != "" &&
|
||||||
options["meta_device"] != "" &&
|
options["meta_device"] != options["data_device"];
|
||||||
options["meta_device"] != options["data_device"] &&
|
bool sep_j = options["journal_device"] != "" &&
|
||||||
write_osd_superblock(options["meta_device"], sb) ||
|
|
||||||
options["journal_device"] != "" &&
|
|
||||||
options["journal_device"] != options["data_device"] &&
|
options["journal_device"] != options["data_device"] &&
|
||||||
options["journal_device"] != options["meta_device"] &&
|
options["journal_device"] != options["meta_device"];
|
||||||
!write_osd_superblock(options["journal_device"], sb))
|
if (!write_osd_superblock(options["data_device"], sb) ||
|
||||||
|
sep_m && !write_osd_superblock(options["meta_device"], sb) ||
|
||||||
|
sep_j && !write_osd_superblock(options["journal_device"], sb))
|
||||||
{
|
{
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
auto desc = realpath_str(options["data_device"]);
|
||||||
|
if (sep_m)
|
||||||
|
desc += " with metadata on "+realpath_str(options["meta_device"]);
|
||||||
|
if (sep_j)
|
||||||
|
desc += (sep_m ? " and journal on " : " with journal on ") + realpath_str(options["journal_device"]);
|
||||||
|
fprintf(stderr, "Initialized OSD %lu on %s\n", osd_num, desc.c_str());
|
||||||
|
if (shell_exec({ "systemctl", "enable", "--now", "vitastor-osd@"+std::to_string(osd_num) }, "", NULL, NULL) != 0)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Failed to enable systemd unit vitastor-osd@%lu\n", osd_num);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -190,7 +207,7 @@ std::vector<vitastor_dev_info_t> disk_tool_t::collect_devices(const std::vector<
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
// Check if the device is an SSD
|
// Check if the device is an SSD
|
||||||
bool is_hdd = read_file("/sys/block/"+dev.substr(5)+"/queue/rotational") == "1";
|
bool is_hdd = trim(read_file("/sys/block/"+dev.substr(5)+"/queue/rotational")) == "1";
|
||||||
// Check if it has a partition table
|
// Check if it has a partition table
|
||||||
json11::Json pt = read_parttable(dev);
|
json11::Json pt = read_parttable(dev);
|
||||||
if (pt.is_bool() && !pt.bool_value())
|
if (pt.is_bool() && !pt.bool_value())
|
||||||
|
@ -202,8 +219,8 @@ std::vector<vitastor_dev_info_t> disk_tool_t::collect_devices(const std::vector<
|
||||||
{
|
{
|
||||||
// No partition table
|
// No partition table
|
||||||
std::string out;
|
std::string out;
|
||||||
int r = shell_exec({ "blkid", "-p", dev }, "", &out, NULL);
|
int r = shell_exec({ "wipefs", dev }, "", &out, NULL);
|
||||||
if (r == 0)
|
if (r != 0 || out != "")
|
||||||
{
|
{
|
||||||
fprintf(stderr, "%s contains data, skipping:\n %s\n", dev.c_str(), str_replace(trim(out), "\n", "\n ").c_str());
|
fprintf(stderr, "%s contains data, skipping:\n %s\n", dev.c_str(), str_replace(trim(out), "\n", "\n ").c_str());
|
||||||
continue;
|
continue;
|
||||||
|
@ -244,9 +261,9 @@ json11::Json disk_tool_t::add_partitions(vitastor_dev_info_t & devinfo, std::vec
|
||||||
{
|
{
|
||||||
if (kv.first != "node")
|
if (kv.first != "node")
|
||||||
{
|
{
|
||||||
script += kv.first+"="+(kv.second.is_string() ? kv.second.string_value() : kv.second.dump());
|
|
||||||
if (n++)
|
if (n++)
|
||||||
script += ", ";
|
script += ", ";
|
||||||
|
script += kv.first+"="+(kv.second.is_string() ? kv.second.string_value() : kv.second.dump());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
script += "\n";
|
script += "\n";
|
||||||
|
@ -510,6 +527,11 @@ int disk_tool_t::prepare(std::vector<std::string> devices)
|
||||||
fprintf(stderr, "No SSDs found\n");
|
fprintf(stderr, "No SSDs found\n");
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
else if (ssds.size() == devinfo.size())
|
||||||
|
{
|
||||||
|
fprintf(stderr, "No HDDs found\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
if (options["journal_size"] == "")
|
if (options["journal_size"] == "")
|
||||||
options["journal_size"] = DEFAULT_HYBRID_JOURNAL;
|
options["journal_size"] = DEFAULT_HYBRID_JOURNAL;
|
||||||
}
|
}
|
||||||
|
@ -521,7 +543,7 @@ int disk_tool_t::prepare(std::vector<std::string> devices)
|
||||||
for (const auto & uuid: get_new_data_parts(dev, osd_per_disk, max_other_percent))
|
for (const auto & uuid: get_new_data_parts(dev, osd_per_disk, max_other_percent))
|
||||||
{
|
{
|
||||||
options["force"] = true;
|
options["force"] = true;
|
||||||
options["data_device"] = "/dev/disk/by-uuid/"+strtolower(uuid);
|
options["data_device"] = "/dev/disk/by-partuuid/"+strtolower(uuid);
|
||||||
if (hybrid)
|
if (hybrid)
|
||||||
{
|
{
|
||||||
// Select/create journal and metadata partitions
|
// Select/create journal and metadata partitions
|
||||||
|
|
|
@ -110,11 +110,6 @@ uint32_t disk_tool_t::write_osd_superblock(std::string device, json11::Json para
|
||||||
free(buf);
|
free(buf);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
// Lock the file
|
|
||||||
if (flock(fd, LOCK_EX|LOCK_NB) < 0)
|
|
||||||
{
|
|
||||||
fprintf(stderr, "Warning: Failed to lock %s with flock - udev autodetection may fail. Error: %s\n", device.c_str(), strerror(errno));
|
|
||||||
}
|
|
||||||
int r = write_blocking(fd, buf, buf_len);
|
int r = write_blocking(fd, buf, buf_len);
|
||||||
if (r < 0)
|
if (r < 0)
|
||||||
{
|
{
|
||||||
|
@ -125,6 +120,7 @@ uint32_t disk_tool_t::write_osd_superblock(std::string device, json11::Json para
|
||||||
}
|
}
|
||||||
close(fd);
|
close(fd);
|
||||||
free(buf);
|
free(buf);
|
||||||
|
shell_exec({ "udevadm", "trigger", "--settle", device }, "", NULL, NULL);
|
||||||
return sb_size;
|
return sb_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -663,9 +663,9 @@ void osd_t::apply_pg_config()
|
||||||
{
|
{
|
||||||
printf(
|
printf(
|
||||||
"[OSD %lu] My block_size and bitmap_granularity are %u/%u"
|
"[OSD %lu] My block_size and bitmap_granularity are %u/%u"
|
||||||
", but pool has %u/%u. Refusing to start PGs of this pool\n",
|
", but pool %u has %u/%u. Refusing to start PGs of this pool\n",
|
||||||
this->osd_num, bs_block_size, bs_bitmap_granularity,
|
this->osd_num, bs_block_size, bs_bitmap_granularity,
|
||||||
pool_item.second.data_block_size, pool_item.second.bitmap_granularity
|
pool_id, pool_item.second.data_block_size, pool_item.second.bitmap_granularity
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
warned_block_size = true;
|
warned_block_size = true;
|
||||||
|
|
Loading…
Reference in New Issue