Automatically check whether to disable cache during prepare

rm-left-on-dead
Vitaliy Filippov 2022-09-03 00:48:55 +03:00
parent bd11db5d0a
commit 9481456dfe
7 changed files with 98 additions and 80 deletions

View File

@ -54,10 +54,10 @@ Options (automatic mode):
by the `/sys/block/.../queue/rotational` flag. In hybrid mode, default object by the `/sys/block/.../queue/rotational` flag. In hybrid mode, default object
size is 1 MB instead of 128 KB, default journal size is 1 GB instead of 32 MB, size is 1 MB instead of 128 KB, default journal size is 1 GB instead of 32 MB,
and throttle_small_writes is enabled by default. and throttle_small_writes is enabled by default.
--disable_data_fsync 1 --disable_data_fsync auto
Disable data device cache and fsync (1/yes/true = on, default on) Disable data device cache and fsync (1/yes/true = on, default auto)
--disable_meta_fsync 1 --disable_meta_fsync auto
Disable metadata/journal device cache and fsync (default on) Disable metadata/journal device cache and fsync (default auto)
--meta_reserve 2x,1G --meta_reserve 2x,1G
New metadata partitions in --hybrid mode are created larger than actual New metadata partitions in --hybrid mode are created larger than actual
metadata size to ease possible future extension. The default is to allocate metadata size to ease possible future extension. The default is to allocate

View File

@ -54,10 +54,10 @@ vitastor-disk - инструмент командной строки для уп
по флагу `/sys/block/.../queue/rotational`. В гибридном режиме по умолчанию по флагу `/sys/block/.../queue/rotational`. В гибридном режиме по умолчанию
используется размер объекта 1 МБ вместо 128 КБ, размер журнала 1 ГБ вместо 32 МБ используется размер объекта 1 МБ вместо 128 КБ, размер журнала 1 ГБ вместо 32 МБ
и включённый throttle_small_writes. и включённый throttle_small_writes.
--disable_data_fsync 1 --disable_data_fsync auto
Отключать кэш и fsync-и для устройств данных (1/yes/true = да, по умолчанию да) Отключать кэш и fsync-и для устройств данных (1/yes/true = да, по умолчанию автоопределение)
--disable_meta_fsync 1 --disable_meta_fsync auto
Отключать кэш и fsync-и для журналов и метаданных (по умолчанию да) Отключать кэш и fsync-и для журналов и метаданных (по умолчанию автоопределение)
--meta_reserve 2x,1G --meta_reserve 2x,1G
В гибридном режиме для метаданных выделяется больше места, чем нужно на самом В гибридном режиме для метаданных выделяется больше места, чем нужно на самом
деле, чтобы оставить запас под будущее расширение. По умолчанию выделяется деле, чтобы оставить запас под будущее расширение. По умолчанию выделяется

View File

@ -33,10 +33,10 @@ static const char *help_text =
" by the `/sys/block/.../queue/rotational` flag. In hybrid mode, default object\n" " by the `/sys/block/.../queue/rotational` flag. In hybrid mode, default object\n"
" size is 1 MB instead of 128 KB, default journal size is 1 GB instead of 32 MB,\n" " size is 1 MB instead of 128 KB, default journal size is 1 GB instead of 32 MB,\n"
" and throttle_small_writes is enabled by default.\n" " and throttle_small_writes is enabled by default.\n"
" --disable_data_fsync 1\n" " --disable_data_fsync auto\n"
" Disable data device cache and fsync (1/yes/true = on, default on)\n" " Disable data device cache and fsync (1/yes/true = on, default auto)\n"
" --disable_meta_fsync 1\n" " --disable_meta_fsync auto\n"
" Disable metadata/journal device cache and fsync (default on)\n" " Disable metadata/journal device cache and fsync (default auto)\n"
" --meta_reserve 2x,1G\n" " --meta_reserve 2x,1G\n"
" New metadata partitions in --hybrid mode are created larger than actual\n" " New metadata partitions in --hybrid mode are created larger than actual\n"
" metadata size to ease possible future extension. The default is to allocate\n" " metadata size to ease possible future extension. The default is to allocate\n"

View File

@ -131,7 +131,7 @@ void fromhexstr(const std::string & from, int bytes, uint8_t *to);
std::string realpath_str(std::string path, bool nofail = true); std::string realpath_str(std::string path, bool nofail = true);
std::string read_all_fd(int fd); std::string read_all_fd(int fd);
std::string read_file(std::string file, bool allow_enoent = false); std::string read_file(std::string file, bool allow_enoent = false);
int check_queue_cache(std::string dev, std::string parent_dev); int disable_cache(std::string dev);
std::string get_parent_device(std::string dev); std::string get_parent_device(std::string dev);
bool json_is_true(const json11::Json & val); bool json_is_true(const json11::Json & val);
int shell_exec(const std::vector<std::string> & cmd, const std::string & in, std::string *out, std::string *err); int shell_exec(const std::vector<std::string> & cmd, const std::string & in, std::string *out, std::string *err);

View File

@ -63,6 +63,21 @@ int disk_tool_t::prepare_one(std::map<std::string, std::string> options, int is_
} }
} }
} }
// Resolve "auto" values of disable_{data,meta,journal}_fsync into "0" or "1".
// Only roles that have a device configured and are still set to "auto" are touched.
for (auto dev: std::vector<std::string>{"data", "meta", "journal"})
{
if (options[dev+"_device"] != "" && options["disable_"+dev+"_fsync"] == "auto")
{
// disable_cache() returns 0 = success, 1 = warning, -1 = error
int r = disable_cache(realpath_str(options[dev+"_device"], false));
if (r != 0)
{
// Any non-zero result keeps fsync enabled; only the "warning" result (1)
// prints a message here
if (r == 1)
fprintf(stderr, "Warning: disable_%s_fsync is auto, but cache status check failed. Leaving fsync on\n", dev.c_str());
options["disable_"+dev+"_fsync"] = "0";
}
else
// disable_cache() reported success, so fsync is turned off for this role
options["disable_"+dev+"_fsync"] = "1";
}
}
// Calculate offsets if the same device is used for two or more of data, meta, and journal // Calculate offsets if the same device is used for two or more of data, meta, and journal
if (options["journal_size"] == "") if (options["journal_size"] == "")
{ {
@ -107,6 +122,7 @@ int disk_tool_t::prepare_one(std::map<std::string, std::string> options, int is_
{ "disable_data_fsync", json_is_true(options["disable_data_fsync"]) }, { "disable_data_fsync", json_is_true(options["disable_data_fsync"]) },
{ "disable_meta_fsync", json_is_true(options["disable_meta_fsync"]) }, { "disable_meta_fsync", json_is_true(options["disable_meta_fsync"]) },
{ "disable_journal_fsync", json_is_true(options["disable_journal_fsync"]) }, { "disable_journal_fsync", json_is_true(options["disable_journal_fsync"]) },
{ "skip_cache_check", json_is_true(options["skip_cache_check"]) },
{ "immediate_commit", json_is_true(options["disable_data_fsync"]) { "immediate_commit", json_is_true(options["disable_data_fsync"])
? (json_is_true(options["disable_journal_fsync"]) ? "all" : "small") : "none" }, ? (json_is_true(options["disable_journal_fsync"]) ? "all" : "small") : "none" },
}; };
@ -518,11 +534,11 @@ int disk_tool_t::prepare(std::vector<std::string> devices)
max_other_percent = 100; max_other_percent = 100;
std::vector<vitastor_dev_info_t> ssds; std::vector<vitastor_dev_info_t> ssds;
if (options.find("disable_data_fsync") == options.end()) if (options.find("disable_data_fsync") == options.end())
options["disable_data_fsync"] = "1"; options["disable_data_fsync"] = "auto";
if (hybrid) if (hybrid)
{ {
if (options.find("disable_meta_fsync") == options.end()) if (options.find("disable_meta_fsync") == options.end())
options["disable_meta_fsync"] = "1"; options["disable_meta_fsync"] = "auto";
options["disable_journal_fsync"] = options["disable_meta_fsync"]; options["disable_journal_fsync"] = options["disable_meta_fsync"];
for (auto & dev: devinfo) for (auto & dev: devinfo)
if (!dev.is_hdd) if (!dev.is_hdd)

View File

@ -2,7 +2,6 @@
// License: VNPL-1.1 (see README.md for details) // License: VNPL-1.1 (see README.md for details)
#include <sys/file.h> #include <sys/file.h>
#include <dirent.h>
#include "disk_tool.h" #include "disk_tool.h"
#include "rw_blocking.h" #include "rw_blocking.h"
@ -311,69 +310,6 @@ int disk_tool_t::exec_osd(std::string device)
return 0; return 0;
} }
// Make sure the write-back cache of the drive behind <dev> is disabled.
//
// For SCSI/SATA disks (ones that have /sys/block/<parent>/device/scsi_disk/<id>/cache_type)
// the cache mode is switched from "write back" to "write through" when needed.
// For all other devices the result of check_queue_cache() is returned as is.
//
// dev is passed to get_parent_device() and then has its first 5 characters stripped
// before being handed to check_queue_cache() - presumably the "/dev/" prefix.
//
// returns 1 = warning, -1 = error, 0 = success
//   1  - the state could not be determined (unknown parent device, unreadable
//        sysfs directory, several scsi_disk entries, empty cache_type)
//   -1 - cache_type is "write back" and writing "write through" to it failed
//   0  - cache_type is not "write back", or it was switched successfully
static int disable_cache(std::string dev)
{
    auto parent_dev = get_parent_device(dev);
    if (parent_dev == "")
        return 1;
    auto scsi_disk = "/sys/block/"+parent_dev+"/device/scsi_disk";
    DIR *dir = opendir(scsi_disk.c_str());
    if (!dir)
    {
        if (errno == ENOENT)
        {
            // Not a SCSI/SATA device, just check /sys/block/.../queue/write_cache
            return check_queue_cache(dev.substr(5), parent_dev);
        }
        fprintf(stderr, "Can't read directory %s: %s\n", scsi_disk.c_str(), strerror(errno));
        return 1;
    }
    // Skip "." and ".."
    dirent *de = readdir(dir);
    while (de && de->d_name[0] == '.' && (de->d_name[1] == 0 || (de->d_name[1] == '.' && de->d_name[2] == 0)))
        de = readdir(dir);
    if (!de)
    {
        // Not a SCSI/SATA device, just check /sys/block/.../queue/write_cache
        closedir(dir);
        return check_queue_cache(dev.substr(5), parent_dev);
    }
    // Copy the name: the dirent may be overwritten by the next readdir() call
    std::string entry = de->d_name;
    if (readdir(dir) != NULL)
    {
        // Error, multiple scsi_disk/* entries. Report the directory itself,
        // not the path of the first entry found in it
        closedir(dir);
        fprintf(stderr, "Multiple entries in %s found\n", scsi_disk.c_str());
        return 1;
    }
    closedir(dir);
    // Check cache_type
    scsi_disk += "/"+entry+"/cache_type";
    std::string cache_type = read_file(scsi_disk);
    // The file content may end with a newline; strip trailing whitespace,
    // otherwise the comparison with "write back" below can never match
    cache_type.erase(cache_type.find_last_not_of(" \t\r\n")+1);
    if (cache_type == "")
        return 1;
    if (cache_type == "write back")
    {
        int fd = open(scsi_disk.c_str(), O_WRONLY);
        if (fd < 0 || write_blocking(fd, (void*)"write through", strlen("write through")) != strlen("write through"))
        {
            // Remember errno before close() gets a chance to overwrite it
            int saved_errno = errno;
            if (fd >= 0)
                close(fd);
            fprintf(stderr, "Can't write to %s: %s\n", scsi_disk.c_str(), strerror(saved_errno));
            return -1;
        }
        close(fd);
    }
    return 0;
}
static int check_disabled_cache(std::string dev) static int check_disabled_cache(std::string dev)
{ {
int r = disable_cache(dev); int r = disable_cache(dev);

View File

@ -2,6 +2,7 @@
// License: VNPL-1.1 (see README.md for details) // License: VNPL-1.1 (see README.md for details)
#include <sys/wait.h> #include <sys/wait.h>
#include <dirent.h>
#include "disk_tool.h" #include "disk_tool.h"
#include "rw_blocking.h" #include "rw_blocking.h"
@ -88,7 +89,9 @@ std::string read_file(std::string file, bool allow_enoent)
return res; return res;
} }
int check_queue_cache(std::string dev, std::string parent_dev) // returns 1 = check error, 0 = write through, -1 = write back
// (similar to 1 = warning, -1 = error, 0 = success in disable_cache)
static int check_queue_cache(std::string dev, std::string parent_dev)
{ {
auto r = read_file("/sys/block/"+dev+"/queue/write_cache", true); auto r = read_file("/sys/block/"+dev+"/queue/write_cache", true);
if (r == "") if (r == "")
@ -98,6 +101,69 @@ int check_queue_cache(std::string dev, std::string parent_dev)
return trim(r) == "write through" ? 0 : -1; return trim(r) == "write through" ? 0 : -1;
} }
// Make sure the write-back cache of the drive behind <dev> is disabled.
//
// For SCSI/SATA disks (ones that have /sys/block/<parent>/device/scsi_disk/<id>/cache_type)
// the cache mode is switched from "write back" to "write through" when needed.
// For all other devices the result of check_queue_cache() is returned as is.
//
// dev is passed to get_parent_device() and then has its first 5 characters stripped
// before being handed to check_queue_cache() - presumably the "/dev/" prefix.
//
// returns 1 = warning, -1 = error, 0 = success
//   1  - the state could not be determined (unknown parent device, unreadable
//        sysfs directory, several scsi_disk entries, empty cache_type)
//   -1 - cache_type is "write back" and writing "write through" to it failed
//   0  - cache_type is not "write back", or it was switched successfully
int disable_cache(std::string dev)
{
    auto parent_dev = get_parent_device(dev);
    if (parent_dev == "")
        return 1;
    auto scsi_disk = "/sys/block/"+parent_dev+"/device/scsi_disk";
    DIR *dir = opendir(scsi_disk.c_str());
    if (!dir)
    {
        if (errno == ENOENT)
        {
            // Not a SCSI/SATA device, just check /sys/block/.../queue/write_cache
            return check_queue_cache(dev.substr(5), parent_dev);
        }
        fprintf(stderr, "Can't read directory %s: %s\n", scsi_disk.c_str(), strerror(errno));
        return 1;
    }
    // Skip "." and ".."
    dirent *de = readdir(dir);
    while (de && de->d_name[0] == '.' && (de->d_name[1] == 0 || (de->d_name[1] == '.' && de->d_name[2] == 0)))
        de = readdir(dir);
    if (!de)
    {
        // Not a SCSI/SATA device, just check /sys/block/.../queue/write_cache
        closedir(dir);
        return check_queue_cache(dev.substr(5), parent_dev);
    }
    // Copy the name: the dirent may be overwritten by the next readdir() call
    std::string entry = de->d_name;
    if (readdir(dir) != NULL)
    {
        // Error, multiple scsi_disk/* entries. Report the directory itself,
        // not the path of the first entry found in it
        closedir(dir);
        fprintf(stderr, "Multiple entries in %s found\n", scsi_disk.c_str());
        return 1;
    }
    closedir(dir);
    // Check cache_type
    scsi_disk += "/"+entry+"/cache_type";
    std::string cache_type = read_file(scsi_disk);
    // The file content may end with a newline; strip trailing whitespace,
    // otherwise the comparison with "write back" below can never match
    cache_type.erase(cache_type.find_last_not_of(" \t\r\n")+1);
    if (cache_type == "")
        return 1;
    if (cache_type == "write back")
    {
        int fd = open(scsi_disk.c_str(), O_WRONLY);
        if (fd < 0 || write_blocking(fd, (void*)"write through", strlen("write through")) != strlen("write through"))
        {
            // Remember errno before close() gets a chance to overwrite it
            int saved_errno = errno;
            if (fd >= 0)
                close(fd);
            fprintf(stderr, "Can't write to %s: %s\n", scsi_disk.c_str(), strerror(saved_errno));
            return -1;
        }
        close(fd);
    }
    return 0;
}
std::string get_parent_device(std::string dev) std::string get_parent_device(std::string dev)
{ {
if (dev.substr(0, 5) != "/dev/") if (dev.substr(0, 5) != "/dev/")