Automatically check whether to disable cache during prepare

rm-left-on-dead
Vitaliy Filippov 2022-09-03 00:48:55 +03:00
parent bd11db5d0a
commit 9481456dfe
7 changed files with 98 additions and 80 deletions

View File

@ -54,10 +54,10 @@ Options (automatic mode):
by the `/sys/block/.../queue/rotational` flag. In hybrid mode, default object
size is 1 MB instead of 128 KB, default journal size is 1 GB instead of 32 MB,
and throttle_small_writes is enabled by default.
--disable_data_fsync 1
Disable data device cache and fsync (1/yes/true = on, default on)
--disable_meta_fsync 1
Disable metadata/journal device cache and fsync (default on)
--disable_data_fsync auto
Disable data device cache and fsync (1/yes/true = on, default auto)
--disable_meta_fsync auto
Disable metadata/journal device cache and fsync (default auto)
--meta_reserve 2x,1G
New metadata partitions in --hybrid mode are created larger than actual
metadata size to ease possible future extension. The default is to allocate

View File

@ -54,10 +54,10 @@ vitastor-disk - инструмент командной строки для уп
по флагу `/sys/block/.../queue/rotational`. В гибридном режиме по умолчанию
используется размер объекта 1 МБ вместо 128 КБ, размер журнала 1 ГБ вместо 32 МБ
и включённый throttle_small_writes.
--disable_data_fsync 1
Отключать кэш и fsync-и для устройств данных (1/yes/true = да, по умолчанию да)
--disable_meta_fsync 1
Отключать кэш и fsync-и для журналов и метаданных (по умолчанию да)
--disable_data_fsync auto
Отключать кэш и fsync-и для устройств данных (1/yes/true = да, по умолчанию автоопределение)
--disable_meta_fsync auto
Отключать кэш и fsync-и для журналов и метаданных (по умолчанию автоопределение)
--meta_reserve 2x,1G
В гибридном режиме для метаданных выделяется больше места, чем нужно на самом
деле, чтобы оставить запас под будущее расширение. По умолчанию выделяется

View File

@ -33,10 +33,10 @@ static const char *help_text =
" by the `/sys/block/.../queue/rotational` flag. In hybrid mode, default object\n"
" size is 1 MB instead of 128 KB, default journal size is 1 GB instead of 32 MB,\n"
" and throttle_small_writes is enabled by default.\n"
" --disable_data_fsync 1\n"
" Disable data device cache and fsync (1/yes/true = on, default on)\n"
" --disable_meta_fsync 1\n"
" Disable metadata/journal device cache and fsync (default on)\n"
" --disable_data_fsync auto\n"
" Disable data device cache and fsync (1/yes/true = on, default auto)\n"
" --disable_meta_fsync auto\n"
" Disable metadata/journal device cache and fsync (default auto)\n"
" --meta_reserve 2x,1G\n"
" New metadata partitions in --hybrid mode are created larger than actual\n"
" metadata size to ease possible future extension. The default is to allocate\n"

View File

@ -131,7 +131,7 @@ void fromhexstr(const std::string & from, int bytes, uint8_t *to);
std::string realpath_str(std::string path, bool nofail = true);
std::string read_all_fd(int fd);
std::string read_file(std::string file, bool allow_enoent = false);
int check_queue_cache(std::string dev, std::string parent_dev);
int disable_cache(std::string dev);
std::string get_parent_device(std::string dev);
bool json_is_true(const json11::Json & val);
int shell_exec(const std::vector<std::string> & cmd, const std::string & in, std::string *out, std::string *err);

View File

@ -63,6 +63,21 @@ int disk_tool_t::prepare_one(std::map<std::string, std::string> options, int is_
}
}
}
for (auto dev: std::vector<std::string>{"data", "meta", "journal"})
{
if (options[dev+"_device"] != "" && options["disable_"+dev+"_fsync"] == "auto")
{
int r = disable_cache(realpath_str(options[dev+"_device"], false));
if (r != 0)
{
if (r == 1)
fprintf(stderr, "Warning: disable_%s_fsync is auto, but cache status check failed. Leaving fsync on\n", dev.c_str());
options["disable_"+dev+"_fsync"] = "0";
}
else
options["disable_"+dev+"_fsync"] = "1";
}
}
// Calculate offsets if the same device is used for two or more of data, meta, and journal
if (options["journal_size"] == "")
{
@ -107,6 +122,7 @@ int disk_tool_t::prepare_one(std::map<std::string, std::string> options, int is_
{ "disable_data_fsync", json_is_true(options["disable_data_fsync"]) },
{ "disable_meta_fsync", json_is_true(options["disable_meta_fsync"]) },
{ "disable_journal_fsync", json_is_true(options["disable_journal_fsync"]) },
{ "skip_cache_check", json_is_true(options["skip_cache_check"]) },
{ "immediate_commit", json_is_true(options["disable_data_fsync"])
? (json_is_true(options["disable_journal_fsync"]) ? "all" : "small") : "none" },
};
@ -518,11 +534,11 @@ int disk_tool_t::prepare(std::vector<std::string> devices)
max_other_percent = 100;
std::vector<vitastor_dev_info_t> ssds;
if (options.find("disable_data_fsync") == options.end())
options["disable_data_fsync"] = "1";
options["disable_data_fsync"] = "auto";
if (hybrid)
{
if (options.find("disable_meta_fsync") == options.end())
options["disable_meta_fsync"] = "1";
options["disable_meta_fsync"] = "auto";
options["disable_journal_fsync"] = options["disable_meta_fsync"];
for (auto & dev: devinfo)
if (!dev.is_hdd)

View File

@ -2,7 +2,6 @@
// License: VNPL-1.1 (see README.md for details)
#include <sys/file.h>
#include <dirent.h>
#include "disk_tool.h"
#include "rw_blocking.h"
@ -311,69 +310,6 @@ int disk_tool_t::exec_osd(std::string device)
return 0;
}
// Make sure the write cache of the disk behind <dev> is in write-through mode.
//
// For SCSI/SATA disks (those exposing exactly one
// /sys/block/<parent>/device/scsi_disk/<entry>/cache_type) a "write back"
// cache is switched to "write through". For all other devices the result of
// check_queue_cache() is returned as is.
//
// returns 1 = warning (cache status could not be determined),
//        -1 = error (cache_type could not be rewritten; check_queue_cache()
//             also yields -1 when the queue does not report "write through"),
//         0 = success
static int disable_cache(std::string dev)
{
    auto parent_dev = get_parent_device(dev);
    if (parent_dev == "")
        return 1;
    const std::string scsi_dir = "/sys/block/"+parent_dev+"/device/scsi_disk";
    DIR *dir = opendir(scsi_dir.c_str());
    if (!dir)
    {
        if (errno == ENOENT)
        {
            // Not a SCSI/SATA device, just check /sys/block/.../queue/write_cache
            // NOTE(review): substr(5) drops a leading "/dev/"; presumably
            // get_parent_device() returned "" above for any other path - confirm
            return check_queue_cache(dev.substr(5), parent_dev);
        }
        fprintf(stderr, "Can't read directory %s: %s\n", scsi_dir.c_str(), strerror(errno));
        return 1;
    }
    // Count the real entries, ignoring "." and ".." wherever they show up in
    // the listing (readdir() does not promise they come first)
    std::string first_entry;
    int entry_count = 0;
    for (dirent *de = readdir(dir); de != NULL; de = readdir(dir))
    {
        if (!strcmp(de->d_name, ".") || !strcmp(de->d_name, ".."))
            continue;
        if (entry_count == 0)
            first_entry = de->d_name;
        entry_count++;
    }
    closedir(dir);
    if (entry_count == 0)
    {
        // Not a SCSI/SATA device, just check /sys/block/.../queue/write_cache
        return check_queue_cache(dev.substr(5), parent_dev);
    }
    if (entry_count > 1)
    {
        // Error, multiple scsi_disk/* entries
        fprintf(stderr, "Multiple entries in %s found\n", scsi_dir.c_str());
        return 1;
    }
    // Check cache_type
    const std::string cache_type_path = scsi_dir+"/"+first_entry+"/cache_type";
    std::string cache_type = read_file(cache_type_path);
    // sysfs attributes end with a newline; strip trailing whitespace so that
    // the comparison below can actually match
    cache_type.erase(cache_type.find_last_not_of(" \t\r\n")+1);
    if (cache_type == "")
        return 1;
    if (cache_type == "write back")
    {
        const char *write_through = "write through";
        const size_t len = strlen(write_through);
        int fd = open(cache_type_path.c_str(), O_WRONLY);
        if (fd < 0 || write_blocking(fd, const_cast<char*>(write_through), len) != len)
        {
            // Remember errno before close() gets a chance to overwrite it
            const int saved_errno = errno;
            if (fd >= 0)
                close(fd);
            fprintf(stderr, "Can't write to %s: %s\n", cache_type_path.c_str(), strerror(saved_errno));
            return -1;
        }
        close(fd);
    }
    return 0;
}
static int check_disabled_cache(std::string dev)
{
int r = disable_cache(dev);

View File

@ -2,6 +2,7 @@
// License: VNPL-1.1 (see README.md for details)
#include <sys/wait.h>
#include <dirent.h>
#include "disk_tool.h"
#include "rw_blocking.h"
@ -88,7 +89,9 @@ std::string read_file(std::string file, bool allow_enoent)
return res;
}
int check_queue_cache(std::string dev, std::string parent_dev)
// returns 1 = check error, 0 = write through, -1 = write back
// (similar to 1 = warning, -1 = error, 0 = success in disable_cache)
static int check_queue_cache(std::string dev, std::string parent_dev)
{
auto r = read_file("/sys/block/"+dev+"/queue/write_cache", true);
if (r == "")
@ -98,6 +101,69 @@ int check_queue_cache(std::string dev, std::string parent_dev)
return trim(r) == "write through" ? 0 : -1;
}
// Make sure the write cache of the disk behind <dev> is in write-through mode.
//
// For SCSI/SATA disks (those exposing exactly one
// /sys/block/<parent>/device/scsi_disk/<entry>/cache_type) a "write back"
// cache is switched to "write through". For all other devices the result of
// check_queue_cache() is returned as is.
//
// returns 1 = warning (cache status could not be determined),
//        -1 = error (cache_type could not be rewritten; check_queue_cache()
//             also yields -1 when the queue does not report "write through"),
//         0 = success
int disable_cache(std::string dev)
{
    auto parent_dev = get_parent_device(dev);
    if (parent_dev == "")
        return 1;
    const std::string scsi_dir = "/sys/block/"+parent_dev+"/device/scsi_disk";
    DIR *dir = opendir(scsi_dir.c_str());
    if (!dir)
    {
        if (errno == ENOENT)
        {
            // Not a SCSI/SATA device, just check /sys/block/.../queue/write_cache
            // NOTE(review): substr(5) drops a leading "/dev/"; presumably
            // get_parent_device() returned "" above for any other path - confirm
            return check_queue_cache(dev.substr(5), parent_dev);
        }
        fprintf(stderr, "Can't read directory %s: %s\n", scsi_dir.c_str(), strerror(errno));
        return 1;
    }
    // Count the real entries, ignoring "." and ".." wherever they show up in
    // the listing (readdir() does not promise they come first)
    std::string first_entry;
    int entry_count = 0;
    for (dirent *de = readdir(dir); de != NULL; de = readdir(dir))
    {
        if (!strcmp(de->d_name, ".") || !strcmp(de->d_name, ".."))
            continue;
        if (entry_count == 0)
            first_entry = de->d_name;
        entry_count++;
    }
    closedir(dir);
    if (entry_count == 0)
    {
        // Not a SCSI/SATA device, just check /sys/block/.../queue/write_cache
        return check_queue_cache(dev.substr(5), parent_dev);
    }
    if (entry_count > 1)
    {
        // Error, multiple scsi_disk/* entries
        fprintf(stderr, "Multiple entries in %s found\n", scsi_dir.c_str());
        return 1;
    }
    // Check cache_type
    const std::string cache_type_path = scsi_dir+"/"+first_entry+"/cache_type";
    std::string cache_type = read_file(cache_type_path);
    // sysfs attributes end with a newline; strip trailing whitespace so that
    // the comparison below can actually match
    cache_type.erase(cache_type.find_last_not_of(" \t\r\n")+1);
    if (cache_type == "")
        return 1;
    if (cache_type == "write back")
    {
        const char *write_through = "write through";
        const size_t len = strlen(write_through);
        int fd = open(cache_type_path.c_str(), O_WRONLY);
        if (fd < 0 || write_blocking(fd, const_cast<char*>(write_through), len) != len)
        {
            // Remember errno before close() gets a chance to overwrite it
            const int saved_errno = errno;
            if (fd >= 0)
                close(fd);
            fprintf(stderr, "Can't write to %s: %s\n", cache_type_path.c_str(), strerror(saved_errno));
            return -1;
        }
        close(fd);
    }
    return 0;
}
std::string get_parent_device(std::string dev)
{
if (dev.substr(0, 5) != "/dev/")