From 6261809e873e932fe5e529adce7d6dbe482af44c Mon Sep 17 00:00:00 2001 From: Vitaliy Filippov Date: Sat, 12 Feb 2022 01:30:50 +0300 Subject: [PATCH] WIP Simplified NFS proxy --- .gitmodules | 3 + libnfs | 1 + src/CMakeLists.txt | 61 ++++ src/etcd_state_client.cpp | 8 + src/etcd_state_client.h | 1 + src/nfs_conn.cpp | 748 ++++++++++++++++++++++++++++++++++++++ src/nfs_portmap.cpp | 172 +++++++++ src/nfs_portmap.h | 41 +++ src/nfs_proxy.cpp | 301 +++++++++++++++ src/nfs_proxy.h | 47 +++ src/sha256.c | 158 ++++++++ src/sha256.h | 41 +++ 12 files changed, 1582 insertions(+) create mode 160000 libnfs create mode 100644 src/nfs_conn.cpp create mode 100644 src/nfs_portmap.cpp create mode 100644 src/nfs_portmap.h create mode 100644 src/nfs_proxy.cpp create mode 100644 src/nfs_proxy.h create mode 100644 src/sha256.c create mode 100644 src/sha256.h diff --git a/.gitmodules b/.gitmodules index c6636c20..e70fb78a 100644 --- a/.gitmodules +++ b/.gitmodules @@ -4,3 +4,6 @@ [submodule "json11"] path = json11 url = ../json11.git +[submodule "libnfs"] + path = libnfs + url = ../libnfs.git diff --git a/libnfs b/libnfs new file mode 160000 index 00000000..5a991e1f --- /dev/null +++ b/libnfs @@ -0,0 +1 @@ +Subproject commit 5a991e1fcbcdcca9a191c8e377b4cfb89142761e diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 446371ee..0dd0e36b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -152,6 +152,67 @@ target_link_libraries(vitastor-nbd vitastor_client ) +# vitastor-nfs +add_executable(vitastor-nfs + nfs_proxy.cpp + nfs_conn.cpp + nfs_portmap.cpp + sha256.c + ../libnfs/lib/init.c + ../libnfs/lib/pdu.c + ../libnfs/lib/libnfs-zdr.c + ../libnfs/lib/socket.c + ../libnfs/portmap/libnfs-raw-portmap.c + ../libnfs/nfs/libnfs-raw-nfs.c + ../libnfs/mount/libnfs-raw-mount.c +) +set_source_files_properties( + ../libnfs/nfs/libnfs-raw-nfs.c + PROPERTIES + COMPILE_FLAGS "-Wno-unused-but-set-variable" +) +# Simplified static configuration +# The other option is to build patched libnfs packages until all distros get my fixes +target_compile_options(vitastor-nfs + PRIVATE + -DHAVE_ARPA_INET_H + -DHAVE_INTTYPES_H + -DHAVE_MEMORY_H + -DHAVE_NETDB_H + -DHAVE_NETINET_IN_H + -DHAVE_NETINET_TCP_H + -DHAVE_NET_IF_H + -DHAVE_POLL_H + -DHAVE_STDINT_H + -DHAVE_STDLIB_H + -DHAVE_STRINGS_H + -DHAVE_STRING_H + -DHAVE_SYS_IOCTL_H + -DHAVE_SYS_SOCKET_H + -DHAVE_SYS_STATVFS_H + -DHAVE_SYS_STAT_H + -DHAVE_SYS_SYSMACROS_H + -DHAVE_SYS_TIME_H + -DHAVE_SYS_TYPES_H + -DHAVE_SYS_VFS_H + -DHAVE_UNISTD_H + -DHAVE_UTIME_H + -DHAVE_SOCKADDR_STORAGE + -DHAVE_STRUCT_STAT_ST_MTIM_TV_NSEC + -D_U_= +) +target_include_directories(vitastor-nfs + PRIVATE + ../libnfs/include + ../libnfs/include/nfsc + ../libnfs/portmap + ../libnfs/nfs + ../libnfs/mount +) +target_link_libraries(vitastor-nfs + vitastor_client +) + # vitastor-cli add_executable(vitastor-cli cli.cpp cli_common.cpp cli_alloc_osd.cpp cli_simple_offsets.cpp cli_status.cpp cli_df.cpp diff --git a/src/etcd_state_client.cpp b/src/etcd_state_client.cpp index af726a15..19a7be3a 100644 --- a/src/etcd_state_client.cpp +++ b/src/etcd_state_client.cpp @@ -954,6 +954,10 @@ void etcd_state_client_t::parse_state(const etcd_kv_t & kv) } if (!value.is_object()) { + if (on_inode_change_hook != NULL) + { + on_inode_change_hook(inode_num, true); + } this->inode_config.erase(inode_num); } else @@ -995,6 +999,10 @@ void etcd_state_client_t::parse_state(const etcd_kv_t & kv) } } } + if (on_inode_change_hook != NULL) + { + on_inode_change_hook(inode_num, false); + } } } } diff --git a/src/etcd_state_client.h b/src/etcd_state_client.h index 6ebc2ae7..79cbb70f 100644 --- a/src/etcd_state_client.h +++ b/src/etcd_state_client.h @@ -109,6 +109,7 @@ public: std::function on_change_pg_history_hook; std::function on_change_osd_state_hook; std::function on_reload_hook; + std::function on_inode_change_hook; json11::Json::object serialize_inode_cfg(inode_config_t *cfg); etcd_kv_t parse_etcd_kv(const json11::Json & kv_json); diff --git a/src/nfs_conn.cpp b/src/nfs_conn.cpp new file mode 100644 index 00000000..c38fd835 --- /dev/null +++ b/src/nfs_conn.cpp @@ -0,0 +1,748 @@ +// Copyright (c) Vitaliy Filippov, 2019+ +// License: VNPL-1.1 (see README.md for details) +// +// NFS connection handler for NFS proxy + +#include + +#include "libnfs-raw-mount.h" +#include "libnfs-raw-nfs.h" + +#include "base64.h" + +#include "nfs_proxy.h" + +static unsigned len_pad4(unsigned len) +{ + return len + (len&3 ? 4-(len&3) : 0); +} + +static int nfs3_null_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque) +{ + rpc_send_reply(rpc, call, NULL, (zdrproc_t)zdr_void, 0); + return 0; +} + +static fattr3 get_dir_attributes(nfs_client_t *self, std::string dir) +{ + return (fattr3){ + .type = NF3DIR, + .mode = 0755, + .nlink = 1, + .uid = 0, + .gid = 0, + .size = 4096, + .used = 4096, + .rdev = (specdata3){ 0 }, + .fsid = self->parent->fsid, + .fileid = dir == "" ? 1 : self->parent->dir_ids.at(dir), + //.atime = (nfstime3){ .seconds = now.tv_sec, .nseconds = now.tv_nsec }, + //.mtime = (nfstime3){ .seconds = now.tv_sec, .nseconds = now.tv_nsec }, + //.ctime = (nfstime3){ .seconds = now.tv_sec, .nseconds = now.tv_nsec }, + }; +} + +static int nfs3_getattr_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque) +{ + nfs_client_t *self = (nfs_client_t*)opaque; + GETATTR3args *args = (GETATTR3args*)call->body.cbody.args; + GETATTR3res reply; + std::string dirhash = std::string(args->object.data.data_val, args->object.data.data_len); + bool is_dir = false; + std::string dir; + if (dirhash == "roothandle") + is_dir = true; + else + { + auto dir_it = self->parent->dir_by_hash.find(dirhash); + if (dir_it != self->parent->dir_by_hash.end()) + { + is_dir = true; + dir = dir_it->second; + } + } + if (is_dir) + { + // Directory info + reply.status = NFS3_OK; + reply.GETATTR3res_u.resok.obj_attributes = get_dir_attributes(self, dir); + } + else + { + uint64_t inode_num; + auto inode_num_it = self->parent->inode_by_hash.find(dirhash); + if (inode_num_it != self->parent->inode_by_hash.end()) + inode_num = inode_num_it->second; + auto inode_it = self->parent->cli->st_cli.inode_config.find(inode_num); + if (inode_it != self->parent->cli->st_cli.inode_config.end()) + { + // File info + auto & inode_cfg = inode_it->second; + reply.status = NFS3_OK; + reply.GETATTR3res_u.resok.obj_attributes = { + .type = NF3REG, + .mode = 0644, + .nlink = 1, + .uid = 0, + .gid = 0, + .size = inode_cfg.size, + .used = inode_cfg.size, + .rdev = (specdata3){ 0 }, + .fsid = self->parent->fsid, + .fileid = inode_it->first, + //.atime = (nfstime3){ .seconds = now.tv_sec, .nseconds = now.tv_nsec }, + //.mtime = (nfstime3){ .seconds = now.tv_sec, .nseconds = now.tv_nsec }, + //.ctime = (nfstime3){ .seconds = now.tv_sec, .nseconds = now.tv_nsec }, + }; + } + else + { + // File not exists + reply.status = NFS3ERR_NOENT; + } + } + rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_GETATTR3res, sizeof(GETATTR3res)); + return 0; +} + +static int nfs3_setattr_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque) +{ + nfs_client_t *self = (nfs_client_t*)opaque; + SETATTR3args *args = (SETATTR3args*)call->body.cbody.args; + SETATTR3res reply; + // Not supported yet + reply.status = NFS3ERR_NOTSUPP; + rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_SETATTR3res, sizeof(SETATTR3res)); + return 0; +} + +static int nfs3_lookup_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque) +{ + nfs_client_t *self = (nfs_client_t*)opaque; + LOOKUP3args *args = (LOOKUP3args*)call->body.cbody.args; + LOOKUP3res reply; + std::string dirhash = std::string(args->what.dir.data.data_val, args->what.dir.data.data_len); + std::string dir; + if (dirhash != "roothandle") + { + auto dir_it = self->parent->dir_by_hash.find(dirhash); + if (dir_it != self->parent->dir_by_hash.end()) + dir = dir_it->second; + } + std::string full_name = self->parent->name_prefix; + if (dir != "") + { + full_name += dir+"/"; + } + full_name += std::string(args->what.name); + for (auto & ic: self->parent->cli->st_cli.inode_config) + { + if (ic.second.name == full_name) + { + std::string fh = "S"+base64_encode(sha256(full_name.substr(self->parent->name_prefix.size()))); + reply.status = NFS3_OK; + reply.LOOKUP3res_u.resok.object.data.data_len = fh.size(); + reply.LOOKUP3res_u.resok.object.data.data_val = (char*)fh.c_str(); + rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_LOOKUP3res, sizeof(LOOKUP3res)); + return 0; + } + } + reply.status = NFS3ERR_NOENT; + rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_LOOKUP3res, sizeof(LOOKUP3res)); + return 0; +} + +static int nfs3_access_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque) +{ + nfs_client_t *self = (nfs_client_t*)opaque; + ACCESS3args *args = (ACCESS3args*)call->body.cbody.args; + ACCESS3res reply = { + .status = NFS3_OK, + .ACCESS3res_u = { .resok = { + .access = args->access, + } }, + }; + rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_ACCESS3res, sizeof(ACCESS3res)); + return 0; +} + +static int nfs3_readlink_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque) +{ + nfs_client_t *self = (nfs_client_t*)opaque; + READLINK3args *args = (READLINK3args*)call->body.cbody.args; + READLINK3res reply = {}; + // Not supported yet + reply.status = NFS3ERR_NOTSUPP; + rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_READLINK3res, sizeof(READLINK3res)); + return 0; +} + +#define MAX_REQUEST_SIZE 128*1024*1024 + +static int nfs3_read_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque) +{ + nfs_client_t *self = (nfs_client_t*)opaque; + READ3args *args = (READ3args*)call->body.cbody.args; + std::string handle = std::string(args->file.data.data_val, args->file.data.data_len); + auto ino_it = self->parent->inode_by_hash.find(handle); + if (ino_it == self->parent->inode_by_hash.end()) + { + READ3res reply = { .status = NFS3ERR_NOENT }; + rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_READ3res, sizeof(READ3res)); + return 0; + } + if (args->count > MAX_REQUEST_SIZE) + { + READ3res reply = { .status = NFS3ERR_INVAL }; + rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_READ3res, sizeof(READ3res)); + return 0; + } + void *buf = malloc_or_die(args->count); + cluster_op_t *op = new cluster_op_t; + op->opcode = OSD_OP_READ; + op->inode = ino_it->second; + op->offset = args->offset; + op->len = args->count; + op->iov.push_back(buf, args->count); + op->callback = [rpc, call](cluster_op_t *op) + { + void *buf = op->iov.buf[0].iov_base; + READ3res reply = {}; + if (op->retval != op->len) + { + if (op->retval == -EINVAL) + reply.status = NFS3ERR_INVAL; + else if (op->retval == -ENOSPC) + reply.status = NFS3ERR_NOSPC; + else + reply.status = NFS3ERR_IO; + } + else + { + reply.status = NFS3_OK; + auto & reply_ok = reply.READ3res_u.resok; + reply_ok.count = op->retval; + reply_ok.eof = FALSE; + reply_ok.data.data_len = reply_ok.count; + reply_ok.data.data_val = (char*)buf; + } + rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_READ3res, sizeof(READ3res)); + delete op; + free(buf); + }; + self->parent->cli->execute(op); + return 0; +} + +static int nfs3_write_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque) +{ + nfs_client_t *self = (nfs_client_t*)opaque; + WRITE3args *args = (WRITE3args*)call->body.cbody.args; + WRITE3res reply; + // Not supported yet + reply.status = NFS3ERR_NOTSUPP; + rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_WRITE3res, sizeof(WRITE3res)); + return 0; +} + +static int nfs3_create_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque) +{ + nfs_client_t *self = (nfs_client_t*)opaque; + CREATE3args *args = (CREATE3args*)call->body.cbody.args; + CREATE3res reply; + // Not supported yet + reply.status = NFS3ERR_NOTSUPP; + rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_CREATE3res, sizeof(CREATE3res)); + return 0; +} + +static int nfs3_mkdir_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque) +{ + nfs_client_t *self = (nfs_client_t*)opaque; + MKDIR3args *args = (MKDIR3args*)call->body.cbody.args; + MKDIR3res reply; + // Not supported yet + reply.status = NFS3ERR_NOTSUPP; + rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_MKDIR3res, sizeof(MKDIR3res)); + return 0; +} + +static int nfs3_symlink_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque) +{ + nfs_client_t *self = (nfs_client_t*)opaque; + SYMLINK3args *args = (SYMLINK3args*)call->body.cbody.args; + SYMLINK3res reply; + // Not supported yet + reply.status = NFS3ERR_NOTSUPP; + rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_SYMLINK3res, sizeof(SYMLINK3res)); + return 0; +} + +static int nfs3_mknod_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque) +{ + nfs_client_t *self = (nfs_client_t*)opaque; + MKNOD3args *args = (MKNOD3args*)call->body.cbody.args; + MKNOD3res reply; + // Not supported yet + reply.status = NFS3ERR_NOTSUPP; + rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_MKNOD3res, sizeof(MKNOD3res)); + return 0; +} + +static int nfs3_remove_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque) +{ + REMOVE3args *args = (REMOVE3args*)call->body.cbody.args; + REMOVE3res reply; + // Not supported yet + reply.status = NFS3ERR_NOTSUPP; + rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_REMOVE3res, sizeof(REMOVE3res)); + return 0; +} + +static int nfs3_rmdir_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque) +{ + nfs_client_t *self = (nfs_client_t*)opaque; + RMDIR3args *args = (RMDIR3args*)call->body.cbody.args; + RMDIR3res reply; + // Not supported yet + reply.status = NFS3ERR_NOTSUPP; + rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_RMDIR3res, sizeof(RMDIR3res)); + return 0; +} + +static int nfs3_rename_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque) +{ + nfs_client_t *self = (nfs_client_t*)opaque; + RENAME3args *args = (RENAME3args*)call->body.cbody.args; + RENAME3res reply; + // Not supported yet + reply.status = NFS3ERR_NOTSUPP; + rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_RENAME3res, sizeof(RENAME3res)); + return 0; +} + +static int nfs3_link_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque) +{ + nfs_client_t *self = (nfs_client_t*)opaque; + LINK3args *args = (LINK3args*)call->body.cbody.args; + // We don't support hard links + LINK3res reply = { NFS3ERR_NOTSUPP }; + rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_LINK3res, sizeof(LINK3res)); + return 0; +} + +static void nfs3_readdir_common(struct rpc_context *rpc, struct rpc_msg *call, void *opaque, bool is_plus) +{ + nfs_client_t *self = (nfs_client_t*)opaque; + READDIRPLUS3args plus_args; + READDIRPLUS3args *args = NULL; + if (is_plus) + args = ((READDIRPLUS3args*)call->body.cbody.args); + else + { + args = &plus_args; + READDIR3args *in_args = ((READDIR3args*)call->body.cbody.args); + args->dir = in_args->dir; + args->cookie = in_args->cookie; + *((uint64_t*)args->cookieverf) = *((uint64_t*)in_args->cookieverf); + args->dircount = 512; + args->maxcount = in_args->count; + } + std::string dirhash = std::string(args->dir.data.data_val, args->dir.data.data_len); + std::string dir; + if (dirhash != "roothandle") + { + auto dir_it = self->parent->dir_by_hash.find(dirhash); + if (dir_it != self->parent->dir_by_hash.end()) + dir = dir_it->second; + } + std::string prefix = self->parent->name_prefix; + if (dir != "") + { + prefix += dir+"/"; + } + //struct timespec now; + //clock_gettime(CLOCK_REALTIME, &now); + std::map entries; + std::vector handles; + for (auto & ic: self->parent->cli->st_cli.inode_config) + { + auto & inode_cfg = ic.second; + if (prefix != "" && inode_cfg.name.substr(0, prefix.size()) != prefix) + continue; + std::string subname = inode_cfg.name.substr(prefix.size()); + int p = 0; + while (p < subname.size() && subname[p] == '/') + p++; + if (p > 0) + subname = subname.substr(p); + if (subname.size() == 0) + continue; + p = 0; + while (p < subname.size() && subname[p] != '/') + p++; + if (p >= subname.size()) + { + entries[subname] = (struct entryplus3){ + // fileid will change when the user creates snapshots + // however, we hope that clients tolerate it well + // Linux does, even though it complains about "fileid changed" in dmesg + .fileid = ic.first, + }; + if (is_plus) + { + handles.push_back("S"+base64_encode(sha256(inode_cfg.name))); + entries[subname].name_attributes = { + .attributes_follow = TRUE, + .post_op_attr_u = { .attributes = { + .type = NF3REG, + .mode = 0644, + .nlink = 1, + .uid = 0, + .gid = 0, + .size = inode_cfg.size, + .used = inode_cfg.size, // FIXME take from statistics + .rdev = (specdata3){ 0 }, + .fsid = self->parent->fsid, + .fileid = ic.first, + //.atime = (nfstime3){ .seconds = now.tv_sec, .nseconds = now.tv_nsec }, + //.mtime = (nfstime3){ .seconds = now.tv_sec, .nseconds = now.tv_nsec }, + //.ctime = (nfstime3){ .seconds = now.tv_sec, .nseconds = now.tv_nsec }, + } }, + }; + entries[subname].name_handle = { + .handle_follows = TRUE, + .post_op_fh3_u = { .handle = { + .data = { + // FIXME: I really want ZDR with std::string + .data_len = handles[handles.size()-1].size(), + .data_val = (char*)handles[handles.size()-1].c_str(), + }, + } }, + }; + } + } + else + { + auto subdir = dir == "" ? subname.substr(0, p) : dir+"/"+subname.substr(0, p); + entries[subdir] = (struct entryplus3){ + // for directories, fileid will change when the user restarts proxy + .fileid = self->parent->dir_ids.at(subdir), + }; + if (is_plus) + { + handles.push_back("S"+base64_encode(sha256(subdir))); + entries[subdir].name_attributes = { + .attributes_follow = TRUE, + .post_op_attr_u = { .attributes = get_dir_attributes(self, subdir) }, + }; + entries[subdir].name_handle = { + .handle_follows = TRUE, + .post_op_fh3_u = { .handle = { + .data = { + // FIXME: I really want ZDR with std::string + .data_len = (unsigned)handles[handles.size()-1].size(), + .data_val = (char*)handles[handles.size()-1].c_str(), + }, + } }, + }; + } + } + } + // Offset results by the continuation cookie (equal to index in the listing) + uint64_t idx = 1; + void *prev = NULL; + for (auto it = entries.begin(); it != entries.end(); it++) + { + entryplus3 *entry = &it->second; + // First fields of entry3 and entryplus3 are the same: fileid, name, cookie + entry->name = (char*)it->first.c_str(); + entry->cookie = idx++; + if (prev) + { + if (is_plus) + ((entryplus3*)prev)->nextentry = entry; + else + ((entry3*)prev)->nextentry = (entry3*)entry; + } + prev = entry; + if (args->cookie > 0 && entry->cookie == args->cookie+1) + { + entries.erase(entries.begin(), it); + } + } + // Now limit results based on maximum reply size + // Sadly we have to calculate reply size by hand + // reply without entries is 4+4+(dir_attributes ? sizeof(fattr3) : 0)+8+4 bytes + int reply_size = 20; + if (reply_size > args->maxcount) + { + // Error, too small max reply size + if (is_plus) + { + READDIRPLUS3res reply = { .status = NFS3ERR_TOOSMALL }; + rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_READDIRPLUS3res, sizeof(READDIRPLUS3res)); + } + else + { + READDIR3res reply = { .status = NFS3ERR_TOOSMALL }; + rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_READDIR3res, sizeof(READDIR3res)); + } + return; + } + // 1 entry3 is (8+4+(filename_len+3)/4*4+8) bytes + // 1 entryplus3 is (8+4+(filename_len+3)/4*4+8 + // + 4+(name_attributes ? (sizeof(fattr3) = 84) : 0) + // + 4+(name_handle ? 4+(handle_len+3)/4*4 : 0)) bytes + bool eof = true; + for (auto it = entries.begin(); it != entries.end(); it++) + { + reply_size += 20+len_pad4(it->first.size())+(is_plus + ? 8+84+len_pad4(it->second.name_handle.post_op_fh3_u.handle.data.data_len) : 0); + if (reply_size > args->maxcount) + { + // Stop + entries.erase(it, entries.end()); + eof = false; + break; + } + } + if (entries.end() != entries.begin()) + { + auto last_it = entries.end(); + last_it--; + if (is_plus) + ((entryplus3*)&last_it->second)->nextentry = NULL; + else + ((entry3*)&last_it->second)->nextentry = NULL; + } + // Send reply + if (is_plus) + { + READDIRPLUS3res reply = { .status = NFS3_OK }; + *(uint64_t*)(reply.READDIRPLUS3res_u.resok.cookieverf) = self->parent->dir_mod_rev.at(dir); + reply.READDIRPLUS3res_u.resok.reply.entries = &entries.begin()->second; + reply.READDIRPLUS3res_u.resok.reply.eof = eof; + rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_READDIRPLUS3res, sizeof(READDIRPLUS3res)); + } + else + { + READDIR3res reply = { .status = NFS3_OK }; + *(uint64_t*)(reply.READDIR3res_u.resok.cookieverf) = self->parent->dir_mod_rev.at(dir); + reply.READDIR3res_u.resok.reply.entries = (entry3*)&entries.begin()->second; + reply.READDIR3res_u.resok.reply.eof = eof; + rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_READDIR3res, sizeof(READDIR3res)); + } +} + +static int nfs3_readdir_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque) +{ + nfs3_readdir_common(rpc, call, opaque, false); + return 0; +} + +static int nfs3_readdirplus_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque) +{ + nfs3_readdir_common(rpc, call, opaque, true); + return 0; +} + +// Get file system statistics +static int nfs3_fsstat_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque) +{ + nfs_client_t *self = (nfs_client_t*)opaque; + FSSTAT3args *args = (FSSTAT3args*)call->body.cbody.args; + FSSTAT3res reply; + reply.status = NFS3_OK; + reply.FSSTAT3res_u.resok.obj_attributes.attributes_follow = TRUE; + reply.FSSTAT3res_u.resok.obj_attributes.post_op_attr_u.attributes = get_dir_attributes(self, ""); + reply.FSSTAT3res_u.resok.tbytes = 4096; // total bytes + reply.FSSTAT3res_u.resok.fbytes = 4096; // free bytes + reply.FSSTAT3res_u.resok.abytes = 4096; // available bytes + reply.FSSTAT3res_u.resok.tfiles = 1 << 31; // total files + reply.FSSTAT3res_u.resok.ffiles = 1 << 31; // free files + reply.FSSTAT3res_u.resok.afiles = 1 << 31; // available files + reply.FSSTAT3res_u.resok.invarsec = 0; + rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_FSSTAT3res, sizeof(FSSTAT3res)); + return 0; +} + +static int nfs3_fsinfo_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque) +{ + nfs_client_t *self = (nfs_client_t*)opaque; + FSINFO3args *args = (FSINFO3args*)call->body.cbody.args; + FSINFO3res reply; + if (args->fsroot.data.data_len != 10) + { + // Example error + reply.status = NFS3ERR_INVAL; + } + else + { + // Fill info + reply.status = NFS3_OK; + reply.FSINFO3res_u.resok.obj_attributes.attributes_follow = TRUE; + reply.FSINFO3res_u.resok.obj_attributes.post_op_attr_u.attributes = get_dir_attributes(self, ""); + reply.FSINFO3res_u.resok.rtmax = 128*1024*1024; + reply.FSINFO3res_u.resok.rtpref = 128*1024*1024; + reply.FSINFO3res_u.resok.rtmult = 4096; + reply.FSINFO3res_u.resok.wtmax = 128*1024*1024; + reply.FSINFO3res_u.resok.wtpref = 128*1024*1024; + reply.FSINFO3res_u.resok.wtmult = 4096; + reply.FSINFO3res_u.resok.dtpref = 128; + reply.FSINFO3res_u.resok.maxfilesize = 0x7fffffffffffffff; + reply.FSINFO3res_u.resok.time_delta.seconds = 1; + reply.FSINFO3res_u.resok.time_delta.nseconds = 0; + reply.FSINFO3res_u.resok.properties = FSF3_SYMLINK | FSF3_HOMOGENEOUS; + } + rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_FSINFO3res, sizeof(FSINFO3res)); + return 0; +} + +static int nfs3_pathconf_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque) +{ + nfs_client_t *self = (nfs_client_t*)opaque; + PATHCONF3args *args = (PATHCONF3args*)call->body.cbody.args; + PATHCONF3res reply; + if (args->object.data.data_len != 10) + { + // Example error + reply.status = NFS3ERR_INVAL; + } + else + { + // Fill info + reply.status = NFS3_OK; + reply.PATHCONF3res_u.resok.obj_attributes.attributes_follow = FALSE; + reply.PATHCONF3res_u.resok.linkmax = 0; + reply.PATHCONF3res_u.resok.name_max = 255; + reply.PATHCONF3res_u.resok.no_trunc = TRUE; + reply.PATHCONF3res_u.resok.chown_restricted = FALSE; + reply.PATHCONF3res_u.resok.case_insensitive = FALSE; + reply.PATHCONF3res_u.resok.case_preserving = TRUE; + } + rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_PATHCONF3res, sizeof(PATHCONF3res)); + return 0; +} + +static int nfs3_commit_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque) +{ + nfs_client_t *self = (nfs_client_t*)opaque; + COMMIT3args *args = (COMMIT3args*)call->body.cbody.args; + COMMIT3res reply = {}; + // Just pretend we did fsync :-) + rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_COMMIT3res, sizeof(COMMIT3res)); + return 0; +} + +static int mount3_mnt_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque) +{ + nfs_client_t *self = (nfs_client_t*)opaque; + dirpath *arg = (dirpath*)call->body.cbody.args; + int flavor = AUTH_NONE; + mountres3 reply; + reply.fhs_status = MNT3_OK; + reply.mountres3_u.mountinfo.fhandle.fhandle3_len = 10; + reply.mountres3_u.mountinfo.fhandle.fhandle3_val = "roothandle"; + reply.mountres3_u.mountinfo.auth_flavors.auth_flavors_len = 1; + reply.mountres3_u.mountinfo.auth_flavors.auth_flavors_val = &flavor; + rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_mountres3, sizeof(mountres3)); + return 0; +} + +static int mount3_dump_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque) +{ + nfs_client_t *self = (nfs_client_t*)opaque; + mountlist reply; + reply = (struct mountbody*)malloc(sizeof(struct mountbody)); + reply->ml_hostname = (dirpath)"127.0.0.1"; + reply->ml_directory = (dirpath)"/test"; + reply->ml_next = NULL; + rpc_send_reply(rpc, call, NULL, (zdrproc_t)zdr_mountlist, sizeof(mountlist)); + free(reply); + return 0; +} + +static int mount3_umnt_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque) +{ + nfs_client_t *self = (nfs_client_t*)opaque; + dirpath *arg = (dirpath*)call->body.cbody.args; + // do nothing + rpc_send_reply(rpc, call, NULL, (zdrproc_t)zdr_void, 0); + return 0; +} + +static int mount3_umntall_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque) +{ + // do nothing + rpc_send_reply(rpc, call, NULL, (zdrproc_t)zdr_void, 0); + return 0; +} + +static int mount3_export_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque) +{ + nfs_client_t *self = (nfs_client_t*)opaque; + exports reply; + reply = (struct exportnode*)malloc(sizeof(struct exportnode) + sizeof(struct groupnode)); + reply->ex_dir = (dirpath)"/test"; + reply->ex_groups = (struct groupnode*)(reply+1); + reply->ex_groups->gr_name = (dirpath)"127.0.0.1"; + reply->ex_groups->gr_next = NULL; + reply->ex_next = NULL; + rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_exports, sizeof(exports)); + free(reply); + return 0; +} + +nfs_client_t::nfs_client_t() +{ + struct service_proc nfs3_pt_a[22] = { + {NFS3_NULL, nfs3_null_proc, (zdrproc_t)zdr_void, 0, this}, + {NFS3_GETATTR, nfs3_getattr_proc, (zdrproc_t)zdr_GETATTR3args, sizeof(GETATTR3args), this}, + {NFS3_SETATTR, nfs3_setattr_proc, (zdrproc_t)zdr_SETATTR3args, sizeof(SETATTR3args), this}, + {NFS3_LOOKUP, nfs3_lookup_proc, (zdrproc_t)zdr_LOOKUP3args, sizeof(LOOKUP3args), this}, + {NFS3_ACCESS, nfs3_access_proc, (zdrproc_t)zdr_ACCESS3args, sizeof(ACCESS3args), this}, + {NFS3_READLINK, nfs3_readlink_proc, (zdrproc_t)zdr_READLINK3args, sizeof(READLINK3args), this}, + {NFS3_READ, nfs3_read_proc, (zdrproc_t)zdr_READ3args, sizeof(READ3args), this}, + {NFS3_WRITE, nfs3_write_proc, (zdrproc_t)zdr_WRITE3args, sizeof(WRITE3args), this}, + {NFS3_CREATE, nfs3_create_proc, (zdrproc_t)zdr_CREATE3args, sizeof(CREATE3args), this}, + {NFS3_MKDIR, nfs3_mkdir_proc, (zdrproc_t)zdr_MKDIR3args, sizeof(MKDIR3args), this}, + {NFS3_SYMLINK, nfs3_symlink_proc, (zdrproc_t)zdr_SYMLINK3args, sizeof(SYMLINK3args), this}, + {NFS3_MKNOD, nfs3_mknod_proc, (zdrproc_t)zdr_MKNOD3args, sizeof(MKNOD3args), this}, + {NFS3_REMOVE, nfs3_remove_proc, (zdrproc_t)zdr_REMOVE3args, sizeof(REMOVE3args), this}, + {NFS3_RMDIR, nfs3_rmdir_proc, (zdrproc_t)zdr_RMDIR3args, sizeof(RMDIR3args), this}, + {NFS3_RENAME, nfs3_rename_proc, (zdrproc_t)zdr_RENAME3args, sizeof(RENAME3args), this}, + {NFS3_LINK, nfs3_link_proc, (zdrproc_t)zdr_LINK3args, sizeof(LINK3args), this}, + {NFS3_READDIR, nfs3_readdir_proc, (zdrproc_t)zdr_READDIR3args, sizeof(READDIR3args), this}, + {NFS3_READDIRPLUS, nfs3_readdirplus_proc, (zdrproc_t)zdr_READDIRPLUS3args, sizeof(READDIRPLUS3args), this}, + {NFS3_FSSTAT, nfs3_fsstat_proc, (zdrproc_t)zdr_FSSTAT3args, sizeof(FSSTAT3args), this}, + {NFS3_FSINFO, nfs3_fsinfo_proc, (zdrproc_t)zdr_FSINFO3args, sizeof(FSINFO3args), this}, + {NFS3_PATHCONF, nfs3_pathconf_proc, (zdrproc_t)zdr_PATHCONF3args, sizeof(PATHCONF3args), this}, + {NFS3_COMMIT, nfs3_commit_proc, (zdrproc_t)zdr_COMMIT3args, sizeof(COMMIT3args), this}, + }; + for (int i = 0; i < sizeof(nfs3_pt_a)/sizeof(service_proc); i++) + { + nfs3_pt.push_back(nfs3_pt_a[i]); + } + struct service_proc nfs3_mount_pt_a[6] = { + {MOUNT3_NULL, nfs3_null_proc, (zdrproc_t)zdr_void, 0, this}, + {MOUNT3_MNT, mount3_mnt_proc, (zdrproc_t)zdr_dirpath, sizeof(dirpath), this}, + {MOUNT3_DUMP, mount3_dump_proc, (zdrproc_t)zdr_void, 0, this}, + {MOUNT3_UMNT, mount3_umnt_proc, (zdrproc_t)zdr_dirpath, sizeof(dirpath), this}, + {MOUNT3_UMNTALL, mount3_umntall_proc, (zdrproc_t)zdr_void, 0, this}, + {MOUNT3_EXPORT, mount3_export_proc, (zdrproc_t)zdr_void, 0, this}, + }; + for (int i = 0; i < sizeof(nfs3_mount_pt_a)/sizeof(service_proc); i++) + { + nfs3_mount_pt.push_back(nfs3_mount_pt_a[i]); + } +} + +nfs_client_t::~nfs_client_t() +{ + if (rpc) + { + rpc_disconnect(rpc, NULL); + rpc_destroy_context(rpc); + } +} diff --git a/src/nfs_portmap.cpp b/src/nfs_portmap.cpp new file mode 100644 index 00000000..e8e66386 --- /dev/null +++ b/src/nfs_portmap.cpp @@ -0,0 +1,172 @@ +// Copyright (c) Vitaliy Filippov, 2019+ +// License: VNPL-1.1 (see README.md for details) +// +// Portmap service for NFS proxy + +#include +#include + +#include "nfs_portmap.h" + +#include "libnfs-raw-portmap.h" + +#include "sha256.h" +#include "base64.h" + +/* + * The NULL procedure. All protocols/versions must provide a NULL procedure + * as index 0. + * It is used by clients, and rpcinfo, to "ping" a service and verify that + * the service is available and that it does support the indicated version. + */ +static int pmap2_null_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque) +{ + rpc_send_reply(rpc, call, NULL, (zdrproc_t)zdr_void, 0); + return 0; +} + +/* + * v2 GETPORT. + * This is the lookup function for portmapper version 2. + * A client provides program, version and protocol (tcp or udp) + * and portmapper returns which port that service is available on, + * (or 0 if no such program is registered.) + */ +static int pmap2_getport_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque) +{ + portmap_service_t *self = (portmap_service_t *)opaque; + PMAP2GETPORTargs *args = (PMAP2GETPORTargs *)call->body.cbody.args; + uint32_t port = 0; + auto it = self->reg_ports.lower_bound((portmap_id_t){ + .prog = args->prog, + .vers = args->vers, + .udp = args->prot == IPPROTO_UDP, + .ipv6 = false, + }); + if (it != self->reg_ports.end() && + it->prog == args->prog && it->vers == args->vers && + it->udp == (args->prot == IPPROTO_UDP)) + { + port = it->port; + } + rpc_send_reply(rpc, call, &port, (zdrproc_t)zdr_uint32_t, sizeof(uint32_t)); + return 0; +} + +/* + * v2 DUMP. + * This RPC returns a list of all endpoints that are registered with + * portmapper. + */ +static int pmap2_dump_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque) +{ + portmap_service_t *self = (portmap_service_t *)opaque; + pmap2_mapping_list *list = new pmap2_mapping_list[self->reg_ports.size()]; + int i = 0; + for (auto it = self->reg_ports.begin(); it != self->reg_ports.end(); it++) + { + if (it->ipv6) + continue; + list[i] = { + .map = { + .prog = it->prog, + .vers = it->vers, + .prot = it->udp ? IPPROTO_UDP : IPPROTO_TCP, + .port = it->port, + }, + .next = list+i+1, + }; + i++; + } + list[i-1].next = NULL; + // Send reply + PMAP2DUMPres reply; + reply.list = list; + rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_PMAP2DUMPres, sizeof(PMAP2DUMPres)); + reply.list = NULL; + delete list; + return 0; +} + +/* + * v3 GETADDR. + * This is the lookup function for portmapper version 3. + */ +static int pmap3_getaddr_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque) +{ + portmap_service_t *self = (portmap_service_t *)opaque; + PMAP3GETADDRargs *args = (PMAP3GETADDRargs *)call->body.cbody.args; + portmap_id_t ref = (portmap_id_t){ + .prog = args->prog, + .vers = args->vers, + .udp = !strcmp(args->netid, "udp") || !strcmp(args->netid, "udp6"), + .ipv6 = !strcmp(args->netid, "tcp6") || !strcmp(args->netid, "udp6"), + }; + auto it = self->reg_ports.lower_bound(ref); + PMAP3GETADDRres reply; + if (it != self->reg_ports.end() && + it->prog == ref.prog && it->vers == ref.vers && + it->udp == ref.udp && it->ipv6 == ref.ipv6) + { + reply.addr = (char*)it->addr.c_str(); + } + rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_PMAP3GETADDRres, sizeof(PMAP3GETADDRres)); + return 0; +} + +/* + * v3 DUMP. + * This RPC returns a list of all endpoints that are registered with + * portmapper. + */ +static int pmap3_dump_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque) +{ + portmap_service_t *self = (portmap_service_t *)opaque; + pmap3_mapping_list *list = new pmap3_mapping_list[self->reg_ports.size()]; + int i = 0; + for (auto it = self->reg_ports.begin(); it != self->reg_ports.end(); it++) + { + list[i] = (pmap3_mapping_list){ + .map = { + .prog = it->prog, + .vers = it->vers, + .netid = (char*)(it->ipv6 + ? (it->udp ? "udp6" : "tcp6") + : (it->udp ? "udp" : "tcp")), + .addr = (char*)it->addr.c_str(), // 0.0.0.0.port + .owner = (char*)it->owner.c_str(), + }, + .next = list+i+1, + }; + i++; + } + list[i-1].next = NULL; + // Send reply + PMAP3DUMPres reply; + reply.list = list; + rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_PMAP3DUMPres, sizeof(PMAP3DUMPres)); + reply.list = NULL; + delete list; + return 0; +} + +portmap_service_t::portmap_service_t() +{ + pmap2_pt.push_back((service_proc){PMAP2_NULL, pmap2_null_proc, (zdrproc_t)zdr_void, 0, this}); + pmap2_pt.push_back((service_proc){PMAP2_GETPORT, pmap2_getport_proc, (zdrproc_t)zdr_PMAP2GETPORTargs, sizeof(PMAP2GETPORTargs), this}); + pmap2_pt.push_back((service_proc){PMAP2_DUMP, pmap2_dump_proc, (zdrproc_t)zdr_void, 0, this}); + pmap3_pt.push_back((service_proc){PMAP3_NULL, pmap2_null_proc, (zdrproc_t)zdr_void, 0, this}); + pmap3_pt.push_back((service_proc){PMAP3_GETADDR, pmap3_getaddr_proc, (zdrproc_t)zdr_PMAP3GETADDRargs, sizeof(PMAP3GETADDRargs), this}); + pmap3_pt.push_back((service_proc){PMAP3_DUMP, pmap3_dump_proc, (zdrproc_t)zdr_void, 0, this}); +} + +std::string sha256(const std::string & str) +{ + std::string hash; + hash.resize(32); + SHA256_CTX ctx; + sha256_init(&ctx); + sha256_update(&ctx, (uint8_t*)str.data(), str.size()); + sha256_final(&ctx, (uint8_t*)hash.data()); + return hash; +} diff --git a/src/nfs_portmap.h b/src/nfs_portmap.h new file mode 100644 index 00000000..c9a483a3 --- /dev/null +++ b/src/nfs_portmap.h @@ -0,0 +1,41 @@ +// Copyright (c) Vitaliy Filippov, 2019+ +// License: VNPL-1.1 (see README.md for details) +// +// Portmap service for NFS proxy + +#pragma once + +#include +#include +#include + +#include "nfsc/libnfs.h" +#include "nfsc/libnfs-raw.h" + +struct portmap_id_t +{ + unsigned prog, vers; + bool udp; + bool ipv6; + unsigned port; + std::string owner; + std::string addr; +}; + +class portmap_service_t +{ +public: + std::set reg_ports; + std::vector pmap2_pt; + std::vector pmap3_pt; + portmap_service_t(); +}; + +inline bool operator < (const portmap_id_t &a, const portmap_id_t &b) +{ + return a.prog < b.prog || a.prog == b.prog && a.vers < b.vers || + a.prog == b.prog && a.vers == b.vers && a.udp < b.udp || + a.prog == b.prog && a.vers == b.vers && a.udp == b.udp && a.ipv6 < b.ipv6; +} + +std::string sha256(const std::string & str); diff --git a/src/nfs_proxy.cpp b/src/nfs_proxy.cpp new file mode 100644 index 00000000..1f1058f2 --- /dev/null +++ b/src/nfs_proxy.cpp @@ -0,0 +1,301 @@ +// Copyright (c) Vitaliy Filippov, 2019+ +// License: VNPL-1.1 (see README.md for details) +// +// Simplified NFS proxy +// Presents all images as files, stores small files directly in etcd +// Keeps image list in memory and thus is unsuitable for a lot of files + +#include +#include +#include +#include +#include +//#include + +#include "libnfs-raw-mount.h" +#include "libnfs-raw-nfs.h" +#include "libnfs-raw-portmap.h" + +#include "addr_util.h" +#include "base64.h" +#include "nfs_proxy.h" + +const char *exe_name = NULL; + +nfs_proxy_t::~nfs_proxy_t() +{ + if (cli) + delete cli; + if (epmgr) + delete epmgr; + if (ringloop) + delete ringloop; +} + +json11::Json::object nfs_proxy_t::parse_args(int narg, const char *args[]) +{ + json11::Json::object cfg; + for (int i = 1; i < narg; i++) + { + if (!strcmp(args[i], "-h") || !strcmp(args[i], "--help")) + { + printf( + "Vitastor NFS 3.0 proxy\n" + "(c) Vitaliy Filippov, 2021-2022 (VNPL-1.1)\n\n" + "USAGE:\n" + " %s [--etcd_address ADDR] [OTHER OPTIONS]\n", + exe_name + ); + exit(0); + } + else if (args[i][0] == '-' && args[i][1] == '-') + { + const char *opt = args[i]+2; + cfg[opt] = !strcmp(opt, "json") || i == narg-1 ? "1" : args[++i]; + } + } + return cfg; +} + +void nfs_proxy_t::run(json11::Json cfg) +{ + bind_address = cfg["bind_address"].string_value(); + if (bind_address == "") + bind_address = "0.0.0.0"; + // Create client + ringloop = new ring_loop_t(512); + epmgr = new epoll_manager_t(ringloop); + cli = new cluster_client_t(ringloop, epmgr->tfd, cfg); + // We need inode name hashes for NFS handles to remain stateless and <= 64 bytes long + dir_mod_rev[""] = 0; + dir_ids[""] = 1; + assert(cli->st_cli.on_inode_change_hook == NULL); + cli->st_cli.on_inode_change_hook = [this](inode_t changed_inode, bool removed) + { + if (removed) + { + auto ino_it = hash_by_inode.find(changed_inode); + if (ino_it != hash_by_inode.end()) + { + inode_by_hash.erase(ino_it->second); + hash_by_inode.erase(ino_it); + } + // FIXME also calculate dir_mod_rev + } + else + { + auto & inode_cfg = cli->st_cli.inode_config.at(changed_inode); + std::string name = inode_cfg.name; + if (name_prefix != "") + { + if (name.substr(0, name_prefix.size()) != name_prefix) + return; + name = name.substr(name_prefix.size()); + } + dir_mod_rev[""] = dir_mod_rev[""] < inode_cfg.mod_revision ? inode_cfg.mod_revision : dir_mod_rev[""]; + std::string hash = "S"+base64_encode(sha256(name)); + int pos = name.find('/'); + while (pos >= 0) + { + std::string dir = name.substr(0, pos); + if (dir_ids.find(dir) == dir_ids.end()) + dir_ids[dir] = next_dir_id++; + dir_mod_rev[dir] = dir_mod_rev[dir] < inode_cfg.mod_revision ? inode_cfg.mod_revision : dir_mod_rev[dir]; + dir_by_hash["S"+base64_encode(sha256(dir))] = dir; + int next = name.substr(pos+1).find('/'); + pos = next < 0 ? -1 : pos+1+next; + } + auto hbi_it = hash_by_inode.find(changed_inode); + if (hbi_it != hash_by_inode.end() && hbi_it->second != hash) + { + // inode had a different name, remove old hash=>inode pointer + inode_by_hash.erase(hbi_it->second); + } + inode_by_hash[hash] = changed_inode; + hash_by_inode[changed_inode] = hash; + } + }; + // Load image metadata + while (!cli->is_ready()) + { + ringloop->loop(); + if (cli->is_ready()) + break; + ringloop->wait(); + } + // Create portmap socket + int portmap_socket = create_and_bind_socket(bind_address, 111, 128, NULL); + fcntl(portmap_socket, F_SETFL, fcntl(portmap_socket, F_GETFL, 0) | O_NONBLOCK); + // Create NFS socket + int nfs_socket = create_and_bind_socket(bind_address, 2049, 128, NULL); + fcntl(nfs_socket, F_SETFL, fcntl(nfs_socket, F_GETFL, 0) | O_NONBLOCK); + // Self-register portmap and NFS + pmap.reg_ports.insert((portmap_id_t){ + .prog = PMAP_PROGRAM, + .vers = PMAP_V2, + .port = 111, + .owner = "portmapper-service", + .addr = "0.0.0.0.0.111", + }); + pmap.reg_ports.insert((portmap_id_t){ + .prog = PMAP_PROGRAM, + .vers = PMAP_V3, + .port = 111, + .owner = "portmapper-service", + .addr = "0.0.0.0.0.111", + }); + pmap.reg_ports.insert((portmap_id_t){ + .prog = NFS_PROGRAM, + .vers = NFS_V3, + .port = 2049, + .owner = "nfs-server", + .addr = "0.0.0.0.0.2049", + }); + pmap.reg_ports.insert((portmap_id_t){ + .prog = MOUNT_PROGRAM, + .vers = MOUNT_V3, + .port = 2049, + .owner = "rpc.mountd", + .addr = "0.0.0.0.0.2049", + }); + // Add FDs to epoll + epmgr->tfd->set_fd_handler(portmap_socket, false, [this](int portmap_socket, int epoll_events) + { + if (epoll_events & EPOLLRDHUP) + { + fprintf(stderr, "Listening portmap socket disconnected, exiting\n"); + exit(1); + } + else + { + do_accept(portmap_socket); + } + }); + epmgr->tfd->set_fd_handler(nfs_socket, false, [this](int nfs_socket, int epoll_events) + { + if (epoll_events & EPOLLRDHUP) + { + fprintf(stderr, "Listening portmap socket disconnected, exiting\n"); + exit(1); + } + else + { + do_accept(nfs_socket); + } + }); + if (cfg["foreground"].is_null()) + { + daemonize(); + } + while (true) + { + ringloop->loop(); + ringloop->wait(); + } + /*// Sync at the end + cluster_op_t *close_sync = new cluster_op_t; + close_sync->opcode = OSD_OP_SYNC; + close_sync->callback = [&stop](cluster_op_t *op) + { + stop = true; + delete op; + }; + cli->execute(close_sync);*/ + // Destroy the client + delete cli; + delete epmgr; + delete ringloop; + cli = NULL; + epmgr = NULL; + ringloop = NULL; +} + +void nfs_proxy_t::do_accept(int listen_fd) +{ + struct sockaddr_storage addr; + socklen_t addr_size = sizeof(addr); + int nfs_fd = 0; + while ((nfs_fd = accept(listen_fd, (struct sockaddr *)&addr, &addr_size)) >= 0) + { + fprintf(stderr, "New client %d: connection from %s\n", nfs_fd, addr_to_string(addr).c_str()); + fcntl(nfs_fd, F_SETFL, fcntl(nfs_fd, F_GETFL, 0) | O_NONBLOCK); + int one = 1; + setsockopt(nfs_fd, SOL_TCP, TCP_NODELAY, &one, sizeof(one)); + auto cli = new nfs_client_t(); + cli->parent = this; + cli->nfs_fd = nfs_fd; + cli->rpc = rpc_init_server_context(nfs_fd); + if (!cli->rpc) + { + delete cli; + close(nfs_fd); + fprintf(stderr, "Failed to init libnfs server context\n"); + exit(1); + } + // Use both portmap and NFS everywhere + rpc_register_service(cli->rpc, PMAP_PROGRAM, PMAP_V2, pmap.pmap2_pt.data(), pmap.pmap2_pt.size()); + rpc_register_service(cli->rpc, PMAP_PROGRAM, PMAP_V3, pmap.pmap3_pt.data(), pmap.pmap3_pt.size()); + rpc_register_service(cli->rpc, NFS_PROGRAM, NFS_V3, cli->nfs3_pt.data(), cli->nfs3_pt.size()); + rpc_register_service(cli->rpc, MOUNT_PROGRAM, MOUNT_V3, cli->nfs3_mount_pt.data(), cli->nfs3_mount_pt.size()); + epmgr->tfd->set_fd_handler(nfs_fd, true, [this, cli](int nfs_fd, int epoll_events) + { + // Handle incoming event + if (epoll_events & EPOLLRDHUP) + { + fprintf(stderr, "Client %d disconnected\n", nfs_fd); + epmgr->tfd->set_fd_handler(cli->nfs_fd, true, NULL); + delete cli; + close(nfs_fd); + return; + } + int revents = 0; + if (epoll_events & EPOLLIN) + revents |= POLLIN; + if (epoll_events & EPOLLOUT) + revents |= POLLOUT; + // Let libnfs process the event + if (rpc_service(cli->rpc, revents) < 0) + { + fprintf(stderr, "libnfs error: %s, disconnecting client %d\n", rpc_get_error(cli->rpc), nfs_fd); + epmgr->tfd->set_fd_handler(cli->nfs_fd, true, NULL); + delete cli; + close(nfs_fd); + return; + } + // FIXME Add/remove events based on rpc_which_events(rpc) ? + }); + } + if (nfs_fd < 0 && errno != EAGAIN) + { + fprintf(stderr, "Failed to accept connection: %s\n", strerror(errno)); + exit(1); + } +} + +void nfs_proxy_t::daemonize() +{ + if (fork()) + exit(0); + setsid(); + if (fork()) + exit(0); + if (chdir("/") != 0) + fprintf(stderr, "Warning: Failed to chdir into /\n"); + close(0); + close(1); + close(2); + open("/dev/null", O_RDONLY); + open("/dev/null", O_WRONLY); + open("/dev/null", O_WRONLY); +} + +int main(int narg, const char *args[]) +{ + setvbuf(stdout, NULL, _IONBF, 0); + setvbuf(stderr, NULL, _IONBF, 0); + exe_name = args[0]; + nfs_proxy_t *p = new nfs_proxy_t(); + p->run(nfs_proxy_t::parse_args(narg, args)); + delete p; + return 0; +} diff --git a/src/nfs_proxy.h b/src/nfs_proxy.h new file mode 100644 index 00000000..64c5bfae --- /dev/null +++ b/src/nfs_proxy.h @@ -0,0 +1,47 @@ +#pragma once + +#include "cluster_client.h" +#include "epoll_manager.h" +#include "nfs_portmap.h" + +#include "nfsc/libnfs-raw.h" + +class nfs_proxy_t +{ +public: + std::string bind_address; + std::string name_prefix; + int fsid = 1; + + portmap_service_t pmap; + ring_loop_t *ringloop = NULL; + epoll_manager_t *epmgr = NULL; + cluster_client_t *cli = NULL; + + uint64_t next_dir_id = 2; + std::map dir_by_hash; + std::map dir_ids; + std::map dir_mod_rev; + std::map hash_by_inode; + std::map inode_by_hash; + + ~nfs_proxy_t(); + + static json11::Json::object parse_args(int narg, const char *args[]); + void run(json11::Json cfg); + void do_accept(int listen_fd); + void daemonize(); +}; + +class nfs_client_t +{ +public: + nfs_proxy_t *parent = NULL; + int nfs_fd; + struct rpc_context *rpc = NULL; + std::vector nfs3_pt; + std::vector nfs3_mount_pt; + + nfs_client_t(); + ~nfs_client_t(); +}; diff --git a/src/sha256.c b/src/sha256.c new file mode 100644 index 00000000..eb9c5c07 --- /dev/null +++ b/src/sha256.c @@ -0,0 +1,158 @@ +/********************************************************************* +* Filename: sha256.c +* Author: Brad Conte (brad AT bradconte.com) +* Copyright: +* Disclaimer: This code is presented "as is" without any guarantees. +* Details: Implementation of the SHA-256 hashing algorithm. + SHA-256 is one of the three algorithms in the SHA2 + specification. The others, SHA-384 and SHA-512, are not + offered in this implementation. + Algorithm specification can be found here: + * http://csrc.nist.gov/publications/fips/fips180-2/fips180-2withchangenotice.pdf + This implementation uses little endian byte order. +*********************************************************************/ + +/*************************** HEADER FILES ***************************/ +#include +#include +#include "sha256.h" + +/****************************** MACROS ******************************/ +#define ROTLEFT(a,b) (((a) << (b)) | ((a) >> (32-(b)))) +#define ROTRIGHT(a,b) (((a) >> (b)) | ((a) << (32-(b)))) + +#define CH(x,y,z) (((x) & (y)) ^ (~(x) & (z))) +#define MAJ(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) +#define EP0(x) (ROTRIGHT(x,2) ^ ROTRIGHT(x,13) ^ ROTRIGHT(x,22)) +#define EP1(x) (ROTRIGHT(x,6) ^ ROTRIGHT(x,11) ^ ROTRIGHT(x,25)) +#define SIG0(x) (ROTRIGHT(x,7) ^ ROTRIGHT(x,18) ^ ((x) >> 3)) +#define SIG1(x) (ROTRIGHT(x,17) ^ ROTRIGHT(x,19) ^ ((x) >> 10)) + +/**************************** VARIABLES *****************************/ +static const WORD k[64] = { + 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5,0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5, + 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3,0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174, + 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc,0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da, + 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7,0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967, + 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13,0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85, + 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3,0xd192e819,0xd6990624,0xf40e3585,0x106aa070, + 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5,0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3, + 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208,0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +}; + +/*********************** FUNCTION DEFINITIONS ***********************/ +void sha256_transform(SHA256_CTX *ctx, const BYTE data[]) +{ + WORD a, b, c, d, e, f, g, h, i, j, t1, t2, m[64]; + + for (i = 0, j = 0; i < 16; ++i, j += 4) + m[i] = (data[j] << 24) | (data[j + 1] << 16) | (data[j + 2] << 8) | (data[j + 3]); + for ( ; i < 64; ++i) + m[i] = SIG1(m[i - 2]) + m[i - 7] + SIG0(m[i - 15]) + m[i - 16]; + + a = ctx->state[0]; + b = ctx->state[1]; + c = ctx->state[2]; + d = ctx->state[3]; + e = ctx->state[4]; + f = ctx->state[5]; + g = ctx->state[6]; + h = ctx->state[7]; + + for (i = 0; i < 64; ++i) { + t1 = h + EP1(e) + CH(e,f,g) + k[i] + m[i]; + t2 = EP0(a) + MAJ(a,b,c); + h = g; + g = f; + f = e; + e = d + t1; + d = c; + c = b; + b = a; + a = t1 + t2; + } + + ctx->state[0] += a; + ctx->state[1] += b; + ctx->state[2] += c; + ctx->state[3] += d; + ctx->state[4] += e; + ctx->state[5] += f; + ctx->state[6] += g; + ctx->state[7] += h; +} + +void sha256_init(SHA256_CTX *ctx) +{ + ctx->datalen = 0; + ctx->bitlen = 0; + ctx->state[0] = 0x6a09e667; + ctx->state[1] = 0xbb67ae85; + ctx->state[2] = 0x3c6ef372; + ctx->state[3] = 0xa54ff53a; + ctx->state[4] = 0x510e527f; + ctx->state[5] = 0x9b05688c; + ctx->state[6] = 0x1f83d9ab; + ctx->state[7] = 0x5be0cd19; +} + +void sha256_update(SHA256_CTX *ctx, const BYTE data[], size_t len) +{ + WORD i; + + for (i = 0; i < len; ++i) { + ctx->data[ctx->datalen] = data[i]; + ctx->datalen++; + if (ctx->datalen == 64) { + sha256_transform(ctx, ctx->data); + ctx->bitlen += 512; + ctx->datalen = 0; + } + } +} + +void sha256_final(SHA256_CTX *ctx, BYTE hash[]) +{ + WORD i; + + i = ctx->datalen; + + // Pad whatever data is left in the buffer. + if (ctx->datalen < 56) { + ctx->data[i++] = 0x80; + while (i < 56) + ctx->data[i++] = 0x00; + } + else { + ctx->data[i++] = 0x80; + while (i < 64) + ctx->data[i++] = 0x00; + sha256_transform(ctx, ctx->data); + memset(ctx->data, 0, 56); + } + + // Append to the padding the total message's length in bits and transform. + ctx->bitlen += ctx->datalen * 8; + ctx->data[63] = ctx->bitlen; + ctx->data[62] = ctx->bitlen >> 8; + ctx->data[61] = ctx->bitlen >> 16; + ctx->data[60] = ctx->bitlen >> 24; + ctx->data[59] = ctx->bitlen >> 32; + ctx->data[58] = ctx->bitlen >> 40; + ctx->data[57] = ctx->bitlen >> 48; + ctx->data[56] = ctx->bitlen >> 56; + sha256_transform(ctx, ctx->data); + + // Since this implementation uses little endian byte ordering and SHA uses big endian, + // reverse all the bytes when copying the final state to the output hash. + for (i = 0; i < 4; ++i) { + hash[i] = (ctx->state[0] >> (24 - i * 8)) & 0x000000ff; + hash[i + 4] = (ctx->state[1] >> (24 - i * 8)) & 0x000000ff; + hash[i + 8] = (ctx->state[2] >> (24 - i * 8)) & 0x000000ff; + hash[i + 12] = (ctx->state[3] >> (24 - i * 8)) & 0x000000ff; + hash[i + 16] = (ctx->state[4] >> (24 - i * 8)) & 0x000000ff; + hash[i + 20] = (ctx->state[5] >> (24 - i * 8)) & 0x000000ff; + hash[i + 24] = (ctx->state[6] >> (24 - i * 8)) & 0x000000ff; + hash[i + 28] = (ctx->state[7] >> (24 - i * 8)) & 0x000000ff; + } +} diff --git a/src/sha256.h b/src/sha256.h new file mode 100644 index 00000000..17987c62 --- /dev/null +++ b/src/sha256.h @@ -0,0 +1,41 @@ +/********************************************************************* +* Filename: sha256.h +* Author: Brad Conte (brad AT bradconte.com) +* Copyright: +* Disclaimer: This code is presented "as is" without any guarantees. +* Details: Defines the API for the corresponding SHA1 implementation. +*********************************************************************/ + +#ifndef SHA256_H +#define SHA256_H + +/*************************** HEADER FILES ***************************/ +#include + +/****************************** MACROS ******************************/ +#define SHA256_BLOCK_SIZE 32 // SHA256 outputs a 32 byte digest + +#ifdef __cplusplus +extern "C" { +#endif +/**************************** DATA TYPES ****************************/ +typedef unsigned char BYTE; // 8-bit byte +typedef unsigned int WORD; // 32-bit word, change to "long" for 16-bit machines + +typedef struct { + BYTE data[64]; + WORD datalen; + unsigned long long bitlen; + WORD state[8]; +} SHA256_CTX; + +/*********************** FUNCTION DECLARATIONS **********************/ +void sha256_init(SHA256_CTX *ctx); +void sha256_update(SHA256_CTX *ctx, const BYTE data[], size_t len); +void sha256_final(SHA256_CTX *ctx, BYTE hash[]); + +#ifdef __cplusplus +}; +#endif + +#endif // SHA256_H