WIP Simplified NFS proxy

nfs-proxy-old
Vitaliy Filippov 2022-02-12 01:30:50 +03:00
parent d7e64e6ea1
commit 6261809e87
12 changed files with 1582 additions and 0 deletions

3
.gitmodules vendored
View File

@ -4,3 +4,6 @@
[submodule "json11"] [submodule "json11"]
path = json11 path = json11
url = ../json11.git url = ../json11.git
[submodule "libnfs"]
path = libnfs
url = ../libnfs.git

1
libnfs Submodule

@ -0,0 +1 @@
Subproject commit 5a991e1fcbcdcca9a191c8e377b4cfb89142761e

View File

@ -152,6 +152,67 @@ target_link_libraries(vitastor-nbd
vitastor_client vitastor_client
) )
# vitastor-nfs
add_executable(vitastor-nfs
nfs_proxy.cpp
nfs_conn.cpp
nfs_portmap.cpp
sha256.c
../libnfs/lib/init.c
../libnfs/lib/pdu.c
../libnfs/lib/libnfs-zdr.c
../libnfs/lib/socket.c
../libnfs/portmap/libnfs-raw-portmap.c
../libnfs/nfs/libnfs-raw-nfs.c
../libnfs/mount/libnfs-raw-mount.c
)
set_source_files_properties(
../libnfs/nfs/libnfs-raw-nfs.c
PROPERTIES
COMPILE_FLAGS "-Wno-unused-but-set-variable"
)
# Simplified static configuration
# The other option is to build patched libnfs packages until all distros get my fixes
target_compile_options(vitastor-nfs
PRIVATE
-DHAVE_ARPA_INET_H
-DHAVE_INTTYPES_H
-DHAVE_MEMORY_H
-DHAVE_NETDB_H
-DHAVE_NETINET_IN_H
-DHAVE_NETINET_TCP_H
-DHAVE_NET_IF_H
-DHAVE_POLL_H
-DHAVE_STDINT_H
-DHAVE_STDLIB_H
-DHAVE_STRINGS_H
-DHAVE_STRING_H
-DHAVE_SYS_IOCTL_H
-DHAVE_SYS_SOCKET_H
-DHAVE_SYS_STATVFS_H
-DHAVE_SYS_STAT_H
-DHAVE_SYS_SYSMACROS_H
-DHAVE_SYS_TIME_H
-DHAVE_SYS_TYPES_H
-DHAVE_SYS_VFS_H
-DHAVE_UNISTD_H
-DHAVE_UTIME_H
-DHAVE_SOCKADDR_STORAGE
-DHAVE_STRUCT_STAT_ST_MTIM_TV_NSEC
-D_U_=
)
target_include_directories(vitastor-nfs
PRIVATE
../libnfs/include
../libnfs/include/nfsc
../libnfs/portmap
../libnfs/nfs
../libnfs/mount
)
target_link_libraries(vitastor-nfs
vitastor_client
)
# vitastor-cli # vitastor-cli
add_executable(vitastor-cli add_executable(vitastor-cli
cli.cpp cli_common.cpp cli_alloc_osd.cpp cli_simple_offsets.cpp cli_status.cpp cli_df.cpp cli.cpp cli_common.cpp cli_alloc_osd.cpp cli_simple_offsets.cpp cli_status.cpp cli_df.cpp

View File

@ -954,6 +954,10 @@ void etcd_state_client_t::parse_state(const etcd_kv_t & kv)
} }
if (!value.is_object()) if (!value.is_object())
{ {
if (on_inode_change_hook != NULL)
{
on_inode_change_hook(inode_num, true);
}
this->inode_config.erase(inode_num); this->inode_config.erase(inode_num);
} }
else else
@ -995,6 +999,10 @@ void etcd_state_client_t::parse_state(const etcd_kv_t & kv)
} }
} }
} }
if (on_inode_change_hook != NULL)
{
on_inode_change_hook(inode_num, false);
}
} }
} }
} }

View File

@ -109,6 +109,7 @@ public:
std::function<void(pool_id_t, pg_num_t)> on_change_pg_history_hook; std::function<void(pool_id_t, pg_num_t)> on_change_pg_history_hook;
std::function<void(osd_num_t)> on_change_osd_state_hook; std::function<void(osd_num_t)> on_change_osd_state_hook;
std::function<void()> on_reload_hook; std::function<void()> on_reload_hook;
std::function<void(inode_t, bool)> on_inode_change_hook;
json11::Json::object serialize_inode_cfg(inode_config_t *cfg); json11::Json::object serialize_inode_cfg(inode_config_t *cfg);
etcd_kv_t parse_etcd_kv(const json11::Json & kv_json); etcd_kv_t parse_etcd_kv(const json11::Json & kv_json);

748
src/nfs_conn.cpp Normal file
View File

@ -0,0 +1,748 @@
// Copyright (c) Vitaliy Filippov, 2019+
// License: VNPL-1.1 (see README.md for details)
//
// NFS connection handler for NFS proxy
#include <sys/time.h>
#include "libnfs-raw-mount.h"
#include "libnfs-raw-nfs.h"
#include "base64.h"
#include "nfs_proxy.h"
static unsigned len_pad4(unsigned len)
{
return len + (len&3 ? 4-(len&3) : 0);
}
static int nfs3_null_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque)
{
rpc_send_reply(rpc, call, NULL, (zdrproc_t)zdr_void, 0);
return 0;
}
static fattr3 get_dir_attributes(nfs_client_t *self, std::string dir)
{
return (fattr3){
.type = NF3DIR,
.mode = 0755,
.nlink = 1,
.uid = 0,
.gid = 0,
.size = 4096,
.used = 4096,
.rdev = (specdata3){ 0 },
.fsid = self->parent->fsid,
.fileid = dir == "" ? 1 : self->parent->dir_ids.at(dir),
//.atime = (nfstime3){ .seconds = now.tv_sec, .nseconds = now.tv_nsec },
//.mtime = (nfstime3){ .seconds = now.tv_sec, .nseconds = now.tv_nsec },
//.ctime = (nfstime3){ .seconds = now.tv_sec, .nseconds = now.tv_nsec },
};
}
static int nfs3_getattr_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque)
{
nfs_client_t *self = (nfs_client_t*)opaque;
GETATTR3args *args = (GETATTR3args*)call->body.cbody.args;
GETATTR3res reply;
std::string dirhash = std::string(args->object.data.data_val, args->object.data.data_len);
bool is_dir = false;
std::string dir;
if (dirhash == "roothandle")
is_dir = true;
else
{
auto dir_it = self->parent->dir_by_hash.find(dirhash);
if (dir_it != self->parent->dir_by_hash.end())
{
is_dir = true;
dir = dir_it->second;
}
}
if (is_dir)
{
// Directory info
reply.status = NFS3_OK;
reply.GETATTR3res_u.resok.obj_attributes = get_dir_attributes(self, dir);
}
else
{
uint64_t inode_num;
auto inode_num_it = self->parent->inode_by_hash.find(dirhash);
if (inode_num_it != self->parent->inode_by_hash.end())
inode_num = inode_num_it->second;
auto inode_it = self->parent->cli->st_cli.inode_config.find(inode_num);
if (inode_it != self->parent->cli->st_cli.inode_config.end())
{
// File info
auto & inode_cfg = inode_it->second;
reply.status = NFS3_OK;
reply.GETATTR3res_u.resok.obj_attributes = {
.type = NF3REG,
.mode = 0644,
.nlink = 1,
.uid = 0,
.gid = 0,
.size = inode_cfg.size,
.used = inode_cfg.size,
.rdev = (specdata3){ 0 },
.fsid = self->parent->fsid,
.fileid = inode_it->first,
//.atime = (nfstime3){ .seconds = now.tv_sec, .nseconds = now.tv_nsec },
//.mtime = (nfstime3){ .seconds = now.tv_sec, .nseconds = now.tv_nsec },
//.ctime = (nfstime3){ .seconds = now.tv_sec, .nseconds = now.tv_nsec },
};
}
else
{
// File not exists
reply.status = NFS3ERR_NOENT;
}
}
rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_GETATTR3res, sizeof(GETATTR3res));
return 0;
}
static int nfs3_setattr_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque)
{
nfs_client_t *self = (nfs_client_t*)opaque;
SETATTR3args *args = (SETATTR3args*)call->body.cbody.args;
SETATTR3res reply;
// Not supported yet
reply.status = NFS3ERR_NOTSUPP;
rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_SETATTR3res, sizeof(SETATTR3res));
return 0;
}
static int nfs3_lookup_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque)
{
nfs_client_t *self = (nfs_client_t*)opaque;
LOOKUP3args *args = (LOOKUP3args*)call->body.cbody.args;
LOOKUP3res reply;
std::string dirhash = std::string(args->what.dir.data.data_val, args->what.dir.data.data_len);
std::string dir;
if (dirhash != "roothandle")
{
auto dir_it = self->parent->dir_by_hash.find(dirhash);
if (dir_it != self->parent->dir_by_hash.end())
dir = dir_it->second;
}
std::string full_name = self->parent->name_prefix;
if (dir != "")
{
full_name += dir+"/";
}
full_name += std::string(args->what.name);
for (auto & ic: self->parent->cli->st_cli.inode_config)
{
if (ic.second.name == full_name)
{
std::string fh = "S"+base64_encode(sha256(full_name.substr(self->parent->name_prefix.size())));
reply.status = NFS3_OK;
reply.LOOKUP3res_u.resok.object.data.data_len = fh.size();
reply.LOOKUP3res_u.resok.object.data.data_val = (char*)fh.c_str();
rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_LOOKUP3res, sizeof(LOOKUP3res));
return 0;
}
}
reply.status = NFS3ERR_NOENT;
rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_LOOKUP3res, sizeof(LOOKUP3res));
return 0;
}
static int nfs3_access_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque)
{
nfs_client_t *self = (nfs_client_t*)opaque;
ACCESS3args *args = (ACCESS3args*)call->body.cbody.args;
ACCESS3res reply = {
.status = NFS3_OK,
.ACCESS3res_u = { .resok = {
.access = args->access,
} },
};
rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_ACCESS3res, sizeof(ACCESS3res));
return 0;
}
static int nfs3_readlink_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque)
{
nfs_client_t *self = (nfs_client_t*)opaque;
READLINK3args *args = (READLINK3args*)call->body.cbody.args;
READLINK3res reply = {};
// Not supported yet
reply.status = NFS3ERR_NOTSUPP;
rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_READLINK3res, sizeof(READLINK3res));
return 0;
}
#define MAX_REQUEST_SIZE 128*1024*1024
static int nfs3_read_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque)
{
nfs_client_t *self = (nfs_client_t*)opaque;
READ3args *args = (READ3args*)call->body.cbody.args;
std::string handle = std::string(args->file.data.data_val, args->file.data.data_len);
auto ino_it = self->parent->inode_by_hash.find(handle);
if (ino_it == self->parent->inode_by_hash.end())
{
READ3res reply = { .status = NFS3ERR_NOENT };
rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_READ3res, sizeof(READ3res));
return 0;
}
if (args->count > MAX_REQUEST_SIZE)
{
READ3res reply = { .status = NFS3ERR_INVAL };
rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_READ3res, sizeof(READ3res));
return 0;
}
void *buf = malloc_or_die(args->count);
cluster_op_t *op = new cluster_op_t;
op->opcode = OSD_OP_READ;
op->inode = ino_it->second;
op->offset = args->offset;
op->len = args->count;
op->iov.push_back(buf, args->count);
op->callback = [rpc, call](cluster_op_t *op)
{
void *buf = op->iov.buf[0].iov_base;
READ3res reply = {};
if (op->retval != op->len)
{
if (op->retval == -EINVAL)
reply.status = NFS3ERR_INVAL;
else if (op->retval == -ENOSPC)
reply.status = NFS3ERR_NOSPC;
else
reply.status = NFS3ERR_IO;
}
else
{
reply.status = NFS3_OK;
auto & reply_ok = reply.READ3res_u.resok;
reply_ok.count = op->retval;
reply_ok.eof = FALSE;
reply_ok.data.data_len = reply_ok.count;
reply_ok.data.data_val = (char*)buf;
}
rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_READ3res, sizeof(READ3res));
delete op;
free(buf);
};
self->parent->cli->execute(op);
return 0;
}
static int nfs3_write_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque)
{
nfs_client_t *self = (nfs_client_t*)opaque;
WRITE3args *args = (WRITE3args*)call->body.cbody.args;
WRITE3res reply;
// Not supported yet
reply.status = NFS3ERR_NOTSUPP;
rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_WRITE3res, sizeof(WRITE3res));
return 0;
}
static int nfs3_create_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque)
{
nfs_client_t *self = (nfs_client_t*)opaque;
CREATE3args *args = (CREATE3args*)call->body.cbody.args;
CREATE3res reply;
// Not supported yet
reply.status = NFS3ERR_NOTSUPP;
rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_CREATE3res, sizeof(CREATE3res));
return 0;
}
static int nfs3_mkdir_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque)
{
nfs_client_t *self = (nfs_client_t*)opaque;
MKDIR3args *args = (MKDIR3args*)call->body.cbody.args;
MKDIR3res reply;
// Not supported yet
reply.status = NFS3ERR_NOTSUPP;
rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_MKDIR3res, sizeof(MKDIR3res));
return 0;
}
static int nfs3_symlink_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque)
{
nfs_client_t *self = (nfs_client_t*)opaque;
SYMLINK3args *args = (SYMLINK3args*)call->body.cbody.args;
SYMLINK3res reply;
// Not supported yet
reply.status = NFS3ERR_NOTSUPP;
rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_SYMLINK3res, sizeof(SYMLINK3res));
return 0;
}
static int nfs3_mknod_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque)
{
nfs_client_t *self = (nfs_client_t*)opaque;
MKNOD3args *args = (MKNOD3args*)call->body.cbody.args;
MKNOD3res reply;
// Not supported yet
reply.status = NFS3ERR_NOTSUPP;
rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_MKNOD3res, sizeof(MKNOD3res));
return 0;
}
static int nfs3_remove_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque)
{
REMOVE3args *args = (REMOVE3args*)call->body.cbody.args;
REMOVE3res reply;
// Not supported yet
reply.status = NFS3ERR_NOTSUPP;
rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_REMOVE3res, sizeof(REMOVE3res));
return 0;
}
static int nfs3_rmdir_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque)
{
nfs_client_t *self = (nfs_client_t*)opaque;
RMDIR3args *args = (RMDIR3args*)call->body.cbody.args;
RMDIR3res reply;
// Not supported yet
reply.status = NFS3ERR_NOTSUPP;
rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_RMDIR3res, sizeof(RMDIR3res));
return 0;
}
static int nfs3_rename_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque)
{
nfs_client_t *self = (nfs_client_t*)opaque;
RENAME3args *args = (RENAME3args*)call->body.cbody.args;
RENAME3res reply;
// Not supported yet
reply.status = NFS3ERR_NOTSUPP;
rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_RENAME3res, sizeof(RENAME3res));
return 0;
}
static int nfs3_link_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque)
{
nfs_client_t *self = (nfs_client_t*)opaque;
LINK3args *args = (LINK3args*)call->body.cbody.args;
// We don't support hard links
LINK3res reply = { NFS3ERR_NOTSUPP };
rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_LINK3res, sizeof(LINK3res));
return 0;
}
static void nfs3_readdir_common(struct rpc_context *rpc, struct rpc_msg *call, void *opaque, bool is_plus)
{
nfs_client_t *self = (nfs_client_t*)opaque;
READDIRPLUS3args plus_args;
READDIRPLUS3args *args = NULL;
if (is_plus)
args = ((READDIRPLUS3args*)call->body.cbody.args);
else
{
args = &plus_args;
READDIR3args *in_args = ((READDIR3args*)call->body.cbody.args);
args->dir = in_args->dir;
args->cookie = in_args->cookie;
*((uint64_t*)args->cookieverf) = *((uint64_t*)in_args->cookieverf);
args->dircount = 512;
args->maxcount = in_args->count;
}
std::string dirhash = std::string(args->dir.data.data_val, args->dir.data.data_len);
std::string dir;
if (dirhash != "roothandle")
{
auto dir_it = self->parent->dir_by_hash.find(dirhash);
if (dir_it != self->parent->dir_by_hash.end())
dir = dir_it->second;
}
std::string prefix = self->parent->name_prefix;
if (dir != "")
{
prefix += dir+"/";
}
//struct timespec now;
//clock_gettime(CLOCK_REALTIME, &now);
std::map<std::string, struct entryplus3> entries;
std::vector<std::string> handles;
for (auto & ic: self->parent->cli->st_cli.inode_config)
{
auto & inode_cfg = ic.second;
if (prefix != "" && inode_cfg.name.substr(0, prefix.size()) != prefix)
continue;
std::string subname = inode_cfg.name.substr(prefix.size());
int p = 0;
while (p < subname.size() && subname[p] == '/')
p++;
if (p > 0)
subname = subname.substr(p);
if (subname.size() == 0)
continue;
p = 0;
while (p < subname.size() && subname[p] != '/')
p++;
if (p >= subname.size())
{
entries[subname] = (struct entryplus3){
// fileid will change when the user creates snapshots
// however, we hope that clients tolerate it well
// Linux does, even though it complains about "fileid changed" in dmesg
.fileid = ic.first,
};
if (is_plus)
{
handles.push_back("S"+base64_encode(sha256(inode_cfg.name)));
entries[subname].name_attributes = {
.attributes_follow = TRUE,
.post_op_attr_u = { .attributes = {
.type = NF3REG,
.mode = 0644,
.nlink = 1,
.uid = 0,
.gid = 0,
.size = inode_cfg.size,
.used = inode_cfg.size, // FIXME take from statistics
.rdev = (specdata3){ 0 },
.fsid = self->parent->fsid,
.fileid = ic.first,
//.atime = (nfstime3){ .seconds = now.tv_sec, .nseconds = now.tv_nsec },
//.mtime = (nfstime3){ .seconds = now.tv_sec, .nseconds = now.tv_nsec },
//.ctime = (nfstime3){ .seconds = now.tv_sec, .nseconds = now.tv_nsec },
} },
};
entries[subname].name_handle = {
.handle_follows = TRUE,
.post_op_fh3_u = { .handle = {
.data = {
// FIXME: I really want ZDR with std::string
.data_len = handles[handles.size()-1].size(),
.data_val = (char*)handles[handles.size()-1].c_str(),
},
} },
};
}
}
else
{
auto subdir = dir == "" ? subname.substr(0, p) : dir+"/"+subname.substr(0, p);
entries[subdir] = (struct entryplus3){
// for directories, fileid will change when the user restarts proxy
.fileid = self->parent->dir_ids.at(subdir),
};
if (is_plus)
{
handles.push_back("S"+base64_encode(sha256(subdir)));
entries[subdir].name_attributes = {
.attributes_follow = TRUE,
.post_op_attr_u = { .attributes = get_dir_attributes(self, subdir) },
};
entries[subdir].name_handle = {
.handle_follows = TRUE,
.post_op_fh3_u = { .handle = {
.data = {
// FIXME: I really want ZDR with std::string
.data_len = (unsigned)handles[handles.size()-1].size(),
.data_val = (char*)handles[handles.size()-1].c_str(),
},
} },
};
}
}
}
// Offset results by the continuation cookie (equal to index in the listing)
uint64_t idx = 1;
void *prev = NULL;
for (auto it = entries.begin(); it != entries.end(); it++)
{
entryplus3 *entry = &it->second;
// First fields of entry3 and entryplus3 are the same: fileid, name, cookie
entry->name = (char*)it->first.c_str();
entry->cookie = idx++;
if (prev)
{
if (is_plus)
((entryplus3*)prev)->nextentry = entry;
else
((entry3*)prev)->nextentry = (entry3*)entry;
}
prev = entry;
if (args->cookie > 0 && entry->cookie == args->cookie+1)
{
entries.erase(entries.begin(), it);
}
}
// Now limit results based on maximum reply size
// Sadly we have to calculate reply size by hand
// reply without entries is 4+4+(dir_attributes ? sizeof(fattr3) : 0)+8+4 bytes
int reply_size = 20;
if (reply_size > args->maxcount)
{
// Error, too small max reply size
if (is_plus)
{
READDIRPLUS3res reply = { .status = NFS3ERR_TOOSMALL };
rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_READDIRPLUS3res, sizeof(READDIRPLUS3res));
}
else
{
READDIR3res reply = { .status = NFS3ERR_TOOSMALL };
rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_READDIR3res, sizeof(READDIR3res));
}
return;
}
// 1 entry3 is (8+4+(filename_len+3)/4*4+8) bytes
// 1 entryplus3 is (8+4+(filename_len+3)/4*4+8
// + 4+(name_attributes ? (sizeof(fattr3) = 84) : 0)
// + 4+(name_handle ? 4+(handle_len+3)/4*4 : 0)) bytes
bool eof = true;
for (auto it = entries.begin(); it != entries.end(); it++)
{
reply_size += 20+len_pad4(it->first.size())+(is_plus
? 8+84+len_pad4(it->second.name_handle.post_op_fh3_u.handle.data.data_len) : 0);
if (reply_size > args->maxcount)
{
// Stop
entries.erase(it, entries.end());
eof = false;
break;
}
}
if (entries.end() != entries.begin())
{
auto last_it = entries.end();
last_it--;
if (is_plus)
((entryplus3*)&last_it->second)->nextentry = NULL;
else
((entry3*)&last_it->second)->nextentry = NULL;
}
// Send reply
if (is_plus)
{
READDIRPLUS3res reply = { .status = NFS3_OK };
*(uint64_t*)(reply.READDIRPLUS3res_u.resok.cookieverf) = self->parent->dir_mod_rev.at(dir);
reply.READDIRPLUS3res_u.resok.reply.entries = &entries.begin()->second;
reply.READDIRPLUS3res_u.resok.reply.eof = eof;
rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_READDIRPLUS3res, sizeof(READDIRPLUS3res));
}
else
{
READDIR3res reply = { .status = NFS3_OK };
*(uint64_t*)(reply.READDIR3res_u.resok.cookieverf) = self->parent->dir_mod_rev.at(dir);
reply.READDIR3res_u.resok.reply.entries = (entry3*)&entries.begin()->second;
reply.READDIR3res_u.resok.reply.eof = eof;
rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_READDIR3res, sizeof(READDIR3res));
}
}
static int nfs3_readdir_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque)
{
nfs3_readdir_common(rpc, call, opaque, false);
return 0;
}
static int nfs3_readdirplus_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque)
{
nfs3_readdir_common(rpc, call, opaque, true);
return 0;
}
// Get file system statistics
static int nfs3_fsstat_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque)
{
nfs_client_t *self = (nfs_client_t*)opaque;
FSSTAT3args *args = (FSSTAT3args*)call->body.cbody.args;
FSSTAT3res reply;
reply.status = NFS3_OK;
reply.FSSTAT3res_u.resok.obj_attributes.attributes_follow = TRUE;
reply.FSSTAT3res_u.resok.obj_attributes.post_op_attr_u.attributes = get_dir_attributes(self, "");
reply.FSSTAT3res_u.resok.tbytes = 4096; // total bytes
reply.FSSTAT3res_u.resok.fbytes = 4096; // free bytes
reply.FSSTAT3res_u.resok.abytes = 4096; // available bytes
reply.FSSTAT3res_u.resok.tfiles = 1 << 31; // total files
reply.FSSTAT3res_u.resok.ffiles = 1 << 31; // free files
reply.FSSTAT3res_u.resok.afiles = 1 << 31; // available files
reply.FSSTAT3res_u.resok.invarsec = 0;
rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_FSSTAT3res, sizeof(FSSTAT3res));
return 0;
}
static int nfs3_fsinfo_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque)
{
nfs_client_t *self = (nfs_client_t*)opaque;
FSINFO3args *args = (FSINFO3args*)call->body.cbody.args;
FSINFO3res reply;
if (args->fsroot.data.data_len != 10)
{
// Example error
reply.status = NFS3ERR_INVAL;
}
else
{
// Fill info
reply.status = NFS3_OK;
reply.FSINFO3res_u.resok.obj_attributes.attributes_follow = TRUE;
reply.FSINFO3res_u.resok.obj_attributes.post_op_attr_u.attributes = get_dir_attributes(self, "");
reply.FSINFO3res_u.resok.rtmax = 128*1024*1024;
reply.FSINFO3res_u.resok.rtpref = 128*1024*1024;
reply.FSINFO3res_u.resok.rtmult = 4096;
reply.FSINFO3res_u.resok.wtmax = 128*1024*1024;
reply.FSINFO3res_u.resok.wtpref = 128*1024*1024;
reply.FSINFO3res_u.resok.wtmult = 4096;
reply.FSINFO3res_u.resok.dtpref = 128;
reply.FSINFO3res_u.resok.maxfilesize = 0x7fffffffffffffff;
reply.FSINFO3res_u.resok.time_delta.seconds = 1;
reply.FSINFO3res_u.resok.time_delta.nseconds = 0;
reply.FSINFO3res_u.resok.properties = FSF3_SYMLINK | FSF3_HOMOGENEOUS;
}
rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_FSINFO3res, sizeof(FSINFO3res));
return 0;
}
static int nfs3_pathconf_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque)
{
nfs_client_t *self = (nfs_client_t*)opaque;
PATHCONF3args *args = (PATHCONF3args*)call->body.cbody.args;
PATHCONF3res reply;
if (args->object.data.data_len != 10)
{
// Example error
reply.status = NFS3ERR_INVAL;
}
else
{
// Fill info
reply.status = NFS3_OK;
reply.PATHCONF3res_u.resok.obj_attributes.attributes_follow = FALSE;
reply.PATHCONF3res_u.resok.linkmax = 0;
reply.PATHCONF3res_u.resok.name_max = 255;
reply.PATHCONF3res_u.resok.no_trunc = TRUE;
reply.PATHCONF3res_u.resok.chown_restricted = FALSE;
reply.PATHCONF3res_u.resok.case_insensitive = FALSE;
reply.PATHCONF3res_u.resok.case_preserving = TRUE;
}
rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_PATHCONF3res, sizeof(PATHCONF3res));
return 0;
}
static int nfs3_commit_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque)
{
nfs_client_t *self = (nfs_client_t*)opaque;
COMMIT3args *args = (COMMIT3args*)call->body.cbody.args;
COMMIT3res reply = {};
// Just pretend we did fsync :-)
rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_COMMIT3res, sizeof(COMMIT3res));
return 0;
}
static int mount3_mnt_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque)
{
nfs_client_t *self = (nfs_client_t*)opaque;
dirpath *arg = (dirpath*)call->body.cbody.args;
int flavor = AUTH_NONE;
mountres3 reply;
reply.fhs_status = MNT3_OK;
reply.mountres3_u.mountinfo.fhandle.fhandle3_len = 10;
reply.mountres3_u.mountinfo.fhandle.fhandle3_val = "roothandle";
reply.mountres3_u.mountinfo.auth_flavors.auth_flavors_len = 1;
reply.mountres3_u.mountinfo.auth_flavors.auth_flavors_val = &flavor;
rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_mountres3, sizeof(mountres3));
return 0;
}
static int mount3_dump_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque)
{
nfs_client_t *self = (nfs_client_t*)opaque;
mountlist reply;
reply = (struct mountbody*)malloc(sizeof(struct mountbody));
reply->ml_hostname = (dirpath)"127.0.0.1";
reply->ml_directory = (dirpath)"/test";
reply->ml_next = NULL;
rpc_send_reply(rpc, call, NULL, (zdrproc_t)zdr_mountlist, sizeof(mountlist));
free(reply);
return 0;
}
static int mount3_umnt_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque)
{
nfs_client_t *self = (nfs_client_t*)opaque;
dirpath *arg = (dirpath*)call->body.cbody.args;
// do nothing
rpc_send_reply(rpc, call, NULL, (zdrproc_t)zdr_void, 0);
return 0;
}
static int mount3_umntall_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque)
{
// do nothing
rpc_send_reply(rpc, call, NULL, (zdrproc_t)zdr_void, 0);
return 0;
}
static int mount3_export_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque)
{
nfs_client_t *self = (nfs_client_t*)opaque;
exports reply;
reply = (struct exportnode*)malloc(sizeof(struct exportnode) + sizeof(struct groupnode));
reply->ex_dir = (dirpath)"/test";
reply->ex_groups = (struct groupnode*)(reply+1);
reply->ex_groups->gr_name = (dirpath)"127.0.0.1";
reply->ex_groups->gr_next = NULL;
reply->ex_next = NULL;
rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_exports, sizeof(exports));
free(reply);
return 0;
}
nfs_client_t::nfs_client_t()
{
struct service_proc nfs3_pt_a[22] = {
{NFS3_NULL, nfs3_null_proc, (zdrproc_t)zdr_void, 0, this},
{NFS3_GETATTR, nfs3_getattr_proc, (zdrproc_t)zdr_GETATTR3args, sizeof(GETATTR3args), this},
{NFS3_SETATTR, nfs3_setattr_proc, (zdrproc_t)zdr_SETATTR3args, sizeof(SETATTR3args), this},
{NFS3_LOOKUP, nfs3_lookup_proc, (zdrproc_t)zdr_LOOKUP3args, sizeof(LOOKUP3args), this},
{NFS3_ACCESS, nfs3_access_proc, (zdrproc_t)zdr_ACCESS3args, sizeof(ACCESS3args), this},
{NFS3_READLINK, nfs3_readlink_proc, (zdrproc_t)zdr_READLINK3args, sizeof(READLINK3args), this},
{NFS3_READ, nfs3_read_proc, (zdrproc_t)zdr_READ3args, sizeof(READ3args), this},
{NFS3_WRITE, nfs3_write_proc, (zdrproc_t)zdr_WRITE3args, sizeof(WRITE3args), this},
{NFS3_CREATE, nfs3_create_proc, (zdrproc_t)zdr_CREATE3args, sizeof(CREATE3args), this},
{NFS3_MKDIR, nfs3_mkdir_proc, (zdrproc_t)zdr_MKDIR3args, sizeof(MKDIR3args), this},
{NFS3_SYMLINK, nfs3_symlink_proc, (zdrproc_t)zdr_SYMLINK3args, sizeof(SYMLINK3args), this},
{NFS3_MKNOD, nfs3_mknod_proc, (zdrproc_t)zdr_MKNOD3args, sizeof(MKNOD3args), this},
{NFS3_REMOVE, nfs3_remove_proc, (zdrproc_t)zdr_REMOVE3args, sizeof(REMOVE3args), this},
{NFS3_RMDIR, nfs3_rmdir_proc, (zdrproc_t)zdr_RMDIR3args, sizeof(RMDIR3args), this},
{NFS3_RENAME, nfs3_rename_proc, (zdrproc_t)zdr_RENAME3args, sizeof(RENAME3args), this},
{NFS3_LINK, nfs3_link_proc, (zdrproc_t)zdr_LINK3args, sizeof(LINK3args), this},
{NFS3_READDIR, nfs3_readdir_proc, (zdrproc_t)zdr_READDIR3args, sizeof(READDIR3args), this},
{NFS3_READDIRPLUS, nfs3_readdirplus_proc, (zdrproc_t)zdr_READDIRPLUS3args, sizeof(READDIRPLUS3args), this},
{NFS3_FSSTAT, nfs3_fsstat_proc, (zdrproc_t)zdr_FSSTAT3args, sizeof(FSSTAT3args), this},
{NFS3_FSINFO, nfs3_fsinfo_proc, (zdrproc_t)zdr_FSINFO3args, sizeof(FSINFO3args), this},
{NFS3_PATHCONF, nfs3_pathconf_proc, (zdrproc_t)zdr_PATHCONF3args, sizeof(PATHCONF3args), this},
{NFS3_COMMIT, nfs3_commit_proc, (zdrproc_t)zdr_COMMIT3args, sizeof(COMMIT3args), this},
};
for (int i = 0; i < sizeof(nfs3_pt_a)/sizeof(service_proc); i++)
{
nfs3_pt.push_back(nfs3_pt_a[i]);
}
struct service_proc nfs3_mount_pt_a[6] = {
{MOUNT3_NULL, nfs3_null_proc, (zdrproc_t)zdr_void, 0, this},
{MOUNT3_MNT, mount3_mnt_proc, (zdrproc_t)zdr_dirpath, sizeof(dirpath), this},
{MOUNT3_DUMP, mount3_dump_proc, (zdrproc_t)zdr_void, 0, this},
{MOUNT3_UMNT, mount3_umnt_proc, (zdrproc_t)zdr_dirpath, sizeof(dirpath), this},
{MOUNT3_UMNTALL, mount3_umntall_proc, (zdrproc_t)zdr_void, 0, this},
{MOUNT3_EXPORT, mount3_export_proc, (zdrproc_t)zdr_void, 0, this},
};
for (int i = 0; i < sizeof(nfs3_mount_pt_a)/sizeof(service_proc); i++)
{
nfs3_mount_pt.push_back(nfs3_mount_pt_a[i]);
}
}
nfs_client_t::~nfs_client_t()
{
if (rpc)
{
rpc_disconnect(rpc, NULL);
rpc_destroy_context(rpc);
}
}

172
src/nfs_portmap.cpp Normal file
View File

@ -0,0 +1,172 @@
// Copyright (c) Vitaliy Filippov, 2019+
// License: VNPL-1.1 (see README.md for details)
//
// Portmap service for NFS proxy
#include <netinet/in.h>
#include <string.h>
#include "nfs_portmap.h"
#include "libnfs-raw-portmap.h"
#include "sha256.h"
#include "base64.h"
/*
* The NULL procedure. All protocols/versions must provide a NULL procedure
* as index 0.
* It is used by clients, and rpcinfo, to "ping" a service and verify that
* the service is available and that it does support the indicated version.
*/
static int pmap2_null_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque)
{
rpc_send_reply(rpc, call, NULL, (zdrproc_t)zdr_void, 0);
return 0;
}
/*
* v2 GETPORT.
* This is the lookup function for portmapper version 2.
* A client provides program, version and protocol (tcp or udp)
* and portmapper returns which port that service is available on,
* (or 0 if no such program is registered.)
*/
static int pmap2_getport_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque)
{
portmap_service_t *self = (portmap_service_t *)opaque;
PMAP2GETPORTargs *args = (PMAP2GETPORTargs *)call->body.cbody.args;
uint32_t port = 0;
auto it = self->reg_ports.lower_bound((portmap_id_t){
.prog = args->prog,
.vers = args->vers,
.udp = args->prot == IPPROTO_UDP,
.ipv6 = false,
});
if (it != self->reg_ports.end() &&
it->prog == args->prog && it->vers == args->vers &&
it->udp == (args->prot == IPPROTO_UDP))
{
port = it->port;
}
rpc_send_reply(rpc, call, &port, (zdrproc_t)zdr_uint32_t, sizeof(uint32_t));
return 0;
}
/*
* v2 DUMP.
* This RPC returns a list of all endpoints that are registered with
* portmapper.
*/
static int pmap2_dump_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque)
{
portmap_service_t *self = (portmap_service_t *)opaque;
pmap2_mapping_list *list = new pmap2_mapping_list[self->reg_ports.size()];
int i = 0;
for (auto it = self->reg_ports.begin(); it != self->reg_ports.end(); it++)
{
if (it->ipv6)
continue;
list[i] = {
.map = {
.prog = it->prog,
.vers = it->vers,
.prot = it->udp ? IPPROTO_UDP : IPPROTO_TCP,
.port = it->port,
},
.next = list+i+1,
};
i++;
}
list[i-1].next = NULL;
// Send reply
PMAP2DUMPres reply;
reply.list = list;
rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_PMAP2DUMPres, sizeof(PMAP2DUMPres));
reply.list = NULL;
delete list;
return 0;
}
/*
* v3 GETADDR.
* This is the lookup function for portmapper version 3.
*/
static int pmap3_getaddr_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque)
{
portmap_service_t *self = (portmap_service_t *)opaque;
PMAP3GETADDRargs *args = (PMAP3GETADDRargs *)call->body.cbody.args;
portmap_id_t ref = (portmap_id_t){
.prog = args->prog,
.vers = args->vers,
.udp = !strcmp(args->netid, "udp") || !strcmp(args->netid, "udp6"),
.ipv6 = !strcmp(args->netid, "tcp6") || !strcmp(args->netid, "udp6"),
};
auto it = self->reg_ports.lower_bound(ref);
PMAP3GETADDRres reply;
if (it != self->reg_ports.end() &&
it->prog == ref.prog && it->vers == ref.vers &&
it->udp == ref.udp && it->ipv6 == ref.ipv6)
{
reply.addr = (char*)it->addr.c_str();
}
rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_PMAP3GETADDRres, sizeof(PMAP3GETADDRres));
return 0;
}
/*
* v3 DUMP.
* This RPC returns a list of all endpoints that are registered with
* portmapper.
*/
static int pmap3_dump_proc(struct rpc_context *rpc, struct rpc_msg *call, void *opaque)
{
portmap_service_t *self = (portmap_service_t *)opaque;
pmap3_mapping_list *list = new pmap3_mapping_list[self->reg_ports.size()];
int i = 0;
for (auto it = self->reg_ports.begin(); it != self->reg_ports.end(); it++)
{
list[i] = (pmap3_mapping_list){
.map = {
.prog = it->prog,
.vers = it->vers,
.netid = (char*)(it->ipv6
? (it->udp ? "udp6" : "tcp6")
: (it->udp ? "udp" : "tcp")),
.addr = (char*)it->addr.c_str(), // 0.0.0.0.port
.owner = (char*)it->owner.c_str(),
},
.next = list+i+1,
};
i++;
}
list[i-1].next = NULL;
// Send reply
PMAP3DUMPres reply;
reply.list = list;
rpc_send_reply(rpc, call, &reply, (zdrproc_t)zdr_PMAP3DUMPres, sizeof(PMAP3DUMPres));
reply.list = NULL;
delete list;
return 0;
}
portmap_service_t::portmap_service_t()
{
pmap2_pt.push_back((service_proc){PMAP2_NULL, pmap2_null_proc, (zdrproc_t)zdr_void, 0, this});
pmap2_pt.push_back((service_proc){PMAP2_GETPORT, pmap2_getport_proc, (zdrproc_t)zdr_PMAP2GETPORTargs, sizeof(PMAP2GETPORTargs), this});
pmap2_pt.push_back((service_proc){PMAP2_DUMP, pmap2_dump_proc, (zdrproc_t)zdr_void, 0, this});
pmap3_pt.push_back((service_proc){PMAP3_NULL, pmap2_null_proc, (zdrproc_t)zdr_void, 0, this});
pmap3_pt.push_back((service_proc){PMAP3_GETADDR, pmap3_getaddr_proc, (zdrproc_t)zdr_PMAP3GETADDRargs, sizeof(PMAP3GETADDRargs), this});
pmap3_pt.push_back((service_proc){PMAP3_DUMP, pmap3_dump_proc, (zdrproc_t)zdr_void, 0, this});
}
std::string sha256(const std::string & str)
{
std::string hash;
hash.resize(32);
SHA256_CTX ctx;
sha256_init(&ctx);
sha256_update(&ctx, (uint8_t*)str.data(), str.size());
sha256_final(&ctx, (uint8_t*)hash.data());
return hash;
}

41
src/nfs_portmap.h Normal file
View File

@ -0,0 +1,41 @@
// Copyright (c) Vitaliy Filippov, 2019+
// License: VNPL-1.1 (see README.md for details)
//
// Portmap service for NFS proxy
#pragma once
#include <string>
#include <set>
#include <vector>
#include "nfsc/libnfs.h"
#include "nfsc/libnfs-raw.h"
struct portmap_id_t
{
unsigned prog, vers;
bool udp;
bool ipv6;
unsigned port;
std::string owner;
std::string addr;
};
class portmap_service_t
{
public:
std::set<portmap_id_t> reg_ports;
std::vector<service_proc> pmap2_pt;
std::vector<service_proc> pmap3_pt;
portmap_service_t();
};
inline bool operator < (const portmap_id_t &a, const portmap_id_t &b)
{
return a.prog < b.prog || a.prog == b.prog && a.vers < b.vers ||
a.prog == b.prog && a.vers == b.vers && a.udp < b.udp ||
a.prog == b.prog && a.vers == b.vers && a.udp == b.udp && a.ipv6 < b.ipv6;
}
std::string sha256(const std::string & str);

301
src/nfs_proxy.cpp Normal file
View File

@ -0,0 +1,301 @@
// Copyright (c) Vitaliy Filippov, 2019+
// License: VNPL-1.1 (see README.md for details)
//
// Simplified NFS proxy
// Presents all images as files, stores small files directly in etcd
// Keeps image list in memory and thus is unsuitable for a lot of files
#include <netinet/tcp.h>
#include <sys/epoll.h>
#include <sys/poll.h>
#include <unistd.h>
#include <fcntl.h>
//#include <signal.h>
#include "libnfs-raw-mount.h"
#include "libnfs-raw-nfs.h"
#include "libnfs-raw-portmap.h"
#include "addr_util.h"
#include "base64.h"
#include "nfs_proxy.h"
const char *exe_name = NULL;
nfs_proxy_t::~nfs_proxy_t()
{
if (cli)
delete cli;
if (epmgr)
delete epmgr;
if (ringloop)
delete ringloop;
}
json11::Json::object nfs_proxy_t::parse_args(int narg, const char *args[])
{
json11::Json::object cfg;
for (int i = 1; i < narg; i++)
{
if (!strcmp(args[i], "-h") || !strcmp(args[i], "--help"))
{
printf(
"Vitastor NFS 3.0 proxy\n"
"(c) Vitaliy Filippov, 2021-2022 (VNPL-1.1)\n\n"
"USAGE:\n"
" %s [--etcd_address ADDR] [OTHER OPTIONS]\n",
exe_name
);
exit(0);
}
else if (args[i][0] == '-' && args[i][1] == '-')
{
const char *opt = args[i]+2;
cfg[opt] = !strcmp(opt, "json") || i == narg-1 ? "1" : args[++i];
}
}
return cfg;
}
void nfs_proxy_t::run(json11::Json cfg)
{
bind_address = cfg["bind_address"].string_value();
if (bind_address == "")
bind_address = "0.0.0.0";
// Create client
ringloop = new ring_loop_t(512);
epmgr = new epoll_manager_t(ringloop);
cli = new cluster_client_t(ringloop, epmgr->tfd, cfg);
// We need inode name hashes for NFS handles to remain stateless and <= 64 bytes long
dir_mod_rev[""] = 0;
dir_ids[""] = 1;
assert(cli->st_cli.on_inode_change_hook == NULL);
cli->st_cli.on_inode_change_hook = [this](inode_t changed_inode, bool removed)
{
if (removed)
{
auto ino_it = hash_by_inode.find(changed_inode);
if (ino_it != hash_by_inode.end())
{
inode_by_hash.erase(ino_it->second);
hash_by_inode.erase(ino_it);
}
// FIXME also calculate dir_mod_rev
}
else
{
auto & inode_cfg = cli->st_cli.inode_config.at(changed_inode);
std::string name = inode_cfg.name;
if (name_prefix != "")
{
if (name.substr(0, name_prefix.size()) != name_prefix)
return;
name = name.substr(name_prefix.size());
}
dir_mod_rev[""] = dir_mod_rev[""] < inode_cfg.mod_revision ? inode_cfg.mod_revision : dir_mod_rev[""];
std::string hash = "S"+base64_encode(sha256(name));
int pos = name.find('/');
while (pos >= 0)
{
std::string dir = name.substr(0, pos);
if (dir_ids.find(dir) == dir_ids.end())
dir_ids[dir] = next_dir_id++;
dir_mod_rev[dir] = dir_mod_rev[dir] < inode_cfg.mod_revision ? inode_cfg.mod_revision : dir_mod_rev[dir];
dir_by_hash["S"+base64_encode(sha256(dir))] = dir;
int next = name.substr(pos+1).find('/');
pos = next < 0 ? -1 : pos+1+next;
}
auto hbi_it = hash_by_inode.find(changed_inode);
if (hbi_it != hash_by_inode.end() && hbi_it->second != hash)
{
// inode had a different name, remove old hash=>inode pointer
inode_by_hash.erase(hbi_it->second);
}
inode_by_hash[hash] = changed_inode;
hash_by_inode[changed_inode] = hash;
}
};
// Load image metadata
while (!cli->is_ready())
{
ringloop->loop();
if (cli->is_ready())
break;
ringloop->wait();
}
// Create portmap socket
int portmap_socket = create_and_bind_socket(bind_address, 111, 128, NULL);
fcntl(portmap_socket, F_SETFL, fcntl(portmap_socket, F_GETFL, 0) | O_NONBLOCK);
// Create NFS socket
int nfs_socket = create_and_bind_socket(bind_address, 2049, 128, NULL);
fcntl(nfs_socket, F_SETFL, fcntl(nfs_socket, F_GETFL, 0) | O_NONBLOCK);
// Self-register portmap and NFS
pmap.reg_ports.insert((portmap_id_t){
.prog = PMAP_PROGRAM,
.vers = PMAP_V2,
.port = 111,
.owner = "portmapper-service",
.addr = "0.0.0.0.0.111",
});
pmap.reg_ports.insert((portmap_id_t){
.prog = PMAP_PROGRAM,
.vers = PMAP_V3,
.port = 111,
.owner = "portmapper-service",
.addr = "0.0.0.0.0.111",
});
pmap.reg_ports.insert((portmap_id_t){
.prog = NFS_PROGRAM,
.vers = NFS_V3,
.port = 2049,
.owner = "nfs-server",
.addr = "0.0.0.0.0.2049",
});
pmap.reg_ports.insert((portmap_id_t){
.prog = MOUNT_PROGRAM,
.vers = MOUNT_V3,
.port = 2049,
.owner = "rpc.mountd",
.addr = "0.0.0.0.0.2049",
});
// Add FDs to epoll
epmgr->tfd->set_fd_handler(portmap_socket, false, [this](int portmap_socket, int epoll_events)
{
if (epoll_events & EPOLLRDHUP)
{
fprintf(stderr, "Listening portmap socket disconnected, exiting\n");
exit(1);
}
else
{
do_accept(portmap_socket);
}
});
epmgr->tfd->set_fd_handler(nfs_socket, false, [this](int nfs_socket, int epoll_events)
{
if (epoll_events & EPOLLRDHUP)
{
fprintf(stderr, "Listening portmap socket disconnected, exiting\n");
exit(1);
}
else
{
do_accept(nfs_socket);
}
});
if (cfg["foreground"].is_null())
{
daemonize();
}
while (true)
{
ringloop->loop();
ringloop->wait();
}
/*// Sync at the end
cluster_op_t *close_sync = new cluster_op_t;
close_sync->opcode = OSD_OP_SYNC;
close_sync->callback = [&stop](cluster_op_t *op)
{
stop = true;
delete op;
};
cli->execute(close_sync);*/
// Destroy the client
delete cli;
delete epmgr;
delete ringloop;
cli = NULL;
epmgr = NULL;
ringloop = NULL;
}
void nfs_proxy_t::do_accept(int listen_fd)
{
struct sockaddr_storage addr;
socklen_t addr_size = sizeof(addr);
int nfs_fd = 0;
while ((nfs_fd = accept(listen_fd, (struct sockaddr *)&addr, &addr_size)) >= 0)
{
fprintf(stderr, "New client %d: connection from %s\n", nfs_fd, addr_to_string(addr).c_str());
fcntl(nfs_fd, F_SETFL, fcntl(nfs_fd, F_GETFL, 0) | O_NONBLOCK);
int one = 1;
setsockopt(nfs_fd, SOL_TCP, TCP_NODELAY, &one, sizeof(one));
auto cli = new nfs_client_t();
cli->parent = this;
cli->nfs_fd = nfs_fd;
cli->rpc = rpc_init_server_context(nfs_fd);
if (!cli->rpc)
{
delete cli;
close(nfs_fd);
fprintf(stderr, "Failed to init libnfs server context\n");
exit(1);
}
// Use both portmap and NFS everywhere
rpc_register_service(cli->rpc, PMAP_PROGRAM, PMAP_V2, pmap.pmap2_pt.data(), pmap.pmap2_pt.size());
rpc_register_service(cli->rpc, PMAP_PROGRAM, PMAP_V3, pmap.pmap3_pt.data(), pmap.pmap3_pt.size());
rpc_register_service(cli->rpc, NFS_PROGRAM, NFS_V3, cli->nfs3_pt.data(), cli->nfs3_pt.size());
rpc_register_service(cli->rpc, MOUNT_PROGRAM, MOUNT_V3, cli->nfs3_mount_pt.data(), cli->nfs3_mount_pt.size());
epmgr->tfd->set_fd_handler(nfs_fd, true, [this, cli](int nfs_fd, int epoll_events)
{
// Handle incoming event
if (epoll_events & EPOLLRDHUP)
{
fprintf(stderr, "Client %d disconnected\n", nfs_fd);
epmgr->tfd->set_fd_handler(cli->nfs_fd, true, NULL);
delete cli;
close(nfs_fd);
return;
}
int revents = 0;
if (epoll_events & EPOLLIN)
revents |= POLLIN;
if (epoll_events & EPOLLOUT)
revents |= POLLOUT;
// Let libnfs process the event
if (rpc_service(cli->rpc, revents) < 0)
{
fprintf(stderr, "libnfs error: %s, disconnecting client %d\n", rpc_get_error(cli->rpc), nfs_fd);
epmgr->tfd->set_fd_handler(cli->nfs_fd, true, NULL);
delete cli;
close(nfs_fd);
return;
}
// FIXME Add/remove events based on rpc_which_events(rpc) ?
});
}
if (nfs_fd < 0 && errno != EAGAIN)
{
fprintf(stderr, "Failed to accept connection: %s\n", strerror(errno));
exit(1);
}
}
void nfs_proxy_t::daemonize()
{
if (fork())
exit(0);
setsid();
if (fork())
exit(0);
if (chdir("/") != 0)
fprintf(stderr, "Warning: Failed to chdir into /\n");
close(0);
close(1);
close(2);
open("/dev/null", O_RDONLY);
open("/dev/null", O_WRONLY);
open("/dev/null", O_WRONLY);
}
int main(int narg, const char *args[])
{
setvbuf(stdout, NULL, _IONBF, 0);
setvbuf(stderr, NULL, _IONBF, 0);
exe_name = args[0];
nfs_proxy_t *p = new nfs_proxy_t();
p->run(nfs_proxy_t::parse_args(narg, args));
delete p;
return 0;
}

47
src/nfs_proxy.h Normal file
View File

@ -0,0 +1,47 @@
#pragma once
#include "cluster_client.h"
#include "epoll_manager.h"
#include "nfs_portmap.h"
#include "nfsc/libnfs-raw.h"
class nfs_proxy_t
{
public:
std::string bind_address;
std::string name_prefix;
int fsid = 1;
portmap_service_t pmap;
ring_loop_t *ringloop = NULL;
epoll_manager_t *epmgr = NULL;
cluster_client_t *cli = NULL;
uint64_t next_dir_id = 2;
std::map<std::string, std::string> dir_by_hash;
std::map<std::string, uint64_t> dir_ids;
std::map<std::string, uint64_t> dir_mod_rev;
std::map<inode_t, std::string> hash_by_inode;
std::map<std::string, inode_t> inode_by_hash;
~nfs_proxy_t();
static json11::Json::object parse_args(int narg, const char *args[]);
void run(json11::Json cfg);
void do_accept(int listen_fd);
void daemonize();
};
class nfs_client_t
{
public:
nfs_proxy_t *parent = NULL;
int nfs_fd;
struct rpc_context *rpc = NULL;
std::vector<service_proc> nfs3_pt;
std::vector<service_proc> nfs3_mount_pt;
nfs_client_t();
~nfs_client_t();
};

158
src/sha256.c Normal file
View File

@ -0,0 +1,158 @@
/*********************************************************************
* Filename: sha256.c
* Author: Brad Conte (brad AT bradconte.com)
* Copyright:
* Disclaimer: This code is presented "as is" without any guarantees.
* Details: Implementation of the SHA-256 hashing algorithm.
SHA-256 is one of the three algorithms in the SHA2
specification. The others, SHA-384 and SHA-512, are not
offered in this implementation.
Algorithm specification can be found here:
* http://csrc.nist.gov/publications/fips/fips180-2/fips180-2withchangenotice.pdf
This implementation uses little endian byte order.
*********************************************************************/
/*************************** HEADER FILES ***************************/
#include <stdlib.h>
#include <memory.h>
#include "sha256.h"
/****************************** MACROS ******************************/
#define ROTLEFT(a,b) (((a) << (b)) | ((a) >> (32-(b))))
#define ROTRIGHT(a,b) (((a) >> (b)) | ((a) << (32-(b))))
#define CH(x,y,z) (((x) & (y)) ^ (~(x) & (z)))
#define MAJ(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
#define EP0(x) (ROTRIGHT(x,2) ^ ROTRIGHT(x,13) ^ ROTRIGHT(x,22))
#define EP1(x) (ROTRIGHT(x,6) ^ ROTRIGHT(x,11) ^ ROTRIGHT(x,25))
#define SIG0(x) (ROTRIGHT(x,7) ^ ROTRIGHT(x,18) ^ ((x) >> 3))
#define SIG1(x) (ROTRIGHT(x,17) ^ ROTRIGHT(x,19) ^ ((x) >> 10))
/**************************** VARIABLES *****************************/
static const WORD k[64] = {
0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5,0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5,
0xd807aa98,0x12835b01,0x243185be,0x550c7dc3,0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174,
0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc,0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da,
0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7,0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967,
0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13,0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85,
0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3,0xd192e819,0xd6990624,0xf40e3585,0x106aa070,
0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5,0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3,
0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208,0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
};
/*********************** FUNCTION DEFINITIONS ***********************/
void sha256_transform(SHA256_CTX *ctx, const BYTE data[])
{
WORD a, b, c, d, e, f, g, h, i, j, t1, t2, m[64];
for (i = 0, j = 0; i < 16; ++i, j += 4)
m[i] = (data[j] << 24) | (data[j + 1] << 16) | (data[j + 2] << 8) | (data[j + 3]);
for ( ; i < 64; ++i)
m[i] = SIG1(m[i - 2]) + m[i - 7] + SIG0(m[i - 15]) + m[i - 16];
a = ctx->state[0];
b = ctx->state[1];
c = ctx->state[2];
d = ctx->state[3];
e = ctx->state[4];
f = ctx->state[5];
g = ctx->state[6];
h = ctx->state[7];
for (i = 0; i < 64; ++i) {
t1 = h + EP1(e) + CH(e,f,g) + k[i] + m[i];
t2 = EP0(a) + MAJ(a,b,c);
h = g;
g = f;
f = e;
e = d + t1;
d = c;
c = b;
b = a;
a = t1 + t2;
}
ctx->state[0] += a;
ctx->state[1] += b;
ctx->state[2] += c;
ctx->state[3] += d;
ctx->state[4] += e;
ctx->state[5] += f;
ctx->state[6] += g;
ctx->state[7] += h;
}
void sha256_init(SHA256_CTX *ctx)
{
ctx->datalen = 0;
ctx->bitlen = 0;
ctx->state[0] = 0x6a09e667;
ctx->state[1] = 0xbb67ae85;
ctx->state[2] = 0x3c6ef372;
ctx->state[3] = 0xa54ff53a;
ctx->state[4] = 0x510e527f;
ctx->state[5] = 0x9b05688c;
ctx->state[6] = 0x1f83d9ab;
ctx->state[7] = 0x5be0cd19;
}
void sha256_update(SHA256_CTX *ctx, const BYTE data[], size_t len)
{
WORD i;
for (i = 0; i < len; ++i) {
ctx->data[ctx->datalen] = data[i];
ctx->datalen++;
if (ctx->datalen == 64) {
sha256_transform(ctx, ctx->data);
ctx->bitlen += 512;
ctx->datalen = 0;
}
}
}
void sha256_final(SHA256_CTX *ctx, BYTE hash[])
{
WORD i;
i = ctx->datalen;
// Pad whatever data is left in the buffer.
if (ctx->datalen < 56) {
ctx->data[i++] = 0x80;
while (i < 56)
ctx->data[i++] = 0x00;
}
else {
ctx->data[i++] = 0x80;
while (i < 64)
ctx->data[i++] = 0x00;
sha256_transform(ctx, ctx->data);
memset(ctx->data, 0, 56);
}
// Append to the padding the total message's length in bits and transform.
ctx->bitlen += ctx->datalen * 8;
ctx->data[63] = ctx->bitlen;
ctx->data[62] = ctx->bitlen >> 8;
ctx->data[61] = ctx->bitlen >> 16;
ctx->data[60] = ctx->bitlen >> 24;
ctx->data[59] = ctx->bitlen >> 32;
ctx->data[58] = ctx->bitlen >> 40;
ctx->data[57] = ctx->bitlen >> 48;
ctx->data[56] = ctx->bitlen >> 56;
sha256_transform(ctx, ctx->data);
// Since this implementation uses little endian byte ordering and SHA uses big endian,
// reverse all the bytes when copying the final state to the output hash.
for (i = 0; i < 4; ++i) {
hash[i] = (ctx->state[0] >> (24 - i * 8)) & 0x000000ff;
hash[i + 4] = (ctx->state[1] >> (24 - i * 8)) & 0x000000ff;
hash[i + 8] = (ctx->state[2] >> (24 - i * 8)) & 0x000000ff;
hash[i + 12] = (ctx->state[3] >> (24 - i * 8)) & 0x000000ff;
hash[i + 16] = (ctx->state[4] >> (24 - i * 8)) & 0x000000ff;
hash[i + 20] = (ctx->state[5] >> (24 - i * 8)) & 0x000000ff;
hash[i + 24] = (ctx->state[6] >> (24 - i * 8)) & 0x000000ff;
hash[i + 28] = (ctx->state[7] >> (24 - i * 8)) & 0x000000ff;
}
}

41
src/sha256.h Normal file
View File

@ -0,0 +1,41 @@
/*********************************************************************
* Filename: sha256.h
* Author: Brad Conte (brad AT bradconte.com)
* Copyright:
* Disclaimer: This code is presented "as is" without any guarantees.
* Details: Defines the API for the corresponding SHA1 implementation.
*********************************************************************/
#ifndef SHA256_H
#define SHA256_H
/*************************** HEADER FILES ***************************/
#include <stddef.h>
/****************************** MACROS ******************************/
#define SHA256_BLOCK_SIZE 32 // SHA256 outputs a 32 byte digest
#ifdef __cplusplus
extern "C" {
#endif
/**************************** DATA TYPES ****************************/
typedef unsigned char BYTE; // 8-bit byte
typedef unsigned int WORD; // 32-bit word, change to "long" for 16-bit machines
typedef struct {
BYTE data[64];
WORD datalen;
unsigned long long bitlen;
WORD state[8];
} SHA256_CTX;
/*********************** FUNCTION DECLARATIONS **********************/
void sha256_init(SHA256_CTX *ctx);
void sha256_update(SHA256_CTX *ctx, const BYTE data[], size_t len);
void sha256_final(SHA256_CTX *ctx, BYTE hash[]);
#ifdef __cplusplus
};
#endif
#endif // SHA256_H