Implement vectored read support

Read requests can now take vectored responses from the filesystem
implementation and send them to the FUSE device via the writev() system call.

This allows file systems to send data without copying it into the
library-provided buffer if the data is already in memory.
geesefs-0-31-3
Vitaliy Filippov 2021-08-25 11:05:53 +03:00 committed by Michael Stapelberg
parent da71c70600
commit c818f6216b
9 changed files with 159 additions and 113 deletions

View File

@ -338,7 +338,7 @@ func (c *Connection) readMessage() (*buffer.InMessage, error) {
// Loop past transient errors. // Loop past transient errors.
for { for {
// Attempt a reaed. // Attempt a read.
err := m.Init(c.dev) err := m.Init(c.dev)
// Special cases: // Special cases:
@ -405,7 +405,7 @@ func (c *Connection) ReadOp() (_ context.Context, op interface{}, _ error) {
// Convert the message to an op. // Convert the message to an op.
outMsg := c.getOutMessage() outMsg := c.getOutMessage()
op, err = convertInMessage(inMsg, outMsg, c.protocol) op, err = convertInMessage(&c.cfg, inMsg, outMsg, c.protocol)
if err != nil { if err != nil {
c.putOutMessage(outMsg) c.putOutMessage(outMsg)
return nil, nil, fmt.Errorf("convertInMessage: %v", err) return nil, nil, fmt.Errorf("convertInMessage: %v", err)
@ -510,10 +510,16 @@ func (c *Connection) Reply(ctx context.Context, opErr error) {
noResponse := c.kernelResponse(outMsg, inMsg.Header().Unique, op, opErr) noResponse := c.kernelResponse(outMsg, inMsg.Header().Unique, op, opErr)
if !noResponse { if !noResponse {
err := c.writeMessage(outMsg.Bytes()) var err error
if err != nil && c.errorLogger != nil { if outMsg.Sglist != nil {
c.errorLogger.Printf("writeMessage: %v %v", err, outMsg.Bytes()) _, err = writev(int(c.dev.Fd()), outMsg.Sglist)
} else {
err = c.writeMessage(outMsg.OutHeaderBytes())
} }
if err != nil && c.errorLogger != nil {
c.errorLogger.Printf("writeMessage: %v %v", err, outMsg.OutHeaderBytes())
}
outMsg.Sglist = nil
} }
} }

View File

@ -38,6 +38,7 @@ import (
// //
// The caller is responsible for arranging for the message to be destroyed. // The caller is responsible for arranging for the message to be destroyed.
func convertInMessage( func convertInMessage(
config *MountConfig,
inMsg *buffer.InMessage, inMsg *buffer.InMessage,
outMsg *buffer.OutMessage, outMsg *buffer.OutMessage,
protocol fusekernel.Protocol) (o interface{}, err error) { protocol fusekernel.Protocol) (o interface{}, err error) {
@ -288,20 +289,15 @@ func convertInMessage(
Inode: fuseops.InodeID(inMsg.Header().Nodeid), Inode: fuseops.InodeID(inMsg.Header().Nodeid),
Handle: fuseops.HandleID(in.Fh), Handle: fuseops.HandleID(in.Fh),
Offset: int64(in.Offset), Offset: int64(in.Offset),
Size: int64(in.Size),
OpContext: fuseops.OpContext{Pid: inMsg.Header().Pid}, OpContext: fuseops.OpContext{Pid: inMsg.Header().Pid},
} }
o = to if !config.UseVectoredRead {
// Use part of the incoming message storage as the read buffer
readSize := int(in.Size) // For vectored zero-copy reads, don't allocate any buffers
p := outMsg.GrowNoZero(readSize) to.Dst = inMsg.GetFree(int(in.Size))
if p == nil {
return nil, fmt.Errorf("Can't grow for %d-byte read", readSize)
} }
o = to
sh := (*reflect.SliceHeader)(unsafe.Pointer(&to.Dst))
sh.Data = uintptr(p)
sh.Len = readSize
sh.Cap = readSize
case fusekernel.OpReaddir: case fusekernel.OpReaddir:
in := (*fusekernel.ReadIn)(inMsg.Consume(fusekernel.ReadInSize(protocol))) in := (*fusekernel.ReadIn)(inMsg.Consume(fusekernel.ReadInSize(protocol)))
@ -318,7 +314,7 @@ func convertInMessage(
o = to o = to
readSize := int(in.Size) readSize := int(in.Size)
p := outMsg.GrowNoZero(readSize) p := outMsg.Grow(readSize)
if p == nil { if p == nil {
return nil, fmt.Errorf("Can't grow for %d-byte read", readSize) return nil, fmt.Errorf("Can't grow for %d-byte read", readSize)
} }
@ -489,15 +485,19 @@ func convertInMessage(
o = to o = to
readSize := int(in.Size) readSize := int(in.Size)
p := outMsg.GrowNoZero(readSize) if readSize > 0 {
if p == nil { p := outMsg.Grow(readSize)
return nil, fmt.Errorf("Can't grow for %d-byte read", readSize) if p == nil {
} return nil, fmt.Errorf("Can't grow for %d-byte read", readSize)
}
sh := (*reflect.SliceHeader)(unsafe.Pointer(&to.Dst)) sh := (*reflect.SliceHeader)(unsafe.Pointer(&to.Dst))
sh.Data = uintptr(p) sh.Data = uintptr(p)
sh.Len = readSize sh.Len = readSize
sh.Cap = readSize sh.Cap = readSize
} else {
to.Dst = nil
}
case fusekernel.OpListxattr: case fusekernel.OpListxattr:
type input fusekernel.ListxattrIn type input fusekernel.ListxattrIn
@ -514,7 +514,7 @@ func convertInMessage(
readSize := int(in.Size) readSize := int(in.Size)
if readSize != 0 { if readSize != 0 {
p := outMsg.GrowNoZero(readSize) p := outMsg.Grow(readSize)
if p == nil { if p == nil {
return nil, fmt.Errorf("Can't grow for %d-byte read", readSize) return nil, fmt.Errorf("Can't grow for %d-byte read", readSize)
} }
@ -718,9 +718,11 @@ func (c *Connection) kernelResponseForOp(
} }
case *fuseops.ReadFileOp: case *fuseops.ReadFileOp:
// convertInMessage already set up the destination buffer to be at the end if o.Dst != nil {
// of the out message. We need only shrink to the right size based on how m.Append(o.Dst)
// much the user read. } else {
m.Append(o.Data...)
}
m.ShrinkTo(buffer.OutMessageHeaderSize + o.BytesRead) m.ShrinkTo(buffer.OutMessageHeaderSize + o.BytesRead)
case *fuseops.WriteFileOp: case *fuseops.WriteFileOp:

View File

@ -95,7 +95,7 @@ func describeRequest(op interface{}) (s string) {
case *fuseops.ReadFileOp: case *fuseops.ReadFileOp:
addComponent("handle %d", typed.Handle) addComponent("handle %d", typed.Handle)
addComponent("offset %d", typed.Offset) addComponent("offset %d", typed.Offset)
addComponent("%d bytes", len(typed.Dst)) addComponent("%d bytes", typed.Size)
case *fuseops.WriteFileOp: case *fuseops.WriteFileOp:
addComponent("handle %d", typed.Handle) addComponent("handle %d", typed.Handle)

View File

@ -637,9 +637,17 @@ type ReadFileOp struct {
// The offset within the file at which to read. // The offset within the file at which to read.
Offset int64 Offset int64
// The size of the read.
Size int64
// The destination buffer, whose length gives the size of the read. // The destination buffer, whose length gives the size of the read.
// For vectored reads, this field is always nil as the buffer is not provided.
Dst []byte Dst []byte
// Set by the file system:
// A list of slices of data to send back to the client for vectored reads.
Data [][]byte
// Set by the file system: the number of bytes read. // Set by the file system: the number of bytes read.
// //
// The FUSE documentation requires that exactly the requested number of bytes // The FUSE documentation requires that exactly the requested number of bytes

View File

@ -42,6 +42,7 @@ func init() {
type InMessage struct { type InMessage struct {
remaining []byte remaining []byte
storage []byte storage []byte
size int
} }
// NewInMessage creates a new InMessage with its storage initialized. // NewInMessage creates a new InMessage with its storage initialized.
@ -66,6 +67,7 @@ func (m *InMessage) Init(r io.Reader) error {
return fmt.Errorf("Unexpectedly read only %d bytes.", n) return fmt.Errorf("Unexpectedly read only %d bytes.", n)
} }
m.size = n
m.remaining = m.storage[headerSize:n] m.remaining = m.storage[headerSize:n]
// Check the header's length. // Check the header's length.
@ -114,3 +116,11 @@ func (m *InMessage) ConsumeBytes(n uintptr) []byte {
return b return b
} }
// Get the next n bytes after the message to use them as a temporary buffer
func (m *InMessage) GetFree(n int) []byte {
if n <= 0 || n > len(m.storage)-m.size {
return nil
}
return m.storage[m.size : m.size+n]
}

View File

@ -16,7 +16,6 @@ package buffer
import ( import (
"fmt" "fmt"
"log"
"reflect" "reflect"
"unsafe" "unsafe"
@ -33,30 +32,15 @@ const OutMessageHeaderSize = int(unsafe.Sizeof(fusekernel.OutHeader{}))
// //
// Must be initialized with Reset. // Must be initialized with Reset.
type OutMessage struct { type OutMessage struct {
// The offset into payload to which we're currently writing. header fusekernel.OutHeader
payloadOffset int Sglist [][]byte
header fusekernel.OutHeader
payload [MaxReadSize]byte
}
// Make sure that the header and payload are contiguous.
func init() {
a := unsafe.Offsetof(OutMessage{}.header) + uintptr(OutMessageHeaderSize)
b := unsafe.Offsetof(OutMessage{}.payload)
if a != b {
log.Panicf(
"header ends at offset %d, but payload starts at offset %d",
a, b)
}
} }
// Reset resets m so that it's ready to be used again. Afterward, the contents // Reset resets m so that it's ready to be used again. Afterward, the contents
// are solely a zeroed fusekernel.OutHeader struct. // are solely a zeroed fusekernel.OutHeader struct.
func (m *OutMessage) Reset() { func (m *OutMessage) Reset() {
m.payloadOffset = 0
m.header = fusekernel.OutHeader{} m.header = fusekernel.OutHeader{}
m.Sglist = nil
} }
// OutHeader returns a pointer to the header at the start of the message. // OutHeader returns a pointer to the header at the start of the message.
@ -64,30 +48,12 @@ func (m *OutMessage) OutHeader() *fusekernel.OutHeader {
return &m.header return &m.header
} }
// Grow grows m's buffer by the given number of bytes, returning a pointer to // Grow adds a new buffer of <n> bytes to the message, returning a pointer to
// the start of the new segment, which is guaranteed to be zeroed. If there is // the start of the new segment, which is guaranteed to be zeroed.
// insufficient space, it returns nil.
func (m *OutMessage) Grow(n int) unsafe.Pointer { func (m *OutMessage) Grow(n int) unsafe.Pointer {
p := m.GrowNoZero(n) b := make([]byte, n)
if p != nil { m.Append(b)
jacobsa_fuse_memclr(p, uintptr(n)) p := unsafe.Pointer(&b[0])
}
return p
}
// GrowNoZero is equivalent to Grow, except the new segment is not zeroed. Use
// with caution!
func (m *OutMessage) GrowNoZero(n int) unsafe.Pointer {
// Will we overflow the buffer?
o := m.payloadOffset
if len(m.payload)-o < n {
return nil
}
p := unsafe.Pointer(uintptr(unsafe.Pointer(&m.payload)) + uintptr(o))
m.payloadOffset = o + n
return p return p
} }
@ -100,51 +66,62 @@ func (m *OutMessage) ShrinkTo(n int) {
n, n,
m.Len())) m.Len()))
} }
if n == OutMessageHeaderSize {
m.payloadOffset = n - OutMessageHeaderSize m.Sglist = nil
} else {
i := 1
n -= OutMessageHeaderSize
for len(m.Sglist) > i && n >= len(m.Sglist[i]) {
n -= len(m.Sglist[i])
i++
}
if n > 0 {
m.Sglist[i] = m.Sglist[i][0:n]
i++
}
m.Sglist = m.Sglist[0:i]
}
} }
// Append is equivalent to growing by len(src), then copying src over the new // Append is equivalent to growing by len(src), then copying src over the new
// segment. Int panics if there is not enough room available. // segment. Int panics if there is not enough room available.
func (m *OutMessage) Append(src []byte) { func (m *OutMessage) Append(src ...[]byte) {
p := m.GrowNoZero(len(src)) if m.Sglist == nil {
if p == nil { // First element of Sglist is pre-filled with a pointer to the header
panic(fmt.Sprintf("Can't grow %d bytes", len(src))) // to allow sending it with a single writev() call without copying the
// slice again
m.Sglist = append(m.Sglist, m.OutHeaderBytes())
} }
m.Sglist = append(m.Sglist, src...)
sh := (*reflect.SliceHeader)(unsafe.Pointer(&src))
jacobsa_fuse_memmove(p, unsafe.Pointer(sh.Data), uintptr(sh.Len))
return return
} }
// AppendString is like Append, but accepts string input. // AppendString is like Append, but accepts string input.
func (m *OutMessage) AppendString(src string) { func (m *OutMessage) AppendString(src string) {
p := m.GrowNoZero(len(src)) m.Append([]byte(src))
if p == nil {
panic(fmt.Sprintf("Can't grow %d bytes", len(src)))
}
sh := (*reflect.StringHeader)(unsafe.Pointer(&src))
jacobsa_fuse_memmove(p, unsafe.Pointer(sh.Data), uintptr(sh.Len))
return return
} }
// Len returns the current size of the message, including the leading header. // Len returns the current size of the message, including the leading header.
func (m *OutMessage) Len() int { func (m *OutMessage) Len() int {
return OutMessageHeaderSize + m.payloadOffset if m.Sglist == nil {
return OutMessageHeaderSize
}
// First element of Sglist is the header, so we don't need to count it here
r := 0
for _, b := range m.Sglist {
r += len(b)
}
return r
} }
// Bytes returns a reference to the current contents of the buffer, including // OutHeaderBytes returns a byte slice containing the current header.
// the leading header. func (m *OutMessage) OutHeaderBytes() []byte {
func (m *OutMessage) Bytes() []byte { l := OutMessageHeaderSize
l := m.Len()
sh := reflect.SliceHeader{ sh := reflect.SliceHeader{
Data: uintptr(unsafe.Pointer(&m.header)), Data: uintptr(unsafe.Pointer(&m.header)),
Len: l, Len: l,
Cap: l, Cap: l,
} }
return *(*[]byte)(unsafe.Pointer(&sh)) return *(*[]byte)(unsafe.Pointer(&sh))
} }

View File

@ -107,9 +107,12 @@ func TestOutMessageAppend(t *testing.T) {
t.Errorf("om.Len() = %d, want %d", got, want) t.Errorf("om.Len() = %d, want %d", got, want)
} }
b := om.Bytes() b := []byte(nil)
for i := 0; i < len(om.Sglist); i++ {
b = append(b, om.Sglist[i]...)
}
if got, want := len(b), wantLen; got != want { if got, want := len(b), wantLen; got != want {
t.Fatalf("len(om.Bytes()) = %d, want %d", got, want) t.Fatalf("len(om.OutHeaderBytes()) = %d, want %d", got, want)
} }
want := append( want := append(
@ -137,9 +140,12 @@ func TestOutMessageAppendString(t *testing.T) {
t.Errorf("om.Len() = %d, want %d", got, want) t.Errorf("om.Len() = %d, want %d", got, want)
} }
b := om.Bytes() b := []byte(nil)
for i := 0; i < len(om.Sglist); i++ {
b = append(b, om.Sglist[i]...)
}
if got, want := len(b), wantLen; got != want { if got, want := len(b), wantLen; got != want {
t.Fatalf("len(om.Bytes()) = %d, want %d", got, want) t.Fatalf("len(om.OutHeaderBytes()) = %d, want %d", got, want)
} }
want := append( want := append(
@ -168,9 +174,12 @@ func TestOutMessageShrinkTo(t *testing.T) {
t.Errorf("om.Len() = %d, want %d", got, want) t.Errorf("om.Len() = %d, want %d", got, want)
} }
b := om.Bytes() b := []byte(nil)
for i := 0; i < len(om.Sglist); i++ {
b = append(b, om.Sglist[i]...)
}
if got, want := len(b), wantLen; got != want { if got, want := len(b), wantLen; got != want {
t.Fatalf("len(om.Bytes()) = %d, want %d", got, want) t.Fatalf("len(om.OutHeaderBytes()) = %d, want %d", got, want)
} }
want := append( want := append(
@ -201,7 +210,7 @@ func TestOutMessageHeader(t *testing.T) {
*h = want *h = want
// Check that the result is as expected. // Check that the result is as expected.
b := om.Bytes() b := om.OutHeaderBytes()
if len(b) != int(unsafe.Sizeof(want)) { if len(b) != int(unsafe.Sizeof(want)) {
t.Fatalf("unexpected length %d; want %d", len(b), unsafe.Sizeof(want)) t.Fatalf("unexpected length %d; want %d", len(b), unsafe.Sizeof(want))
} }
@ -225,9 +234,7 @@ func TestOutMessageReset(t *testing.T) {
} }
// Ensure a non-zero payload length. // Ensure a non-zero payload length.
if p := om.GrowNoZero(128); p == nil { p := om.Grow(128)
t.Fatal("GrowNoZero failed")
}
// Reset. // Reset.
om.Reset() om.Reset()
@ -259,10 +266,7 @@ func TestOutMessageGrow(t *testing.T) {
// Set up garbage where the payload will soon be. // Set up garbage where the payload will soon be.
const payloadSize = 1234 const payloadSize = 1234
{ {
p := om.GrowNoZero(payloadSize) p := om.Grow(payloadSize)
if p == nil {
t.Fatal("GrowNoZero failed")
}
err := fillWithGarbage(p, payloadSize) err := fillWithGarbage(p, payloadSize)
if err != nil { if err != nil {
@ -283,7 +287,10 @@ func TestOutMessageGrow(t *testing.T) {
t.Errorf("om.Len() = %d, want %d", got, want) t.Errorf("om.Len() = %d, want %d", got, want)
} }
b := om.Bytes() b := []byte(nil)
for i := 0; i < len(om.Sglist); i++ {
b = append(b, om.Sglist[i]...)
}
if got, want := len(b), wantLen; got != want { if got, want := len(b), wantLen; got != want {
t.Fatalf("len(om.Len()) = %d, want %d", got, want) t.Fatalf("len(om.Len()) = %d, want %d", got, want)
} }
@ -304,7 +311,7 @@ func BenchmarkOutMessageReset(b *testing.B) {
om.Reset() om.Reset()
} }
b.SetBytes(int64(unsafe.Offsetof(om.payload))) b.SetBytes(int64(om.Len()))
}) })
// Many megabytes worth of buffers, which should defeat the CPU cache. // Many megabytes worth of buffers, which should defeat the CPU cache.
@ -321,7 +328,7 @@ func BenchmarkOutMessageReset(b *testing.B) {
oms[i%numMessages].Reset() oms[i%numMessages].Reset()
} }
b.SetBytes(int64(unsafe.Offsetof(oms[0].payload))) b.SetBytes(int64(oms[0].Len()))
}) })
} }

View File

@ -156,6 +156,13 @@ type MountConfig struct {
// actually utilise any form of qualifiable UNIX permissions. // actually utilise any form of qualifiable UNIX permissions.
DisableDefaultPermissions bool DisableDefaultPermissions bool
// Use vectored reads.
// Vectored read allows file systems to avoid memory copying overhead if
// the data is already in memory when they return it to FUSE.
// When turned on, ReadFileOp.Dst is always nil and the FS must return data
// being read from the file as a list of slices in ReadFileOp.Data.
UseVectoredRead bool
// OS X only. // OS X only.
// //
// The name of the mounted volume, as displayed in the Finder. If empty, a // The name of the mounted volume, as displayed in the Finder. If empty, a

29
writev.go Normal file
View File

@ -0,0 +1,29 @@
package fuse
import (
"syscall"
"unsafe"
)
// writev sends the buffers in packet to the file descriptor fd with a single
// writev(2) system call, so the segments do not have to be copied into one
// contiguous buffer first.
//
// Zero-length buffers are skipped: they contribute no data and have no first
// element whose address could be taken. If nothing remains after skipping
// them (packet is nil, empty, or holds only empty buffers), writev returns
// (0, nil) without issuing a system call.
//
// n is the raw result of the system call converted to int. err is a
// syscall.Errno when the call fails and nil otherwise. A short write is not
// retried.
func writev(fd int, packet [][]byte) (n int, err error) {
	iovecs := make([]syscall.Iovec, 0, len(packet))
	for _, v := range packet {
		if len(v) == 0 {
			continue
		}
		// &v[0] is the address of the buffer's first byte in its backing
		// array, not of the loop variable, so it stays valid after the loop.
		vec := syscall.Iovec{
			Base: &v[0],
		}
		vec.SetLen(len(v))
		iovecs = append(iovecs, vec)
	}

	// Nothing to send. Taking &iovecs[0] below would panic on an empty slice.
	if len(iovecs) == 0 {
		return 0, nil
	}

	// The unsafe.Pointer -> uintptr conversion must stay inside the call
	// expression so that the iovec array is kept alive for the duration of
	// the system call.
	n1, _, e1 := syscall.Syscall(
		syscall.SYS_WRITEV,
		uintptr(fd), uintptr(unsafe.Pointer(&iovecs[0])), uintptr(len(iovecs)),
	)
	n = int(n1)
	if e1 != 0 {
		err = syscall.Errno(e1)
	}
	return
}