Implement jerasure algorithm of matrix generation for interoperability

jerasure-matrix
Vitaliy Filippov 2022-08-15 14:27:17 +03:00
parent 7b7dbe6919
commit b933ef1add
3 changed files with 128 additions and 1 deletions

View File

@ -16,6 +16,7 @@ type options struct {
perRound int
useAVX512, useAVX2, useSSSE3, useSSE2 bool
useJerasureMatrix bool
usePAR1Matrix bool
useCauchy bool
fastOneParity bool
@ -163,12 +164,25 @@ func WithAVX512(enabled bool) Option {
}
}
// WithJerasureMatrix makes the encoder generate its Reed-Solomon-Vandermonde
// coding matrix in the same way as the Jerasure library does.
// With this method the first row and the first column of the coding matrix
// contain only 1's, so the first parity chunk is always equal to the XOR of
// all data chunks.
// Choosing this matrix clears the PAR1 and Cauchy matrix selections.
func WithJerasureMatrix() Option {
	selectJerasure := func(o *options) {
		// Clear the other matrix kinds first, then enable the Jerasure one.
		o.useCauchy, o.usePAR1Matrix = false, false
		o.useJerasureMatrix = true
	}
	return selectJerasure
}
// WithPAR1Matrix causes the encoder to build the matrix the way PARv1
// does. Note that the method PARv1 uses is buggy, and may lead to cases
// where recovery is impossible, even if there are enough parity
// shards.
func WithPAR1Matrix() Option {
return func(o *options) {
o.useJerasureMatrix = false
o.usePAR1Matrix = true
o.useCauchy = false
}
@ -180,8 +194,9 @@ func WithPAR1Matrix() Option {
// but will result in slightly faster start-up time.
func WithCauchyMatrix() Option {
	return func(o *options) {
		// Clear the Jerasure and PAR1 selections and enable the Cauchy
		// matrix. Each flag is assigned exactly once.
		o.useJerasureMatrix = false
		o.usePAR1Matrix = false
		o.useCauchy = true
	}
}

View File

@ -191,6 +191,87 @@ func buildMatrix(dataShards, totalShards int) (matrix, error) {
return vm.Multiply(topInv)
}
// buildMatrixJerasure creates the same encoding matrix as the Jerasure
// library.
//
// It starts from a totalShards x dataShards Vandermonde matrix, forces the
// first row to 100..00 and the last row to 000..01, and then reduces the
// matrix with row swaps and column operations until the top
// dataShards x dataShards square is an identity matrix, which means that
// the data shards are unchanged after encoding. Finally the coding rows
// (dataShards..totalShards-1) are rescaled so that the first coding row and
// the first coding column contain only 1's.
//
// When totalShards == dataShards there are no coding rows to rescale and
// the reduced matrix is returned as is.
//
// The only error returned is the one reported by vandermonde.
func buildMatrixJerasure(dataShards, totalShards int) (matrix, error) {
	// Start with a Vandermonde matrix. This matrix would work,
	// in theory, but doesn't have the property that the data
	// shards are unchanged after encoding.
	vm, err := vandermonde(totalShards, dataShards)
	if err != nil {
		return nil, err
	}

	// Jerasure does this:
	// first row is always 100..00
	vm[0][0] = 1
	for i := 1; i < dataShards; i++ {
		vm[0][i] = 0
	}
	// last row is always 000..01
	for i := 0; i < dataShards-1; i++ {
		vm[totalShards-1][i] = 0
	}
	vm[totalShards-1][dataShards-1] = 1

	// Reduce the top square to the identity, one diagonal element at a time.
	for i := 0; i < dataShards; i++ {
		// Find the first row at or below i where the i'th column is not 0
		r := i
		for r < totalShards && vm[r][i] == 0 {
			r++
		}
		if r != i {
			// Swap it with the i'th row if it is not there already
			vm[r], vm[i] = vm[i], vm[r]
		}
		// Multiply by the inverted matrix (same as vm.Multiply(vm[0:dataShards].Invert()))
		if vm[i][i] != 1 {
			// Make vm[i][i] = 1 by dividing the whole i'th column by vm[i][i]
			inv := galDivide(1, vm[i][i])
			for j := 0; j < totalShards; j++ {
				vm[j][i] = galMultiply(vm[j][i], inv)
			}
		}
		for j := 0; j < dataShards; j++ {
			// Make vm[i][j] = 0 where j != i by adding vm[i][j]*vm[.][i] to column j
			tmp := vm[i][j]
			if j != i && tmp != 0 {
				for k := 0; k < totalShards; k++ {
					vm[k][j] = galAdd(vm[k][j], galMultiply(tmp, vm[k][i]))
				}
			}
		}
	}

	// Without parity shards there is no vm[dataShards] row; indexing it
	// below would be out of range, so stop here.
	if totalShards == dataShards {
		return vm, nil
	}

	// Make the vm[dataShards] row all ones - divide each column j of the
	// coding rows by vm[dataShards][j]
	for j := 0; j < dataShards; j++ {
		tmp := vm[dataShards][j]
		if tmp != 1 {
			tmp = galDivide(1, tmp)
			for i := dataShards; i < totalShards; i++ {
				vm[i][j] = galMultiply(vm[i][j], tmp)
			}
		}
	}

	// Make the vm[dataShards+1...totalShards-1][0] column all ones - divide
	// each of those rows by its first element. The vm[dataShards] row is
	// skipped because the previous step already set it to all ones.
	for i := dataShards + 1; i < totalShards; i++ {
		tmp := vm[i][0]
		if tmp != 1 {
			tmp = galDivide(1, tmp)
			for j := 0; j < dataShards; j++ {
				vm[i][j] = galMultiply(vm[i][j], tmp)
			}
		}
	}

	return vm, nil
}
// buildMatrixPAR1 creates the matrix to use for encoding according to
// the PARv1 spec, given the number of data shards and the number of
// total shards. Note that the method they use is buggy, and may lead
@ -323,6 +404,8 @@ func New(dataShards, parityShards int, opts ...Option) (Encoder, error) {
r.m, err = buildMatrixCauchy(dataShards, r.Shards)
case r.o.usePAR1Matrix:
r.m, err = buildMatrixPAR1(dataShards, r.Shards)
case r.o.useJerasureMatrix:
r.m, err = buildMatrixJerasure(dataShards, r.Shards)
default:
r.m, err = buildMatrix(dataShards, r.Shards)
}

View File

@ -110,6 +110,35 @@ func findSingularSubMatrix(m matrix) (matrix, error) {
return nil, nil
}
// TestBuildMatrixJerasure builds the matrix for 8 data shards and 12 total
// shards and checks that the top 8x8 square is the identity and that the
// four coding rows equal the reference rows listed in refMatrix.
func TestBuildMatrixJerasure(t *testing.T) {
	const (
		dataShards  = 8
		totalShards = 12
	)
	m, err := buildMatrixJerasure(dataShards, totalShards)
	if err != nil {
		t.Fatal(err)
	}
	// Fail with a readable message instead of an index-out-of-range panic
	// if the matrix has the wrong number of rows.
	if len(m) != totalShards {
		t.Fatalf("Matrix has %d rows, expected %d", len(m), totalShards)
	}

	// Reference coding rows for EC 8+4; the failure message below describes
	// them as the rows produced by Jerasure.
	refMatrix := matrix{
		{1, 1, 1, 1, 1, 1, 1, 1},
		{1, 55, 39, 73, 84, 181, 225, 217},
		{1, 39, 217, 161, 92, 60, 172, 90},
		{1, 172, 70, 235, 143, 34, 200, 101},
	}

	// The top dataShards rows must form an identity matrix.
	for i := 0; i < dataShards; i++ {
		for j := 0; j < dataShards; j++ {
			if (i != j && m[i][j] != 0) || (i == j && m[i][j] != 1) {
				t.Fatalf("Top part of the matrix is not identity: m[%d][%d] = %d", i, j, m[i][j])
			}
		}
	}

	// The remaining rows must match the reference element by element.
	for i := range refMatrix {
		for j, want := range refMatrix[i] {
			if got := m[dataShards+i][j]; got != want {
				t.Fatalf("Coding matrix for EC 8+4 differs from Jerasure: m[%d][%d] = %d, expected %d",
					dataShards+i, j, got, want)
			}
		}
	}
}
func TestBuildMatrixPAR1Singular(t *testing.T) {
totalShards := 8
dataShards := 4