Implement Jerasure's matrix generation algorithm for interoperability

2022-08-15 14:27:17 +03:00 · 2022-08-15 14:27:17 +03:00 · b933ef1add
parent 7b7dbe6919
commit b933ef1add
3 changed files with 128 additions and 1 deletions
--- a/options.go
+++ b/options.go
@ -16,6 +16,7 @@ type options struct {
 	perRound      int
 	useAVX512, useAVX2, useSSSE3, useSSE2 bool
 	useJerasureMatrix                     bool
 	usePAR1Matrix                         bool
 	useCauchy                             bool
 	fastOneParity                         bool
@ -163,12 +164,25 @@ func WithAVX512(enabled bool) Option {
 	}
 }
 // WithJerasureMatrix makes the encoder construct its Reed-Solomon
 // Vandermonde matrix the same way the Jerasure library does.
 // With this construction the first row and the first column of the coding
 // matrix contain only 1's, so the first parity chunk is always the XOR of
 // all data chunks.
 // Choosing this option deselects the PAR1 and Cauchy matrix options.
 func WithJerasureMatrix() Option {
 	return func(opts *options) {
 		// Clear the other matrix selections so only this one stays enabled.
 		opts.useCauchy = false
 		opts.usePAR1Matrix = false
 		opts.useJerasureMatrix = true
 	}
 }
 // WithPAR1Matrix causes the encoder to build the matrix how PARv1
 // does. Note that the method they use is buggy, and may lead to cases
 // where recovery is impossible, even if there are enough parity
 // shards.
 func WithPAR1Matrix() Option {
 	return func(o *options) {
 		o.useJerasureMatrix = false
 		o.usePAR1Matrix = true
 		o.useCauchy = false
 	}
@ -180,8 +194,9 @@ func WithPAR1Matrix() Option {
 // but will result in slightly faster start-up time.
 func WithCauchyMatrix() Option {
 	return func(o *options) {
 		// Deselect the Jerasure and PAR1 matrices so only the Cauchy matrix stays selected.
-		o.useCauchy = true
+		o.useJerasureMatrix = false
 		o.usePAR1Matrix = false
 		o.useCauchy = true
 	}
 }
--- a/reedsolomon.go
+++ b/reedsolomon.go
@ -191,6 +191,87 @@ func buildMatrix(dataShards, totalShards int) (matrix, error) {
 	return vm.Multiply(topInv)
 }
 // buildMatrixJerasure creates the same encoding matrix as the Jerasure
 // library, for interoperability with it.
 //
 // dataShards is the number of columns and totalShards the number of rows
 // of the returned matrix. An error from the initial Vandermonde
 // construction is returned unchanged.
 //
 // The top square of the matrix is guaranteed to be an identity
 // matrix, which means that the data shards are unchanged after
 // encoding. When parity rows exist, the first parity row and the first
 // column of the parity rows are normalized to contain only ones.
 func buildMatrixJerasure(dataShards, totalShards int) (matrix, error) {
 	// Start with a Vandermonde matrix.  This matrix would work,
 	// in theory, but doesn't have the property that the data
 	// shards are unchanged after encoding.
 	vm, err := vandermonde(totalShards, dataShards)
 	if err != nil {
 		return nil, err
 	}
 	// Jerasure does this:
 	// first row is always 100..00
 	vm[0][0] = 1
 	for i := 1; i < dataShards; i++ {
 		vm[0][i] = 0
 	}
 	// last row is always 000..01
 	for i := 0; i < dataShards-1; i++ {
 		vm[totalShards-1][i] = 0
 	}
 	vm[totalShards-1][dataShards-1] = 1
 	// Turn the top dataShards x dataShards square into the identity using
 	// column operations applied to the whole matrix
 	// (same as vm.Multiply(vm[0:dataShards].Invert())).
 	for i := 0; i < dataShards; i++ {
 		// Find the first row at or below i whose i'th column is not 0.
 		// NOTE(review): if no such row exists, r ends up equal to
 		// totalShards and the accesses below panic; this is presumably
 		// impossible for valid shard counts - confirm.
 		r := i
 		for ; r < totalShards && vm[r][i] == 0; r++ {
 		}
 		if r != i {
 			// Swap it with i'th row if not already there.
 			vm[r], vm[i] = vm[i], vm[r]
 		}
 		if vm[i][i] != 1 {
 			// Make vm[i][i] = 1 by dividing the whole column i by vm[i][i].
 			inv := galDivide(1, vm[i][i])
 			for j := 0; j < totalShards; j++ {
 				vm[j][i] = galMultiply(vm[j][i], inv)
 			}
 		}
 		for j := 0; j < dataShards; j++ {
 			// Make vm[i][j] = 0 where j != i by adding vm[i][j] times
 			// column i to column j. The factor is read before the column
 			// is modified because row i itself is part of the update.
 			factor := vm[i][j]
 			if j != i && factor != 0 {
 				for row := 0; row < totalShards; row++ {
 					vm[row][j] = galAdd(vm[row][j], galMultiply(factor, vm[row][i]))
 				}
 			}
 		}
 	}
 	// Without parity rows there is nothing left to normalize; indexing
 	// vm[dataShards] below would be out of range.
 	if totalShards <= dataShards {
 		return vm, nil
 	}
 	// Make vm[dataShards] row all ones - divide each column j by vm[dataShards][j].
 	// Only the parity rows are touched, so the identity on top is preserved.
 	for j := 0; j < dataShards; j++ {
 		tmp := vm[dataShards][j]
 		if tmp != 1 {
 			tmp = galDivide(1, tmp)
 			for i := dataShards; i < totalShards; i++ {
 				vm[i][j] = galMultiply(vm[i][j], tmp)
 			}
 		}
 	}
 	// Make vm[dataShards...totalShards-1][0] column all ones - divide each row
 	// by its first element. The first parity row is skipped: it is already all ones.
 	for i := dataShards + 1; i < totalShards; i++ {
 		tmp := vm[i][0]
 		if tmp != 1 {
 			tmp = galDivide(1, tmp)
 			for j := 0; j < dataShards; j++ {
 				vm[i][j] = galMultiply(vm[i][j], tmp)
 			}
 		}
 	}
 	return vm, nil
 }
 // buildMatrixPAR1 creates the matrix to use for encoding according to
 // the PARv1 spec, given the number of data shards and the number of
 // total shards. Note that the method they use is buggy, and may lead
@ -323,6 +404,8 @@ func New(dataShards, parityShards int, opts ...Option) (Encoder, error) {
 		r.m, err = buildMatrixCauchy(dataShards, r.Shards)
 	case r.o.usePAR1Matrix:
 		r.m, err = buildMatrixPAR1(dataShards, r.Shards)
 	case r.o.useJerasureMatrix:
 		r.m, err = buildMatrixJerasure(dataShards, r.Shards)
 	default:
 		r.m, err = buildMatrix(dataShards, r.Shards)
 	}
--- a/reedsolomon_test.go
+++ b/reedsolomon_test.go
@ -110,6 +110,35 @@ func findSingularSubMatrix(m matrix) (matrix, error) {
 	return nil, nil
 }
 // TestBuildMatrixJerasure checks that buildMatrixJerasure, for 8 data and
 // 4 parity shards, yields an identity matrix on top followed by the
 // reference coding rows expected from Jerasure.
 func TestBuildMatrixJerasure(t *testing.T) {
 	totalShards := 12
 	dataShards := 8
 	m, err := buildMatrixJerasure(dataShards, totalShards)
 	if err != nil {
 		t.Fatal(err)
 	}
 	// Expected parity rows of the 8+4 coding matrix.
 	refMatrix := matrix{
 		{1, 1, 1, 1, 1, 1, 1, 1},
 		{1, 55, 39, 73, 84, 181, 225, 217},
 		{1, 39, 217, 161, 92, 60, 172, 90},
 		{1, 172, 70, 235, 143, 34, 200, 101},
 	}
 	// Fail cleanly instead of panicking on an index if rows are missing.
 	if len(m) != totalShards {
 		t.Fatalf("matrix has %d rows, want %d", len(m), totalShards)
 	}
 	for i := 0; i < dataShards; i++ {
 		for j := 0; j < dataShards; j++ {
 			got := m[i][j]
 			if (i == j && got != 1) || (i != j && got != 0) {
 				t.Fatalf("Top part of the matrix is not identity: m[%d][%d] = %d", i, j, got)
 			}
 		}
 	}
 	for i, refRow := range refMatrix {
 		for j, want := range refRow {
 			if got := m[dataShards+i][j]; got != want {
 				t.Fatalf("Coding matrix for EC 8+4 differs from Jerasure: m[%d][%d] = %d, want %d", dataShards+i, j, got, want)
 			}
 		}
 	}
 }
 func TestBuildMatrixPAR1Singular(t *testing.T) {
 	totalShards := 8
 	dataShards := 4