Add an efficient implementation of shard counts up to 65536 (#191)

This is a O(n*log n) implementation of Reed-Solomon
codes, ported from the C++ library https://github.com/catid/leopard.

The implementation is based on the paper

"Novel Polynomial Basis with Fast Fourier Transform
and Its Application to Reed-Solomon Erasure Codes"

Several TODOs are left for future commits:

- Performance optimizations, in particular SIMD and multiple goroutines
- Documentation
- Detailed Tests
- Merging of reedSolomonFF16 and reedSolomon types
- Turn the straight C++ port into more idiomatic Go

This change also bumps one race testing timeout, to ensure adequate
time on CI.
master
Elias Naur 2022-07-21 14:27:10 +02:00 committed by GitHub
parent 6ef9059245
commit 49be604db0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 887 additions and 11 deletions

View File

@ -64,12 +64,12 @@ jobs:
- name: Test Races, noasm, 1 cpu
env:
CGO_ENABLED: 1
run: go test -tags=noasm -cpu=1 -short -race .
run: go test -tags=noasm -cpu=1 -short -race -timeout 20m .
- name: Test Races, noasm, 4 cpu
env:
CGO_ENABLED: 1
run: go test -tags=noasm -cpu=4 -short -race .
run: go test -tags=noasm -cpu=4 -short -race -timeout 20m .
- name: Test Races, no avx512
env:

859
leopard.go Normal file
View File

@ -0,0 +1,859 @@
package reedsolomon
// This is a O(n*log n) implementation of Reed-Solomon
// codes, ported from the C++ library https://github.com/catid/leopard.
//
// The implementation is based on the paper
//
// S.-J. Lin, T. Y. Al-Naffouri, Y. S. Han, and W.-H. Chung,
// "Novel Polynomial Basis with Fast Fourier Transform
// and Its Application to Reed-Solomon Erasure Codes"
// IEEE Trans. on Information Theory, pp. 6284-6299, November, 2016.
import (
"bytes"
"errors"
"io"
"math/bits"
"sync"
"unsafe"
)
// reedSolomonFF16 is like reedSolomon but for more than 256 total shards.
type reedSolomonFF16 struct {
DataShards int // Number of data shards, should not be modified.
ParityShards int // Number of parity shards, should not be modified.
Shards int // Total number of shards. Calculated, and should not be modified.
o options
}
// newFF16 is like New, but for more than 256 total shards.
func newFF16(dataShards, parityShards int, opts ...Option) (*reedSolomonFF16, error) {
initConstants()
if dataShards <= 0 || parityShards < 0 {
return nil, ErrInvShardNum
}
if dataShards+parityShards > 65536 {
return nil, ErrMaxShardNum
}
r := &reedSolomonFF16{
DataShards: dataShards,
ParityShards: parityShards,
Shards: dataShards + parityShards,
o: defaultOptions,
}
for _, opt := range opts {
opt(&r.o)
}
return r, nil
}
type ffe uint16
const (
bitwidth = 16
order = 1 << bitwidth
modulus = order - 1
polynomial = 0x1002D
)
var (
fftSkew *[modulus]ffe
logWalsh *[order]ffe
)
// Logarithm Tables
var (
logLUT *[order]ffe
expLUT *[order]ffe
)
// Stores the partial products of x * y at offset x + y * 65536
// Repeated accesses from the same y value are faster
var mul16LUTs *[order]mul16LUT
type mul16LUT struct {
LUT [4 * 16]ffe
}
func (r *reedSolomonFF16) Encode(shards [][]byte) error {
if len(shards) != r.Shards {
return ErrTooFewShards
}
if err := checkShards(shards, false); err != nil {
return err
}
return r.encode(shards)
}
func (r *reedSolomonFF16) encode(shards [][]byte) error {
shardSize := len(shards[0])
if shardSize%64 != 0 {
return ErrShardSize
}
m := ceilPow2(r.ParityShards)
work := make([][]byte, m*2)
for i := range work {
work[i] = make([]byte, shardSize)
}
mtrunc := m
if r.DataShards < mtrunc {
mtrunc = r.DataShards
}
skewLUT := fftSkew[m-1:]
sh := shards
ifftDITEncoder(
sh[:r.DataShards],
mtrunc,
work,
nil, // No xor output
m,
skewLUT,
&r.o,
)
lastCount := r.DataShards % m
if m >= r.DataShards {
goto skip_body
}
// For sets of m data pieces:
for i := m; i+m <= r.DataShards; i += m {
sh = sh[m:]
skewLUT = skewLUT[m:]
// work <- work xor IFFT(data + i, m, m + i)
ifftDITEncoder(
sh, // data source
m,
work[m:], // temporary workspace
work, // xor destination
m,
skewLUT,
&r.o,
)
}
// Handle final partial set of m pieces:
if lastCount != 0 {
sh = sh[m:]
skewLUT = skewLUT[m:]
// work <- work xor IFFT(data + i, m, m + i)
ifftDITEncoder(
sh, // data source
lastCount,
work[m:], // temporary workspace
work, // xor destination
m,
skewLUT,
&r.o,
)
}
skip_body:
// work <- FFT(work, m, 0)
fftDIT(work, r.ParityShards, m, fftSkew[:], &r.o)
for i, w := range work[:r.ParityShards] {
sh := shards[i+r.DataShards]
if cap(sh) >= shardSize {
sh = append(sh[:0], w...)
} else {
sh = w
}
shards[i+r.DataShards] = sh
}
return nil
}
func (r *reedSolomonFF16) EncodeIdx(dataShard []byte, idx int, parity [][]byte) error {
return errors.New("not implemented")
}
func (r *reedSolomonFF16) Join(dst io.Writer, shards [][]byte, outSize int) error {
return errors.New("not implemented")
}
func (r *reedSolomonFF16) Update(shards [][]byte, newDatashards [][]byte) error {
return errors.New("not implemented")
}
func (r *reedSolomonFF16) Split(data []byte) ([][]byte, error) {
return nil, errors.New("not implemented")
}
func (r *reedSolomonFF16) ReconstructSome(shards [][]byte, required []bool) error {
return r.ReconstructData(shards)
}
func (r *reedSolomonFF16) Reconstruct(shards [][]byte) error {
return r.reconstruct(shards, true)
}
func (r *reedSolomonFF16) ReconstructData(shards [][]byte) error {
return r.reconstruct(shards, false)
}
func (r *reedSolomonFF16) Verify(shards [][]byte) (bool, error) {
if len(shards) != r.Shards {
return false, ErrTooFewShards
}
if err := checkShards(shards, false); err != nil {
return false, err
}
// Re-encode parity shards to temporary storage.
shardSize := len(shards[0])
outputs := make([][]byte, r.Shards)
copy(outputs, shards[:r.DataShards])
for i := r.DataShards; i < r.Shards; i++ {
outputs[i] = make([]byte, shardSize)
}
if err := r.Encode(outputs); err != nil {
return false, err
}
// Compare.
for i := r.DataShards; i < r.Shards; i++ {
if !bytes.Equal(outputs[i], shards[i]) {
return false, nil
}
}
return true, nil
}
func (r *reedSolomonFF16) reconstruct(shards [][]byte, recoverAll bool) error {
if len(shards) != r.Shards {
return ErrTooFewShards
}
if err := checkShards(shards, true); err != nil {
return err
}
shardSize := len(shards[0])
if shardSize%64 != 0 {
return ErrShardSize
}
m := ceilPow2(r.ParityShards)
n := ceilPow2(m + r.DataShards)
// Fill in error locations.
var errLocs [order]ffe
for i := 0; i < r.ParityShards; i++ {
if len(shards[i+r.DataShards]) == 0 {
errLocs[i] = 1
}
}
for i := r.ParityShards; i < m; i++ {
errLocs[i] = 1
}
for i := 0; i < r.DataShards; i++ {
if len(shards[i]) == 0 {
errLocs[i+m] = 1
}
}
// Evaluate error locator polynomial
fwht(errLocs[:], order, m+r.DataShards)
for i := 0; i < order; i++ {
errLocs[i] = ffe((uint(errLocs[i]) * uint(logWalsh[i])) % modulus)
}
fwht(errLocs[:], order, order)
work := make([][]byte, n)
for i := range work {
work[i] = make([]byte, shardSize)
}
// work <- recovery data
for i := 0; i < r.ParityShards; i++ {
if len(shards[i+r.DataShards]) != 0 {
mul(work[i], shards[i+r.DataShards], errLocs[i])
} else {
memclr(work[i])
}
}
for i := r.ParityShards; i < m; i++ {
memclr(work[i])
}
// work <- original data
for i := 0; i < r.DataShards; i++ {
if len(shards[i]) != 0 {
mul(work[m+i], shards[i], errLocs[m+i])
} else {
memclr(work[m+i])
}
}
for i := m + r.DataShards; i < n; i++ {
memclr(work[i])
}
// work <- IFFT(work, n, 0)
ifftDITDecoder(
m+r.DataShards,
work,
n,
fftSkew[:],
&r.o,
)
// work <- FormalDerivative(work, n)
for i := 1; i < n; i++ {
width := ((i ^ (i - 1)) + 1) >> 1
slicesXor(work[i-width:i], work[i:i+width], &r.o)
}
// work <- FFT(work, n, 0) truncated to m + dataShards
outputCount := m + r.DataShards
fftDIT(work, outputCount, n, fftSkew[:], &r.o)
// Reveal erasures
//
// Original = -ErrLocator * FFT( Derivative( IFFT( ErrLocator * ReceivedData ) ) )
// mul_mem(x, y, log_m, ) equals x[] = y[] * log_m
//
// mem layout: [Recovery Data (Power of Two = M)] [Original Data (K)] [Zero Padding out to N]
end := r.DataShards
if recoverAll {
end = r.Shards
}
for i := 0; i < end; i++ {
if len(shards[i]) != 0 {
continue
}
if cap(shards[i]) >= shardSize {
shards[i] = shards[i][:shardSize]
} else {
shards[i] = make([]byte, shardSize)
}
if i >= r.DataShards {
// Parity shard.
mul(shards[i], work[i-r.DataShards], modulus-errLocs[i-r.DataShards])
} else {
// Data shard.
mul(shards[i], work[i+m], modulus-errLocs[i+m])
}
}
return nil
}
// Basic no-frills version for decoder
func ifftDITDecoder(mtrunc int, work [][]byte, m int, skewLUT []ffe, o *options) {
// Decimation in time: Unroll 2 layers at a time
dist := 1
dist4 := 4
for dist4 <= m {
// For each set of dist*4 elements:
for r := 0; r < mtrunc; r += dist4 {
iend := r + dist
log_m01 := skewLUT[iend-1]
log_m02 := skewLUT[iend+dist-1]
log_m23 := skewLUT[iend+dist*2-1]
// For each set of dist elements:
for i := r; i < iend; i++ {
ifftDIT4(work[i:], dist, log_m01, log_m23, log_m02, o)
}
}
dist = dist4
dist4 <<= 2
}
// If there is one layer left:
if dist < m {
// Assuming that dist = m / 2
if dist*2 != m {
panic("internal error")
}
log_m := skewLUT[dist-1]
if log_m == modulus {
slicesXor(work[dist:2*dist], work[:dist], o)
} else {
for i := 0; i < dist; i++ {
ifftDIT2(
work[i],
work[i+dist],
log_m,
o,
)
}
}
}
}
// In-place FFT for encoder and decoder
func fftDIT(work [][]byte, mtrunc, m int, skewLUT []ffe, o *options) {
// Decimation in time: Unroll 2 layers at a time
dist4 := m
dist := m >> 2
for dist != 0 {
// For each set of dist*4 elements:
for r := 0; r < mtrunc; r += dist4 {
iend := r + dist
log_m01 := skewLUT[iend-1]
log_m02 := skewLUT[iend+dist-1]
log_m23 := skewLUT[iend+dist*2-1]
// For each set of dist elements:
for i := r; i < iend; i++ {
fftDIT4(
work[i:],
dist,
log_m01,
log_m23,
log_m02,
o,
)
}
}
dist4 = dist
dist >>= 2
}
// If there is one layer left:
if dist4 == 2 {
for r := 0; r < mtrunc; r += 2 {
log_m := skewLUT[r+1-1]
if log_m == modulus {
sliceXor(work[r], work[r+1], o)
} else {
fftDIT2(work[r], work[r+1], log_m, o)
}
}
}
}
// 4-way butterfly
func fftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
// First layer:
if log_m02 == modulus {
sliceXor(work[0], work[dist*2], o)
sliceXor(work[dist], work[dist*3], o)
} else {
fftDIT2(work[0], work[dist*2], log_m02, o)
fftDIT2(work[dist], work[dist*3], log_m02, o)
}
// Second layer:
if log_m01 == modulus {
sliceXor(work[0], work[dist], o)
} else {
fftDIT2(work[0], work[dist], log_m01, o)
}
if log_m23 == modulus {
sliceXor(work[dist*2], work[dist*3], o)
} else {
fftDIT2(work[dist*2], work[dist*3], log_m23, o)
}
}
// 2-way butterfly
func fftDIT2(x, y []byte, log_m ffe, o *options) {
// Reference version:
refMulAdd(x, y, log_m)
sliceXor(x, y, o)
}
// Unrolled IFFT for encoder
func ifftDITEncoder(data [][]byte, mtrunc int, work [][]byte, xorRes [][]byte, m int, skewLUT []ffe, o *options) {
// I tried rolling the memcpy/memset into the first layer of the FFT and
// found that it only yields a 4% performance improvement, which is not
// worth the extra complexity.
for i := 0; i < mtrunc; i++ {
copy(work[i], data[i])
}
for i := mtrunc; i < m; i++ {
memclr(work[i])
}
// I tried splitting up the first few layers into L3-cache sized blocks but
// found that it only provides about 5% performance boost, which is not
// worth the extra complexity.
// Decimation in time: Unroll 2 layers at a time
dist := 1
dist4 := 4
for dist4 <= m {
// For each set of dist*4 elements:
for r := 0; r < mtrunc; r += dist4 {
iend := r + dist
log_m01 := skewLUT[iend]
log_m02 := skewLUT[iend+dist]
log_m23 := skewLUT[iend+dist*2]
// For each set of dist elements:
for i := r; i < iend; i++ {
ifftDIT4(
work[i:],
dist,
log_m01,
log_m23,
log_m02,
o,
)
}
}
dist = dist4
dist4 <<= 2
// I tried alternating sweeps left->right and right->left to reduce cache misses.
// It provides about 1% performance boost when done for both FFT and IFFT, so it
// does not seem to be worth the extra complexity.
}
// If there is one layer left:
if dist < m {
// Assuming that dist = m / 2
if dist*2 != m {
panic("internal error")
}
logm := skewLUT[dist]
if logm == modulus {
slicesXor(work[dist:dist*2], work[:dist], o)
} else {
for i := 0; i < dist; i++ {
ifftDIT2(work[i], work[i+dist], logm, o)
}
}
}
// I tried unrolling this but it does not provide more than 5% performance
// improvement for 16-bit finite fields, so it's not worth the complexity.
if xorRes != nil {
slicesXor(xorRes[:m], work[:m], o)
}
}
// 4-way butterfly
func ifftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
// First layer:
if log_m01 == modulus {
sliceXor(work[0], work[dist], o)
} else {
ifftDIT2(work[0], work[dist], log_m01, o)
}
if log_m23 == modulus {
sliceXor(work[dist*2], work[dist*3], o)
} else {
ifftDIT2(work[dist*2], work[dist*3], log_m23, o)
}
// Second layer:
if log_m02 == modulus {
sliceXor(work[0], work[dist*2], o)
sliceXor(work[dist], work[dist*3], o)
} else {
ifftDIT2(work[0], work[dist*2], log_m02, o)
ifftDIT2(work[dist], work[dist*3], log_m02, o)
}
}
// 2-way butterfly
func ifftDIT2(x, y []byte, log_m ffe, o *options) {
// Reference version:
sliceXor(x, y, o)
refMulAdd(x, y, log_m)
}
// Reference version of muladd: x[] ^= y[] * log_m
func refMulAdd(x, y []byte, log_m ffe) {
lut := mul16LUTs[log_m].LUT
for off := 0; off < len(x); off += 64 {
for i := 0; i < 32; i++ {
lo := y[off+i]
hi := y[off+i+32]
prod :=
lut[(lo&15)] ^
lut[(lo>>4)+16] ^
lut[(hi&15)+32] ^
lut[(hi>>4)+48]
x[off+i] ^= byte(prod)
x[off+i+32] ^= byte(prod >> 8)
}
}
}
func memclr(s []byte) {
for i := range s {
s[i] = 0
}
}
// slicesXor calls xor for every slice pair in v1, v2.
func slicesXor(v1, v2 [][]byte, o *options) {
for i, v := range v1 {
sliceXor(v2[i], v, o)
}
}
func mul(x, y []byte, log_m ffe) {
refMul(x, y, log_m)
}
// Reference version of mul: x[] = y[] * log_m
func refMul(x, y []byte, log_m ffe) {
lut := mul16LUTs[log_m].LUT
for off := 0; off < len(x); off += 64 {
for i := 0; i < 32; i++ {
lo := y[off+i]
hi := y[off+i+32]
prod :=
lut[(lo&15)] ^
lut[(lo>>4)+16] ^
lut[(hi&15)+32] ^
lut[(hi>>4)+48]
x[off+i] = byte(prod)
x[off+i+32] = byte(prod >> 8)
}
}
}
// Returns a * Log(b)
func mulLog(a, log_b ffe) ffe {
/*
Note that this operation is not a normal multiplication in a finite
field because the right operand is already a logarithm. This is done
because it moves K table lookups from the Decode() method into the
initialization step that is less performance critical. The LogWalsh[]
table below contains precalculated logarithms so it is easier to do
all the other multiplies in that form as well.
*/
if a == 0 {
return 0
}
return expLUT[addMod(logLUT[a], log_b)]
}
// z = x + y (mod kModulus)
func addMod(a, b ffe) ffe {
sum := uint(a) + uint(b)
// Partial reduction step, allowing for kModulus to be returned
return ffe(sum + sum>>bitwidth)
}
// z = x - y (mod kModulus)
func subMod(a, b ffe) ffe {
dif := uint(a) - uint(b)
// Partial reduction step, allowing for kModulus to be returned
return ffe(dif + dif>>bitwidth)
}
// ceilPow2 returns power of two at or above n.
func ceilPow2(n int) int {
const w = int(unsafe.Sizeof(n) * 8)
return 1 << (w - bits.LeadingZeros(uint(n-1)))
}
// Decimation in time (DIT) Fast Walsh-Hadamard Transform
// Unrolls pairs of layers to perform cross-layer operations in registers
// mtrunc: Number of elements that are non-zero at the front of data
func fwht(data []ffe, m, mtrunc int) {
// Decimation in time: Unroll 2 layers at a time
dist := 1
dist4 := 4
for dist4 <= m {
// For each set of dist*4 elements:
for r := 0; r < mtrunc; r += dist4 {
// For each set of dist elements:
for i := r; i < r+dist; i++ {
fwht4(data[i:], dist)
}
}
dist = dist4
dist4 <<= 2
}
// If there is one layer left:
if dist < m {
for i := 0; i < dist; i++ {
fwht2(&data[i], &data[i+dist])
}
}
}
func fwht4(data []ffe, s int) {
s2 := s << 1
t0 := &data[0]
t1 := &data[s]
t2 := &data[s2]
t3 := &data[s2+s]
fwht2(t0, t1)
fwht2(t2, t3)
fwht2(t0, t2)
fwht2(t1, t3)
}
// {a, b} = {a + b, a - b} (Mod Q)
func fwht2(a, b *ffe) {
sum := addMod(*a, *b)
dif := subMod(*a, *b)
*a = sum
*b = dif
}
var initOnce sync.Once
func initConstants() {
initOnce.Do(func() {
initLUTs()
initFFTSkew()
initMul16LUT()
})
}
// Initialize logLUT, expLUT.
func initLUTs() {
cantorBasis := [bitwidth]ffe{
0x0001, 0xACCA, 0x3C0E, 0x163E,
0xC582, 0xED2E, 0x914C, 0x4012,
0x6C98, 0x10D8, 0x6A72, 0xB900,
0xFDB8, 0xFB34, 0xFF38, 0x991E,
}
expLUT = &[order]ffe{}
logLUT = &[order]ffe{}
// LFSR table generation:
state := 1
for i := ffe(0); i < modulus; i++ {
expLUT[state] = i
state <<= 1
if state >= order {
state ^= polynomial
}
}
expLUT[0] = modulus
// Conversion to Cantor basis:
logLUT[0] = 0
for i := 0; i < bitwidth; i++ {
basis := cantorBasis[i]
width := 1 << i
for j := 0; j < width; j++ {
logLUT[j+width] = logLUT[j] ^ basis
}
}
for i := 0; i < order; i++ {
logLUT[i] = expLUT[logLUT[i]]
}
for i := 0; i < order; i++ {
expLUT[logLUT[i]] = ffe(i)
}
expLUT[modulus] = expLUT[0]
}
// Initialize fftSkew.
func initFFTSkew() {
var temp [bitwidth - 1]ffe
// Generate FFT skew vector {1}:
for i := 1; i < bitwidth; i++ {
temp[i-1] = ffe(1 << i)
}
fftSkew = &[modulus]ffe{}
logWalsh = &[order]ffe{}
for m := 0; m < bitwidth-1; m++ {
step := 1 << (m + 1)
fftSkew[1<<m-1] = 0
for i := m; i < bitwidth-1; i++ {
s := 1 << (i + 1)
for j := 1<<m - 1; j < s; j += step {
fftSkew[j+s] = fftSkew[j] ^ temp[i]
}
}
temp[m] = modulus - logLUT[mulLog(temp[m], logLUT[temp[m]^1])]
for i := m + 1; i < bitwidth-1; i++ {
sum := addMod(logLUT[temp[i]^1], temp[m])
temp[i] = mulLog(temp[i], sum)
}
}
for i := 0; i < modulus; i++ {
fftSkew[i] = logLUT[fftSkew[i]]
}
// Precalculate FWHT(Log[i]):
for i := 0; i < order; i++ {
logWalsh[i] = logLUT[i]
}
logWalsh[0] = 0
fwht(logWalsh[:], order, order)
}
func initMul16LUT() {
mul16LUTs = &[order]mul16LUT{}
// For each log_m multiplicand:
for log_m := 0; log_m < order; log_m++ {
lut := &mul16LUTs[log_m]
for nibble, shift := 0, 0; nibble < 4; {
nibble_lut := lut.LUT[nibble*16:]
for xnibble := 0; xnibble < 16; xnibble++ {
prod := mulLog(ffe(xnibble<<shift), ffe(log_m))
nibble_lut[xnibble] = prod
}
nibble++
shift += 4
}
}
}

View File

@ -270,9 +270,17 @@ func buildXorMatrix(dataShards, totalShards int) (matrix, error) {
// New creates a new encoder and initializes it to
// the number of data shards and parity shards that
// you want to use. You can reuse this encoder.
// Note that the maximum number of total shards is 256.
// Note that the maximum number of total shards is 65536, with some
// restrictions for a total larger than 256:
//
// - Shard sizes must be multiple of 64
// - The methods Join/Split/Update/EncodeIdx are not supported
//
// If no options are supplied, default options are used.
func New(dataShards, parityShards int, opts ...Option) (Encoder, error) {
if dataShards+parityShards > 256 {
return newFF16(dataShards, parityShards, opts...)
}
r := reedSolomon{
DataShards: dataShards,
ParityShards: parityShards,
@ -287,10 +295,6 @@ func New(dataShards, parityShards int, opts ...Option) (Encoder, error) {
return nil, ErrInvShardNum
}
if dataShards+parityShards > 256 {
return nil, ErrMaxShardNum
}
if parityShards == 0 {
return &r, nil
}

View File

@ -191,7 +191,9 @@ func TestEncoding(t *testing.T) {
// note that par1 matric will fail on some combinations.
var testSizes = [][2]int{
{1, 0}, {3, 0}, {5, 0}, {8, 0}, {10, 0}, {12, 0}, {14, 0}, {41, 0}, {49, 0},
{1, 1}, {1, 2}, {3, 3}, {3, 1}, {5, 3}, {8, 4}, {10, 30}, {12, 10}, {14, 7}, {41, 17}, {49, 1}, {5, 20}}
{1, 1}, {1, 2}, {3, 3}, {3, 1}, {5, 3}, {8, 4}, {10, 30}, {12, 10}, {14, 7}, {41, 17}, {49, 1}, {5, 20},
{256, 1},
}
var testDataSizes = []int{10, 100, 1000, 10001, 100003, 1000055}
var testDataSizesShort = []int{10, 10001, 100003}
@ -201,10 +203,14 @@ func testEncoding(t *testing.T, o ...Option) {
rng := rand.New(rand.NewSource(0xabadc0cac01a))
t.Run(fmt.Sprintf("%dx%d", data, parity), func(t *testing.T) {
sz := testDataSizes
if testing.Short() {
if testing.Short() || data+parity > 256 {
sz = testDataSizesShort
}
for _, perShard := range sz {
if data+parity > 256 {
// Round up to 64 bytes.
perShard = (perShard + 63) &^ 63
}
t.Run(fmt.Sprint(perShard), func(t *testing.T) {
r, err := New(data, parity, testOptions(o...)...)
@ -295,6 +301,9 @@ func testEncodingIdx(t *testing.T, o ...Option) {
data, parity := size[0], size[1]
rng := rand.New(rand.NewSource(0xabadc0cac01a))
t.Run(fmt.Sprintf("%dx%d", data, parity), func(t *testing.T) {
if data+parity > 256 {
t.Skip("EncodingIdx not supported for total shards > 256")
}
sz := testDataSizes
if testing.Short() {
sz = testDataSizesShort
@ -1663,11 +1672,11 @@ func TestNew(t *testing.T) {
{255, 1, nil},
{255, 0, nil},
{1, 0, nil},
{256, 256, ErrMaxShardNum},
{65536, 65536, ErrMaxShardNum},
{0, 1, ErrInvShardNum},
{1, -1, ErrInvShardNum},
{256, 1, ErrMaxShardNum},
{65636, 1, ErrMaxShardNum},
// overflow causes r.Shards to be negative
{256, int(^uint(0) >> 1), errInvalidRowSize},

View File

@ -147,6 +147,10 @@ type rsStream struct {
// you want to use. You can reuse this encoder.
// Note that the maximum number of data shards is 256.
func NewStream(dataShards, parityShards int, o ...Option) (StreamEncoder, error) {
if dataShards+parityShards > 256 {
return nil, ErrMaxShardNum
}
r := rsStream{o: defaultOptions}
for _, opt := range o {
opt(&r.o)