Add fast path for multiplication by one (#130)

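When multiplying by one, the product equals the input, so the table lookup can be skipped: galMulSlice becomes a plain copy and galMulSliceXor becomes a plain XOR of the input into the output.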
master
Klaus Post 2020-05-06 11:14:25 +02:00 committed by GitHub
parent 0e9e10435f
commit cb7a0b5aef
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 52 additions and 12 deletions

View File

@ -53,6 +53,10 @@ func galMulSSSE3Xor(low, high, in, out []byte) {
const bigSwitchover = 128 const bigSwitchover = 128
func galMulSlice(c byte, in, out []byte, o *options) { func galMulSlice(c byte, in, out []byte, o *options) {
if c == 1 {
copy(out, in)
return
}
if o.useAVX2 { if o.useAVX2 {
if len(in) >= bigSwitchover { if len(in) >= bigSwitchover {
galMulAVX2_64(mulTableLow[c][:], mulTableHigh[c][:], in, out) galMulAVX2_64(mulTableLow[c][:], mulTableHigh[c][:], in, out)
@ -80,6 +84,11 @@ func galMulSlice(c byte, in, out []byte, o *options) {
} }
func galMulSliceXor(c byte, in, out []byte, o *options) { func galMulSliceXor(c byte, in, out []byte, o *options) {
if c == 1 {
sliceXor(in, out, o)
return
}
if o.useAVX2 { if o.useAVX2 {
if len(in) >= bigSwitchover { if len(in) >= bigSwitchover {
galMulAVX2Xor_64(mulTableLow[c][:], mulTableHigh[c][:], in, out) galMulAVX2Xor_64(mulTableLow[c][:], mulTableHigh[c][:], in, out)
@ -107,8 +116,8 @@ func galMulSliceXor(c byte, in, out []byte, o *options) {
} }
// slice galois add // slice galois add
func sliceXor(in, out []byte, sse2 bool) { func sliceXor(in, out []byte, o *options) {
if sse2 { if o.useSSE2 {
if len(in) >= bigSwitchover { if len(in) >= bigSwitchover {
sSE2XorSlice_64(in, out) sSE2XorSlice_64(in, out)
done := (len(in) >> 6) << 6 done := (len(in) >> 6) << 6

View File

@ -14,6 +14,10 @@ func galMulNEON(c uint64, in, out []byte)
func galMulXorNEON(c uint64, in, out []byte) func galMulXorNEON(c uint64, in, out []byte)
func galMulSlice(c byte, in, out []byte, o *options) { func galMulSlice(c byte, in, out []byte, o *options) {
if c == 1 {
copy(out, in)
return
}
var done int var done int
galMulNEON(uint64(c), in, out) galMulNEON(uint64(c), in, out)
done = (len(in) >> 5) << 5 done = (len(in) >> 5) << 5
@ -28,6 +32,10 @@ func galMulSlice(c byte, in, out []byte, o *options) {
} }
func galMulSliceXor(c byte, in, out []byte, o *options) { func galMulSliceXor(c byte, in, out []byte, o *options) {
if c == 1 {
sliceXor(in, out, o)
return
}
var done int var done int
galMulXorNEON(uint64(c), in, out) galMulXorNEON(uint64(c), in, out)
done = (len(in) >> 5) << 5 done = (len(in) >> 5) << 5
@ -42,7 +50,7 @@ func galMulSliceXor(c byte, in, out []byte, o *options) {
} }
// slice galois add // slice galois add
func sliceXor(in, out []byte, sse2 bool) { func sliceXor(in, out []byte, o *options) {
for n, input := range in { for n, input := range in {
out[n] ^= input out[n] ^= input
} }

View File

@ -7,23 +7,33 @@
package reedsolomon package reedsolomon
func galMulSlice(c byte, in, out []byte, o *options) { func galMulSlice(c byte, in, out []byte, o *options) {
mt := mulTable[c][:256]
out = out[:len(in)] out = out[:len(in)]
if c == 1 {
copy(out, in)
return
}
mt := mulTable[c][:256]
for n, input := range in { for n, input := range in {
out[n] = mt[input] out[n] = mt[input]
} }
} }
func galMulSliceXor(c byte, in, out []byte, o *options) { func galMulSliceXor(c byte, in, out []byte, o *options) {
mt := mulTable[c][:256]
out = out[:len(in)] out = out[:len(in)]
if c == 1 {
for n, input := range in {
out[n] ^= input
}
return
}
mt := mulTable[c][:256]
for n, input := range in { for n, input := range in {
out[n] ^= mt[input] out[n] ^= mt[input]
} }
} }
// slice galois add // slice galois add
func sliceXor(in, out []byte, sse2 bool) { func sliceXor(in, out []byte, o *options) {
for n, input := range in { for n, input := range in {
out[n] ^= input out[n] ^= input
} }

View File

@ -32,6 +32,10 @@ func galMulPpcXor(low, high, in, out []byte) {
*/ */
func galMulSlice(c byte, in, out []byte, o *options) { func galMulSlice(c byte, in, out []byte, o *options) {
if c == 1 {
copy(out, in)
return
}
done := (len(in) >> 4) << 4 done := (len(in) >> 4) << 4
if done > 0 { if done > 0 {
galMulPpc(mulTableLow[c][:], mulTableHigh[c][:], in[:done], out) galMulPpc(mulTableLow[c][:], mulTableHigh[c][:], in[:done], out)
@ -46,6 +50,10 @@ func galMulSlice(c byte, in, out []byte, o *options) {
} }
func galMulSliceXor(c byte, in, out []byte, o *options) { func galMulSliceXor(c byte, in, out []byte, o *options) {
if c == 1 {
sliceXor(in, out, o)
return
}
done := (len(in) >> 4) << 4 done := (len(in) >> 4) << 4
if done > 0 { if done > 0 {
galMulPpcXor(mulTableLow[c][:], mulTableHigh[c][:], in[:done], out) galMulPpcXor(mulTableLow[c][:], mulTableHigh[c][:], in[:done], out)
@ -60,7 +68,7 @@ func galMulSliceXor(c byte, in, out []byte, o *options) {
} }
// slice galois add // slice galois add
func sliceXor(in, out []byte, sse2 bool) { func sliceXor(in, out []byte, o *options) {
for n, input := range in { for n, input := range in {
out[n] ^= input out[n] ^= input
} }

View File

@ -190,7 +190,9 @@ func TestSliceGalAdd(t *testing.T) {
for i := range expect { for i := range expect {
expect[i] = in[i] ^ out[i] expect[i] = in[i] ^ out[i]
} }
sliceXor(in, out, false) noSSE2 := defaultOptions
noSSE2.useSSE2 = false
sliceXor(in, out, &noSSE2)
if 0 != bytes.Compare(out, expect) { if 0 != bytes.Compare(out, expect) {
t.Errorf("got %#v, expected %#v", out, expect) t.Errorf("got %#v, expected %#v", out, expect)
} }
@ -198,7 +200,7 @@ func TestSliceGalAdd(t *testing.T) {
for i := range expect { for i := range expect {
expect[i] = in[i] ^ out[i] expect[i] = in[i] ^ out[i]
} }
sliceXor(in, out, true) sliceXor(in, out, &defaultOptions)
if 0 != bytes.Compare(out, expect) { if 0 != bytes.Compare(out, expect) {
t.Errorf("got %#v, expected %#v", out, expect) t.Errorf("got %#v, expected %#v", out, expect)
} }

View File

@ -392,7 +392,7 @@ func (r reedSolomon) updateParityShards(matrixRows, oldinputs, newinputs, output
} }
oldin := oldinputs[c] oldin := oldinputs[c]
// oldinputs data will be changed // oldinputs data will be changed
sliceXor(in, oldin, r.o.useSSE2) sliceXor(in, oldin, &r.o)
for iRow := 0; iRow < outputCount; iRow++ { for iRow := 0; iRow < outputCount; iRow++ {
galMulSliceXor(matrixRows[iRow][c], oldin, outputs[iRow], &r.o) galMulSliceXor(matrixRows[iRow][c], oldin, outputs[iRow], &r.o)
} }
@ -419,7 +419,7 @@ func (r reedSolomon) updateParityShardsP(matrixRows, oldinputs, newinputs, outpu
} }
oldin := oldinputs[c] oldin := oldinputs[c]
// oldinputs data will be changed // oldinputs data will be changed
sliceXor(in[start:stop], oldin[start:stop], r.o.useSSE2) sliceXor(in[start:stop], oldin[start:stop], &r.o)
for iRow := 0; iRow < outputCount; iRow++ { for iRow := 0; iRow < outputCount; iRow++ {
galMulSliceXor(matrixRows[iRow][c], oldin[start:stop], outputs[iRow][start:stop], &r.o) galMulSliceXor(matrixRows[iRow][c], oldin[start:stop], outputs[iRow][start:stop], &r.o)
} }

View File

@ -1171,7 +1171,10 @@ func TestCodeSomeShards(t *testing.T) {
} }
func TestStandardMatrices(t *testing.T) { func TestStandardMatrices(t *testing.T) {
t.Skip("Skipping slow matrix check (~2 min)") if testing.Short() || runtime.GOMAXPROCS(0) < 4 {
// Runtime ~15s.
t.Skip("Skipping slow matrix check")
}
var wg sync.WaitGroup var wg sync.WaitGroup
wg.Add(256 - 1) wg.Add(256 - 1)
for i := 1; i < 256; i++ { for i := 1; i < 256; i++ {