Use VPTERNLOGD on GOAMD64=v4 (#182)

* Use VPTERNLOGD on GOAMD64=v4
* Bump to Go 1.18
master
Klaus Post 2022-03-16 03:10:29 -07:00 committed by GitHub
parent 8e17d64e52
commit daf81ef0bd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 58767 additions and 13771 deletions

View File

@ -11,7 +11,7 @@ jobs:
build:
strategy:
matrix:
go-version: [1.15.x, 1.16.x, 1.17.x]
go-version: [1.16.x, 1.17.x, 1.18.x]
os: [ubuntu-latest, macos-latest, windows-latest]
env:
CGO_ENABLED: 0
@ -47,7 +47,7 @@ jobs:
- name: Set up Go
uses: actions/setup-go@v2
with:
go-version: 1.17.x
go-version: 1.18.x
- name: Checkout code
uses: actions/checkout@v2
@ -86,3 +86,5 @@ jobs:
CGO_ENABLED: 1
run: go test -no-avx512 -no-avx2 -no-ssse3 -short -race .
- name: Test Microarch v4
run: go run testlevel.go 4;if [ $? -eq 0 ]; then GOAMD64=v4 go test -no-avx512 ./...; else true; fi

View File

@ -1,65 +0,0 @@
language: go
os:
- linux
- osx
- windows
arch:
- amd64
- arm64
- ppc64le
- s390x
go:
- 1.14.x
- 1.15.x
- 1.16.x
- master
env:
- GO111MODULE=off CGO_ENABLED=0
install:
- go get ./...
script:
- go vet ./...
- go test -cpu=1,2 .
- go test -tags=noasm -cpu=1,2 .
- go build examples/simple-decoder.go
- go build examples/simple-encoder.go
- go build examples/stream-decoder.go
- go build examples/stream-encoder.go
jobs:
allow_failures:
- go: 'master'
- arch: s390x
fast_finish: true
include:
- stage: other
go: 1.16.x
os: linux
arch: amd64
script:
- diff <(gofmt -d .) <(printf "")
- diff <(gofmt -d ./examples) <(printf "")
- go get github.com/klauspost/asmfmt&&go install github.com/klauspost/asmfmt/cmd/asmfmt
- diff <(asmfmt -d .) <(printf "")
- CGO_ENABLED=1 go test -cpu=1 -short -race .
- CGO_ENABLED=1 go test -cpu=2 -short -race .
- CGO_ENABLED=1 go test -tags=noasm -cpu=1 -short -race .
- CGO_ENABLED=1 go test -tags=noasm -cpu=4 -short -race .
- CGO_ENABLED=1 go test -no-avx512 -short -race .
- CGO_ENABLED=1 go test -no-avx512 -no-avx2 -short -race .
- CGO_ENABLED=1 go test -no-avx512 -no-avx2 -no-ssse3 -short -race .
- GOOS=linux GOARCH=386 go test -short .
- stage: other
go: 1.15.x
os: linux
arch: amd64
script:
- go test -no-avx512
- go test -no-avx512 -no-avx2
- go test -no-avx512 -no-avx2 -no-ssse3

View File

@ -26,6 +26,7 @@ Using Go modules recommended.
# Changes
## 2021
* Use `GOAMD64=v4` to enable faster AVX2 code paths (they use `VPTERNLOGD`, which requires AVX512F and AVX512VL).
* Add progressive shard encoding.
* Wider AVX2 loops
* Limit concurrency on AVX2, since we are likely memory bound.

34
_gen/cleanup.go Normal file
View File

@ -0,0 +1,34 @@
//go:build custom
// +build custom
package main
import (
"bytes"
"flag"
"io/ioutil"
"log"
"os"
"github.com/klauspost/asmfmt"
)
// main rewrites, in place, every assembly file named on the command line.
//
// Each file is read in full, every occurrence of a tab followed by "// #" is
// collapsed to a bare "#" (turning such commented-out lines into lines that
// start with "#"), the result is formatted with asmfmt, and the formatted
// bytes are written back to the same path. The first error of any kind ends
// the program through log.Fatalln.
func main() {
	flag.Parse()
	for _, path := range flag.Args() {
		src, err := ioutil.ReadFile(path)
		if err != nil {
			log.Fatalln(err)
		}

		// Drop the leading tab and comment marker in front of '#'.
		src = bytes.ReplaceAll(src, []byte("\t// #"), []byte("#"))

		formatted, err := asmfmt.Format(bytes.NewReader(src))
		if err != nil {
			log.Fatalln(err)
		}

		if err := ioutil.WriteFile(path, formatted, os.ModePerm); err != nil {
			log.Fatalln(err)
		}
	}
}

View File

@ -4,6 +4,7 @@
//go:generate go run gen.go -out ../galois_gen_amd64.s -stubs ../galois_gen_amd64.go -pkg=reedsolomon
//go:generate go fmt ../galois_gen_switch_amd64.go
//go:generate go fmt ../galois_gen_amd64.go
//go:generate go run cleanup.go ../galois_gen_amd64.s
package main
@ -120,6 +121,17 @@ func galMulSlicesAvx2Xor(matrix []byte, in, out [][]byte, start, stop int) int {
Generate()
}
// VPXOR3way will 3-way xor a and b and dst.
//
// It emits two alternative instruction sequences that both leave
// dst = a ^ b ^ dst, wrapped in "#ifdef GOAMD64_v4" / "#else" / "#endif"
// markers. The markers are emitted through Comment, so they reach the
// generated assembly as comments; NOTE(review): presumably the cleanup.go
// go:generate step is what turns them into real preprocessor lines - confirm.
func VPXOR3way(a, b, dst reg.VecVirtual) {
Comment("#ifdef GOAMD64_v4")
// AVX512F and AVX512VL required
// Immediate 0x96 (0b10010110) is the truth table that is 1 for an odd number
// of set input bits, i.e. a three-input XOR, done in a single instruction.
VPTERNLOGD(U8(0x96), a, b, dst)
Comment("#else")
// Fallback: the same result using two plain VPXOR instructions.
VPXOR(a, dst, dst) // dst = a^dst
VPXOR(b, dst, dst) // dst = (a^dst)^b
Comment("#endif")
}
func genMulAvx2(name string, inputs int, outputs int, xor bool) {
const perLoopBits = 5
const perLoop = 1 << perLoopBits
@ -342,8 +354,7 @@ func genMulAvx2(name string, inputs int, outputs int, xor bool) {
// We don't have any existing data, write directly.
VPXOR(lookLow, lookHigh, dst[j])
} else {
VPXOR(lookLow, lookHigh, lookLow)
VPXOR(lookLow, dst[j], dst[j])
VPXOR3way(lookLow, lookHigh, dst[j])
}
}
}
@ -587,9 +598,9 @@ func genMulAvx2Sixty64(name string, inputs int, outputs int, xor bool) {
VMOVDQU(Mem{Base: matrixBase, Disp: 64 * (i*outputs + j)}, lookLow)
VMOVDQU(Mem{Base: matrixBase, Disp: 32 + 64*(i*outputs+j)}, lookHigh)
VPSHUFB(in2Low, lookLow, lookLow2)
VPSHUFB(inLow, lookLow, lookLow)
VPSHUFB(inLow, lookLow, lookLow) // Reuse lookLow to save a reg
VPSHUFB(in2High, lookHigh, lookHigh2)
VPSHUFB(inHigh, lookHigh, lookHigh)
VPSHUFB(inHigh, lookHigh, lookHigh) // Reuse lookHigh to save a reg
} else {
VPSHUFB(inLow, inLo[i*outputs+j], lookLow)
VPSHUFB(in2Low, inLo[i*outputs+j], lookLow2)
@ -601,10 +612,8 @@ func genMulAvx2Sixty64(name string, inputs int, outputs int, xor bool) {
VPXOR(lookLow, lookHigh, dst[j])
VPXOR(lookLow2, lookHigh2, dst2[j])
} else {
VPXOR(lookLow, lookHigh, lookLow)
VPXOR(lookLow2, lookHigh2, lookLow2)
VPXOR(lookLow, dst[j], dst[j])
VPXOR(lookLow2, dst2[j], dst2[j])
VPXOR3way(lookLow, lookHigh, dst[j])
VPXOR3way(lookLow2, lookHigh2, dst2[j])
}
}
}

View File

@ -1,5 +1,8 @@
module github.com/klauspost/reedsolomon/_gen
go 1.14
go 1.16
require github.com/mmcloughlin/avo v0.2.0
require (
github.com/klauspost/asmfmt v1.3.1
github.com/mmcloughlin/avo v0.4.0
)

View File

@ -1,29 +1,32 @@
github.com/mmcloughlin/avo v0.2.0 h1:6vhoSaKtxb6f4RiH+LK2qL6GSMpFzhEwJYTTSZNy09w=
github.com/mmcloughlin/avo v0.2.0/go.mod h1:5tidO2Z9Z7N6X7UMcGg+1KTj51O8OxYDCMHxCZTVpEA=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
golang.org/x/arch v0.0.0-20210405154355-08b684f594a5/go.mod h1:flIaEI6LNU6xOCD5PaJvn9wGP0agmIOqjrtsKGRguv4=
github.com/klauspost/asmfmt v1.3.1 h1:7xZi1N7s9gTLbqiM8KUv8TLyysavbTRGBT5/ly0bRtw=
github.com/klauspost/asmfmt v1.3.1/go.mod h1:AG8TuvYojzulgDAMCnYn50l/5QV3Bs/tp6j0HLHbNSE=
github.com/mmcloughlin/avo v0.4.0 h1:jeHDRktVD+578ULxWpQHkilor6pkdLF7u7EiTzDbfcU=
github.com/mmcloughlin/avo v0.4.0/go.mod h1:RW9BfYA3TgO9uCdNrKU2h6J8cPD8ZLznvfgHAeszb1s=
github.com/yuin/goldmark v1.4.0/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/mod v0.3.0 h1:RM4zey1++hCTbCVQfnWeKs9/IEsaBLA8vTkd0WVtmH4=
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.4.2 h1:Gz96sIWK3OalVv/I/qNygP42zyoKp3xptRVCWRFEBvo=
golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.0.0-20210805182204-aaa1db679c0d/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210403161142-5e06dd20ab57 h1:F5Gozwx4I1xtr/sr/8CFbb57iKi3297KFs0QDbGN60A=
golang.org/x/sys v0.0.0-20210403161142-5e06dd20ab57/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20211030160813-b3129d9d1021 h1:giLT+HuUP/gXYrG2Plg9WTjj4qhfgaW424ZIFog3rlk=
golang.org/x/sys v0.0.0-20211030160813-b3129d9d1021/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.1.0 h1:po9/4sTYwZU9lPhi1tOrb4hCv3qrhiQ77LZfGa2OjwY=
golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0=
golang.org/x/tools v0.1.7 h1:6j8CgantCy3yc8JGBqkDLMKWqZ0RDU2g1HVgacojGWQ=
golang.org/x/tools v0.1.7/go.mod h1:LGqMHiF4EqQNHR1JncWGqT5BVaXmza+X+BDGol+dOxo=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE=

View File

@ -1,20 +0,0 @@
os: Visual Studio 2015
platform: x64
clone_folder: c:\gopath\src\github.com\klauspost\reedsolomon
# environment variables
environment:
GOPATH: c:\gopath
install:
- echo %PATH%
- echo %GOPATH%
- go version
- go env
- go get -d ./...
build_script:
- go test -v -cpu=2 ./...
- go test -cpu=1,2,4 -short -race ./...

View File

@ -104,7 +104,7 @@ func setupMatrix84(matrixRows [][]byte, inputOffset, outputOffset int, matrix *[
// Invoke AVX512 routine for single output row in parallel
func galMulAVX512Parallel81(in, out [][]byte, matrixRows [][]byte, inputOffset, outputOffset, start, stop int, matrix81 *[matrixSize81]byte) {
done := stop - start
if done <= 0 {
if done <= 0 || len(in) == 0 || len(out) == 0 {
return
}
@ -139,7 +139,7 @@ func galMulAVX512Parallel81(in, out [][]byte, matrixRows [][]byte, inputOffset,
// Invoke AVX512 routine for 2 output rows in parallel
func galMulAVX512Parallel82(in, out [][]byte, matrixRows [][]byte, inputOffset, outputOffset, start, stop int, matrix82 *[matrixSize82]byte) {
done := stop - start
if done <= 0 {
if done <= 0 || len(in) == 0 || len(out) == 0 {
return
}
@ -174,7 +174,7 @@ func galMulAVX512Parallel82(in, out [][]byte, matrixRows [][]byte, inputOffset,
// Invoke AVX512 routine for 4 output rows in parallel
func galMulAVX512Parallel84(in, out [][]byte, matrixRows [][]byte, inputOffset, outputOffset, start, stop int, matrix84 *[matrixSize84]byte) {
done := stop - start
if done <= 0 {
if done <= 0 || len(in) == 0 || len(out) == 0 {
return
}

File diff suppressed because it is too large Load Diff

4
go.mod
View File

@ -1,5 +1,5 @@
module github.com/klauspost/reedsolomon
go 1.14
go 1.15
require github.com/klauspost/cpuid/v2 v2.0.6
require github.com/klauspost/cpuid/v2 v2.0.11

4
go.sum
View File

@ -1,2 +1,2 @@
github.com/klauspost/cpuid/v2 v2.0.6 h1:dQ5ueTiftKxp0gyjKSx5+8BtPWkyQbd95m8Gys/RarI=
github.com/klauspost/cpuid/v2 v2.0.6/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/klauspost/cpuid/v2 v2.0.11 h1:i2lw1Pm7Yi/4O6XCSyJWqEHI2MDw2FzUK6o/D21xn2A=
github.com/klauspost/cpuid/v2 v2.0.11/go.mod h1:g2LTdtYhdyuGPqyWyv7qRAmj1WBqxuObKfj5c0PQa7c=

31
testlevel.go Normal file
View File

@ -0,0 +1,31 @@
//go:build ignore
// +build ignore
package main
import (
"flag"
"log"
"strconv"
"github.com/klauspost/cpuid/v2"
)
// main checks whether the running CPU reaches a given x86-64 level.
//
// It expects exactly one command-line argument: an integer from 1 to 4.
// The program returns normally (exit status 0) only when
// cpuid.CPU.X64Level() is at least that value. Wrong usage, an argument that
// is not an integer, an out-of-range level, or an unsupported level all end
// in log.Fatalln, which prints the message and exits with status 1.
func main() {
	const usage = "Supply CPU level 1-4 to test as argument"

	flag.Parse()
	if flag.NArg() != 1 {
		log.Fatalln(usage)
	}

	want, err := strconv.Atoi(flag.Arg(0))
	switch {
	case err != nil:
		log.Fatalln("Unable to parse level:", err)
	case want < 1 || want > 4:
		log.Fatalln(usage)
	case cpuid.CPU.X64Level() < want:
		// log.Fatalln does os.Exit(1), signalling "not supported" to the caller.
		log.Fatalln("CPU level not supported")
	}
}