remove unused variables from #if SSE blocks
Signed-off-by: Loic Dachary <loic@dachary.org>
master
parent
59cb51046b
commit
191b86b5d2
|
@ -90,7 +90,6 @@ int xor)
|
|||
int i;
|
||||
gf_val_128_t s128;
|
||||
gf_val_128_t d128;
|
||||
uint64_t c128[2];
|
||||
gf_region_data rd;
|
||||
__m128i a,b;
|
||||
__m128i result0,result1;
|
||||
|
@ -106,8 +105,6 @@ int xor)
|
|||
if (val[1] == 1) { gf_multby_one(src, dest, bytes, xor); return; }
|
||||
}
|
||||
|
||||
set_zero(c128, 0);
|
||||
|
||||
s128 = (gf_val_128_t) src;
|
||||
d128 = (gf_val_128_t) dest;
|
||||
|
||||
|
@ -384,7 +381,7 @@ gf_w128_sse_bytwo_p_multiply(gf_t *gf, gf_val_128_t a128, gf_val_128_t b128, gf_
|
|||
{
|
||||
#if defined(INTEL_SSE4)
|
||||
int i;
|
||||
__m128i a, b, pp, one, prod, amask, l_middle_one, u_middle_one;
|
||||
__m128i a, b, pp, prod, amask, u_middle_one;
|
||||
/*John: pmask is always the highest bit set, and the rest zeros. amask changes, it's a countdown.*/
|
||||
uint32_t topbit, middlebit, pmask; /* this is used as a boolean value */
|
||||
gf_internal_t *h;
|
||||
|
@ -400,7 +397,6 @@ gf_w128_sse_bytwo_p_multiply(gf_t *gf, gf_val_128_t a128, gf_val_128_t b128, gf_
|
|||
pmask = 0x80000000;
|
||||
amask = _mm_insert_epi32(prod, 0x80000000, 0x3);
|
||||
u_middle_one = _mm_insert_epi32(prod, 1, 0x2);
|
||||
l_middle_one = _mm_insert_epi32(prod, 1 << 31, 0x1);
|
||||
|
||||
for (i = 0; i < 64; i++) {
|
||||
topbit = (_mm_extract_epi32(prod, 0x3) & pmask);
|
||||
|
@ -605,7 +601,7 @@ void
|
|||
gf_w128_split_4_128_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_128_t val, int bytes, int xor)
|
||||
{
|
||||
gf_internal_t *h;
|
||||
int i, m, j, k, tindex;
|
||||
int i, j, k;
|
||||
uint64_t pp, v[2], s, *s64, *d64, *top;
|
||||
__m128i p, tables[32][16];
|
||||
struct gf_w128_split_4_128_data *ld;
|
||||
|
@ -704,8 +700,8 @@ void
|
|||
gf_w128_split_4_128_sse_altmap_multiply_region(gf_t *gf, void *src, void *dest, gf_val_128_t val, int bytes, int xor)
|
||||
{
|
||||
gf_internal_t *h;
|
||||
int i, m, j, k, tindex;
|
||||
uint64_t pp, v[2], s, *s64, *d64, *top;
|
||||
int i, j, k;
|
||||
uint64_t pp, v[2], *s64, *d64, *top;
|
||||
__m128i si, tables[32][16], p[16], v0, mask1;
|
||||
struct gf_w128_split_4_128_data *ld;
|
||||
uint8_t btable[16];
|
||||
|
|
20
src/gf_w16.c
20
src/gf_w16.c
|
@ -450,7 +450,7 @@ gf_w16_clm_multiply_2 (gf_t *gf, gf_val_32_t a16, gf_val_32_t b16)
|
|||
__m128i a, b;
|
||||
__m128i result;
|
||||
__m128i prim_poly;
|
||||
__m128i v, w;
|
||||
__m128i w;
|
||||
gf_internal_t * h = gf->scratch;
|
||||
|
||||
a = _mm_insert_epi32 (_mm_setzero_si128(), a16, 0);
|
||||
|
@ -497,7 +497,7 @@ gf_w16_clm_multiply_3 (gf_t *gf, gf_val_32_t a16, gf_val_32_t b16)
|
|||
__m128i a, b;
|
||||
__m128i result;
|
||||
__m128i prim_poly;
|
||||
__m128i v, w;
|
||||
__m128i w;
|
||||
gf_internal_t * h = gf->scratch;
|
||||
|
||||
a = _mm_insert_epi32 (_mm_setzero_si128(), a16, 0);
|
||||
|
@ -537,7 +537,7 @@ gf_w16_clm_multiply_4 (gf_t *gf, gf_val_32_t a16, gf_val_32_t b16)
|
|||
__m128i a, b;
|
||||
__m128i result;
|
||||
__m128i prim_poly;
|
||||
__m128i v, w;
|
||||
__m128i w;
|
||||
gf_internal_t * h = gf->scratch;
|
||||
|
||||
a = _mm_insert_epi32 (_mm_setzero_si128(), a16, 0);
|
||||
|
@ -1001,12 +1001,12 @@ gf_w16_split_4_16_lazy_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_v
|
|||
{
|
||||
#ifdef INTEL_SSSE3
|
||||
uint64_t i, j, *s64, *d64, *top64;;
|
||||
uint64_t a, c, prod;
|
||||
uint64_t c, prod;
|
||||
uint8_t low[4][16];
|
||||
uint8_t high[4][16];
|
||||
gf_region_data rd;
|
||||
|
||||
__m128i mask, ta, tb, ti, tpl, tph, tlow[4], thigh[4], tta, ttb, shuffler, unshuffler, lmask;
|
||||
__m128i mask, ta, tb, ti, tpl, tph, tlow[4], thigh[4], tta, ttb, lmask;
|
||||
|
||||
if (val == 0) { gf_multby_zero(dest, bytes, xor); return; }
|
||||
if (val == 1) { gf_multby_one(src, dest, bytes, xor); return; }
|
||||
|
@ -1138,7 +1138,6 @@ gf_w16_split_4_16_lazy_sse_altmap_multiply_region(gf_t *gf, void *src, void *des
|
|||
uint8_t low[4][16];
|
||||
uint8_t high[4][16];
|
||||
gf_region_data rd;
|
||||
struct gf_single_table_data *std;
|
||||
__m128i mask, ta, tb, ti, tpl, tph, tlow[4], thigh[4];
|
||||
|
||||
if (val == 0) { gf_multby_zero(dest, bytes, xor); return; }
|
||||
|
@ -1553,7 +1552,6 @@ gf_w16_bytwo_p_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t
|
|||
int i;
|
||||
uint8_t *s8, *d8;
|
||||
uint32_t vrev;
|
||||
uint64_t amask;
|
||||
__m128i pp, m1, m2, ta, prod, t1, t2, tp, one, v;
|
||||
struct gf_w16_bytwo_data *btd;
|
||||
gf_region_data rd;
|
||||
|
@ -1614,9 +1612,8 @@ static
|
|||
void
|
||||
gf_w16_bytwo_b_sse_region_2_noxor(gf_region_data *rd, struct gf_w16_bytwo_data *btd)
|
||||
{
|
||||
int i;
|
||||
uint8_t *d8, *s8, tb;
|
||||
__m128i pp, m1, m2, t1, t2, va, vb;
|
||||
uint8_t *d8, *s8;
|
||||
__m128i pp, m1, m2, t1, t2, va;
|
||||
|
||||
s8 = (uint8_t *) rd->s_start;
|
||||
d8 = (uint8_t *) rd->d_start;
|
||||
|
@ -1640,8 +1637,7 @@ static
|
|||
void
|
||||
gf_w16_bytwo_b_sse_region_2_xor(gf_region_data *rd, struct gf_w16_bytwo_data *btd)
|
||||
{
|
||||
int i;
|
||||
uint8_t *d8, *s8, tb;
|
||||
uint8_t *d8, *s8;
|
||||
__m128i pp, m1, m2, t1, t2, va, vb;
|
||||
|
||||
s8 = (uint8_t *) rd->s_start;
|
||||
|
|
29
src/gf_w32.c
29
src/gf_w32.c
|
@ -414,7 +414,7 @@ gf_w32_clm_multiply_2 (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32)
|
|||
__m128i a, b;
|
||||
__m128i result;
|
||||
__m128i prim_poly;
|
||||
__m128i v, w;
|
||||
__m128i w;
|
||||
gf_internal_t * h = gf->scratch;
|
||||
|
||||
|
||||
|
@ -458,7 +458,7 @@ gf_w32_clm_multiply_3 (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32)
|
|||
__m128i a, b;
|
||||
__m128i result;
|
||||
__m128i prim_poly;
|
||||
__m128i v, w;
|
||||
__m128i w;
|
||||
gf_internal_t * h = gf->scratch;
|
||||
|
||||
|
||||
|
@ -497,7 +497,7 @@ gf_w32_clm_multiply_4 (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32)
|
|||
__m128i a, b;
|
||||
__m128i result;
|
||||
__m128i prim_poly;
|
||||
__m128i v, w;
|
||||
__m128i w;
|
||||
gf_internal_t * h = gf->scratch;
|
||||
|
||||
|
||||
|
@ -991,7 +991,6 @@ gf_w32_bytwo_p_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t
|
|||
int i;
|
||||
uint8_t *s8, *d8;
|
||||
uint32_t vrev;
|
||||
uint64_t amask;
|
||||
__m128i pp, m1, m2, ta, prod, t1, t2, tp, one, v;
|
||||
struct gf_w32_bytwo_data *btd;
|
||||
gf_region_data rd;
|
||||
|
@ -1182,9 +1181,8 @@ static
|
|||
void
|
||||
gf_w32_bytwo_b_sse_region_2_noxor(gf_region_data *rd, struct gf_w32_bytwo_data *btd)
|
||||
{
|
||||
int i;
|
||||
uint8_t *d8, *s8, tb;
|
||||
__m128i pp, m1, m2, t1, t2, va, vb;
|
||||
uint8_t *d8, *s8;
|
||||
__m128i pp, m1, m2, t1, t2, va;
|
||||
|
||||
s8 = (uint8_t *) rd->s_start;
|
||||
d8 = (uint8_t *) rd->d_start;
|
||||
|
@ -1208,8 +1206,7 @@ static
|
|||
void
|
||||
gf_w32_bytwo_b_sse_region_2_xor(gf_region_data *rd, struct gf_w32_bytwo_data *btd)
|
||||
{
|
||||
int i;
|
||||
uint8_t *d8, *s8, tb;
|
||||
uint8_t *d8, *s8;
|
||||
__m128i pp, m1, m2, t1, t2, va, vb;
|
||||
|
||||
s8 = (uint8_t *) rd->s_start;
|
||||
|
@ -1558,8 +1555,8 @@ void
|
|||
gf_w32_split_2_32_lazy_sse_multiply_region(gf_t *gf, void *src, void *dest, uint32_t val, int bytes, int xor)
|
||||
{
|
||||
gf_internal_t *h;
|
||||
int i, m, j, tindex;
|
||||
uint32_t pp, v, v2, s, *s32, *d32, *top;
|
||||
int i, tindex;
|
||||
uint32_t pp, v, v2, *s32, *d32, *top;
|
||||
__m128i vi, si, pi, shuffler, tables[16], adder, xi, mask1, mask2;
|
||||
gf_region_data rd;
|
||||
|
||||
|
@ -1695,8 +1692,8 @@ gf_w32_split_4_32_lazy_sse_altmap_multiply_region(gf_t *gf, void *src, void *des
|
|||
{
|
||||
#ifdef INTEL_SSSE3
|
||||
gf_internal_t *h;
|
||||
int i, m, j, k, tindex;
|
||||
uint32_t pp, v, s, *s32, *d32, *top, *realtop;
|
||||
int i, j, k;
|
||||
uint32_t pp, v, *s32, *d32, *top;
|
||||
__m128i si, tables[8][4], p0, p1, p2, p3, mask1, v0, v1, v2, v3;
|
||||
struct gf_split_4_32_lazy_data *ld;
|
||||
uint8_t btable[16];
|
||||
|
@ -1887,9 +1884,9 @@ gf_w32_split_4_32_lazy_sse_multiply_region(gf_t *gf, void *src, void *dest, uint
|
|||
{
|
||||
#ifdef INTEL_SSSE3
|
||||
gf_internal_t *h;
|
||||
int i, m, j, k, tindex;
|
||||
uint32_t pp, v, s, *s32, *d32, *top, tmp_table[16];
|
||||
__m128i vi, si, tables[8][4], p0, p1, p2, p3, mask1, v0, v1, v2, v3, mask8;
|
||||
int i, j, k;
|
||||
uint32_t pp, v, *s32, *d32, *top, tmp_table[16];
|
||||
__m128i si, tables[8][4], p0, p1, p2, p3, mask1, v0, v1, v2, v3, mask8;
|
||||
__m128i tv1, tv2, tv3, tv0;
|
||||
uint8_t btable[16];
|
||||
gf_region_data rd;
|
||||
|
|
79
src/gf_w4.c
79
src/gf_w4.c
|
@ -421,7 +421,7 @@ gf_w4_single_table_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_3
|
|||
{
|
||||
gf_region_data rd;
|
||||
uint8_t *base, *sptr, *dptr, *top;
|
||||
__m128i tl, loset, h4, r, va, th;
|
||||
__m128i tl, loset, r, va, th;
|
||||
|
||||
struct gf_single_table_data *std;
|
||||
|
||||
|
@ -924,8 +924,7 @@ gf_w4_bytwo_p_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t v
|
|||
int i;
|
||||
uint8_t *s8, *d8;
|
||||
uint8_t vrev;
|
||||
uint64_t amask;
|
||||
__m128i pp, m1, m2, ta, prod, t1, t2, tp, one, v;
|
||||
__m128i pp, m1, ta, prod, t1, t2, tp, one, v;
|
||||
struct gf_bytwo_data *btd;
|
||||
gf_region_data rd;
|
||||
|
||||
|
@ -948,7 +947,6 @@ gf_w4_bytwo_p_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t v
|
|||
|
||||
pp = _mm_set1_epi8(btd->prim_poly&0xff);
|
||||
m1 = _mm_set1_epi8((btd->mask1)&0xff);
|
||||
m2 = _mm_set1_epi8((btd->mask2)&0xff);
|
||||
one = _mm_set1_epi8(1);
|
||||
|
||||
while (d8 < (uint8_t *) rd.d_top) {
|
||||
|
@ -1039,16 +1037,14 @@ static
|
|||
void
|
||||
gf_w4_bytwo_b_sse_region_2_noxor(gf_region_data *rd, struct gf_bytwo_data *btd)
|
||||
{
|
||||
int i;
|
||||
uint8_t *d8, *s8, tb;
|
||||
__m128i pp, m1, m2, t1, t2, va, vb;
|
||||
uint8_t *d8, *s8;
|
||||
__m128i pp, m1, t1, t2, va;
|
||||
|
||||
s8 = (uint8_t *) rd->s_start;
|
||||
d8 = (uint8_t *) rd->d_start;
|
||||
|
||||
pp = _mm_set1_epi8(btd->prim_poly&0xff);
|
||||
m1 = _mm_set1_epi8((btd->mask1)&0xff);
|
||||
m2 = _mm_set1_epi8((btd->mask2)&0xff);
|
||||
|
||||
while (d8 < (uint8_t *) rd->d_top) {
|
||||
va = _mm_load_si128 ((__m128i *)(s8));
|
||||
|
@ -1065,16 +1061,14 @@ static
|
|||
void
|
||||
gf_w4_bytwo_b_sse_region_2_xor(gf_region_data *rd, struct gf_bytwo_data *btd)
|
||||
{
|
||||
int i;
|
||||
uint8_t *d8, *s8, tb;
|
||||
__m128i pp, m1, m2, t1, t2, va, vb;
|
||||
uint8_t *d8, *s8;
|
||||
__m128i pp, m1, t1, t2, va, vb;
|
||||
|
||||
s8 = (uint8_t *) rd->s_start;
|
||||
d8 = (uint8_t *) rd->d_start;
|
||||
|
||||
pp = _mm_set1_epi8(btd->prim_poly&0xff);
|
||||
m1 = _mm_set1_epi8((btd->mask1)&0xff);
|
||||
m2 = _mm_set1_epi8((btd->mask2)&0xff);
|
||||
|
||||
while (d8 < (uint8_t *) rd->d_top) {
|
||||
va = _mm_load_si128 ((__m128i *)(s8));
|
||||
|
@ -1093,16 +1087,14 @@ static
|
|||
void
|
||||
gf_w4_bytwo_b_sse_region_4_noxor(gf_region_data *rd, struct gf_bytwo_data *btd)
|
||||
{
|
||||
int i;
|
||||
uint8_t *d8, *s8, tb;
|
||||
__m128i pp, m1, m2, t1, t2, va, vb;
|
||||
uint8_t *d8, *s8;
|
||||
__m128i pp, m1, t1, t2, va;
|
||||
|
||||
s8 = (uint8_t *) rd->s_start;
|
||||
d8 = (uint8_t *) rd->d_start;
|
||||
|
||||
pp = _mm_set1_epi8(btd->prim_poly&0xff);
|
||||
m1 = _mm_set1_epi8((btd->mask1)&0xff);
|
||||
m2 = _mm_set1_epi8((btd->mask2)&0xff);
|
||||
|
||||
while (d8 < (uint8_t *) rd->d_top) {
|
||||
va = _mm_load_si128 ((__m128i *)(s8));
|
||||
|
@ -1120,16 +1112,14 @@ static
|
|||
void
|
||||
gf_w4_bytwo_b_sse_region_4_xor(gf_region_data *rd, struct gf_bytwo_data *btd)
|
||||
{
|
||||
int i;
|
||||
uint8_t *d8, *s8, tb;
|
||||
__m128i pp, m1, m2, t1, t2, va, vb;
|
||||
uint8_t *d8, *s8;
|
||||
__m128i pp, m1, t1, t2, va, vb;
|
||||
|
||||
s8 = (uint8_t *) rd->s_start;
|
||||
d8 = (uint8_t *) rd->d_start;
|
||||
|
||||
pp = _mm_set1_epi8(btd->prim_poly&0xff);
|
||||
m1 = _mm_set1_epi8((btd->mask1)&0xff);
|
||||
m2 = _mm_set1_epi8((btd->mask2)&0xff);
|
||||
|
||||
while (d8 < (uint8_t *) rd->d_top) {
|
||||
va = _mm_load_si128 ((__m128i *)(s8));
|
||||
|
@ -1150,16 +1140,14 @@ static
|
|||
void
|
||||
gf_w4_bytwo_b_sse_region_3_noxor(gf_region_data *rd, struct gf_bytwo_data *btd)
|
||||
{
|
||||
int i;
|
||||
uint8_t *d8, *s8, tb;
|
||||
__m128i pp, m1, m2, t1, t2, va, vb;
|
||||
uint8_t *d8, *s8;
|
||||
__m128i pp, m1, t1, t2, va, vb;
|
||||
|
||||
s8 = (uint8_t *) rd->s_start;
|
||||
d8 = (uint8_t *) rd->d_start;
|
||||
|
||||
pp = _mm_set1_epi8(btd->prim_poly&0xff);
|
||||
m1 = _mm_set1_epi8((btd->mask1)&0xff);
|
||||
m2 = _mm_set1_epi8((btd->mask2)&0xff);
|
||||
|
||||
while (d8 < (uint8_t *) rd->d_top) {
|
||||
va = _mm_load_si128 ((__m128i *)(s8));
|
||||
|
@ -1178,16 +1166,14 @@ static
|
|||
void
|
||||
gf_w4_bytwo_b_sse_region_3_xor(gf_region_data *rd, struct gf_bytwo_data *btd)
|
||||
{
|
||||
int i;
|
||||
uint8_t *d8, *s8, tb;
|
||||
__m128i pp, m1, m2, t1, t2, va, vb;
|
||||
uint8_t *d8, *s8;
|
||||
__m128i pp, m1, t1, t2, va, vb;
|
||||
|
||||
s8 = (uint8_t *) rd->s_start;
|
||||
d8 = (uint8_t *) rd->d_start;
|
||||
|
||||
pp = _mm_set1_epi8(btd->prim_poly&0xff);
|
||||
m1 = _mm_set1_epi8((btd->mask1)&0xff);
|
||||
m2 = _mm_set1_epi8((btd->mask2)&0xff);
|
||||
|
||||
while (d8 < (uint8_t *) rd->d_top) {
|
||||
va = _mm_load_si128 ((__m128i *)(s8));
|
||||
|
@ -1206,16 +1192,14 @@ static
|
|||
void
|
||||
gf_w4_bytwo_b_sse_region_5_noxor(gf_region_data *rd, struct gf_bytwo_data *btd)
|
||||
{
|
||||
int i;
|
||||
uint8_t *d8, *s8, tb;
|
||||
__m128i pp, m1, m2, t1, t2, va, vb;
|
||||
uint8_t *d8, *s8;
|
||||
__m128i pp, m1, t1, t2, va, vb;
|
||||
|
||||
s8 = (uint8_t *) rd->s_start;
|
||||
d8 = (uint8_t *) rd->d_start;
|
||||
|
||||
pp = _mm_set1_epi8(btd->prim_poly&0xff);
|
||||
m1 = _mm_set1_epi8((btd->mask1)&0xff);
|
||||
m2 = _mm_set1_epi8((btd->mask2)&0xff);
|
||||
|
||||
while (d8 < (uint8_t *) rd->d_top) {
|
||||
va = _mm_load_si128 ((__m128i *)(s8));
|
||||
|
@ -1235,16 +1219,14 @@ static
|
|||
void
|
||||
gf_w4_bytwo_b_sse_region_5_xor(gf_region_data *rd, struct gf_bytwo_data *btd)
|
||||
{
|
||||
int i;
|
||||
uint8_t *d8, *s8, tb;
|
||||
__m128i pp, m1, m2, t1, t2, va, vb;
|
||||
uint8_t *d8, *s8;
|
||||
__m128i pp, m1, t1, t2, va, vb;
|
||||
|
||||
s8 = (uint8_t *) rd->s_start;
|
||||
d8 = (uint8_t *) rd->d_start;
|
||||
|
||||
pp = _mm_set1_epi8(btd->prim_poly&0xff);
|
||||
m1 = _mm_set1_epi8((btd->mask1)&0xff);
|
||||
m2 = _mm_set1_epi8((btd->mask2)&0xff);
|
||||
|
||||
while (d8 < (uint8_t *) rd->d_top) {
|
||||
va = _mm_load_si128 ((__m128i *)(s8));
|
||||
|
@ -1264,16 +1246,14 @@ static
|
|||
void
|
||||
gf_w4_bytwo_b_sse_region_7_noxor(gf_region_data *rd, struct gf_bytwo_data *btd)
|
||||
{
|
||||
int i;
|
||||
uint8_t *d8, *s8, tb;
|
||||
__m128i pp, m1, m2, t1, t2, va, vb;
|
||||
uint8_t *d8, *s8;
|
||||
__m128i pp, m1, t1, t2, va, vb;
|
||||
|
||||
s8 = (uint8_t *) rd->s_start;
|
||||
d8 = (uint8_t *) rd->d_start;
|
||||
|
||||
pp = _mm_set1_epi8(btd->prim_poly&0xff);
|
||||
m1 = _mm_set1_epi8((btd->mask1)&0xff);
|
||||
m2 = _mm_set1_epi8((btd->mask2)&0xff);
|
||||
|
||||
while (d8 < (uint8_t *) rd->d_top) {
|
||||
va = _mm_load_si128 ((__m128i *)(s8));
|
||||
|
@ -1294,16 +1274,14 @@ static
|
|||
void
|
||||
gf_w4_bytwo_b_sse_region_7_xor(gf_region_data *rd, struct gf_bytwo_data *btd)
|
||||
{
|
||||
int i;
|
||||
uint8_t *d8, *s8, tb;
|
||||
__m128i pp, m1, m2, t1, t2, va, vb;
|
||||
uint8_t *d8, *s8;
|
||||
__m128i pp, m1, t1, t2, va, vb;
|
||||
|
||||
s8 = (uint8_t *) rd->s_start;
|
||||
d8 = (uint8_t *) rd->d_start;
|
||||
|
||||
pp = _mm_set1_epi8(btd->prim_poly&0xff);
|
||||
m1 = _mm_set1_epi8((btd->mask1)&0xff);
|
||||
m2 = _mm_set1_epi8((btd->mask2)&0xff);
|
||||
|
||||
while (d8 < (uint8_t *) rd->d_top) {
|
||||
va = _mm_load_si128 ((__m128i *)(s8));
|
||||
|
@ -1324,16 +1302,14 @@ static
|
|||
void
|
||||
gf_w4_bytwo_b_sse_region_6_noxor(gf_region_data *rd, struct gf_bytwo_data *btd)
|
||||
{
|
||||
int i;
|
||||
uint8_t *d8, *s8, tb;
|
||||
__m128i pp, m1, m2, t1, t2, va, vb;
|
||||
uint8_t *d8, *s8;
|
||||
__m128i pp, m1, t1, t2, va, vb;
|
||||
|
||||
s8 = (uint8_t *) rd->s_start;
|
||||
d8 = (uint8_t *) rd->d_start;
|
||||
|
||||
pp = _mm_set1_epi8(btd->prim_poly&0xff);
|
||||
m1 = _mm_set1_epi8((btd->mask1)&0xff);
|
||||
m2 = _mm_set1_epi8((btd->mask2)&0xff);
|
||||
|
||||
while (d8 < (uint8_t *) rd->d_top) {
|
||||
va = _mm_load_si128 ((__m128i *)(s8));
|
||||
|
@ -1353,16 +1329,14 @@ static
|
|||
void
|
||||
gf_w4_bytwo_b_sse_region_6_xor(gf_region_data *rd, struct gf_bytwo_data *btd)
|
||||
{
|
||||
int i;
|
||||
uint8_t *d8, *s8, tb;
|
||||
__m128i pp, m1, m2, t1, t2, va, vb;
|
||||
uint8_t *d8, *s8;
|
||||
__m128i pp, m1, t1, t2, va, vb;
|
||||
|
||||
s8 = (uint8_t *) rd->s_start;
|
||||
d8 = (uint8_t *) rd->d_start;
|
||||
|
||||
pp = _mm_set1_epi8(btd->prim_poly&0xff);
|
||||
m1 = _mm_set1_epi8((btd->mask1)&0xff);
|
||||
m2 = _mm_set1_epi8((btd->mask2)&0xff);
|
||||
|
||||
while (d8 < (uint8_t *) rd->d_top) {
|
||||
va = _mm_load_si128 ((__m128i *)(s8));
|
||||
|
@ -1977,7 +1951,6 @@ int gf_w4_shift_init(gf_t *gf)
|
|||
|
||||
int gf_w4_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2)
|
||||
{
|
||||
int region_tbl_size;
|
||||
int issse3 = 0;
|
||||
|
||||
#ifdef INTEL_SSSE3
|
||||
|
|
27
src/gf_w64.c
27
src/gf_w64.c
|
@ -93,14 +93,13 @@ void
|
|||
gf_w64_clm_multiply_region_from_single_2(gf_t *gf, void *src, void *dest, gf_val_64_t val, int bytes, int
|
||||
xor)
|
||||
{
|
||||
int i, size;
|
||||
gf_val_64_t *s64, *d64, *top;
|
||||
gf_region_data rd;
|
||||
|
||||
__m128i a, b;
|
||||
__m128i result, r1;
|
||||
__m128i prim_poly;
|
||||
__m128i v, w;
|
||||
__m128i w;
|
||||
__m128i m1, m2, m3, m4;
|
||||
gf_internal_t * h = gf->scratch;
|
||||
|
||||
|
@ -121,7 +120,6 @@ xor)
|
|||
s64 = (gf_val_64_t *) rd.s_start;
|
||||
d64 = (gf_val_64_t *) rd.d_start;
|
||||
top = (gf_val_64_t *) rd.d_top;
|
||||
size = bytes/sizeof(gf_val_64_t);
|
||||
|
||||
if (xor) {
|
||||
while (d64 != top) {
|
||||
|
@ -184,7 +182,6 @@ void
|
|||
gf_w64_clm_multiply_region_from_single_4(gf_t *gf, void *src, void *dest, gf_val_64_t val, int bytes, int
|
||||
xor)
|
||||
{
|
||||
int i, size;
|
||||
gf_val_64_t *s64, *d64, *top;
|
||||
gf_region_data rd;
|
||||
|
||||
|
@ -210,7 +207,6 @@ xor)
|
|||
s64 = (gf_val_64_t *) rd.s_start;
|
||||
d64 = (gf_val_64_t *) rd.d_start;
|
||||
top = (gf_val_64_t *) rd.d_top;
|
||||
size = bytes/sizeof(gf_val_64_t);
|
||||
|
||||
if (xor) {
|
||||
while (d64 != top) {
|
||||
|
@ -468,9 +464,7 @@ gf_w64_clm_multiply_region(gf_t *gf, void *src, void *dest, uint64_t val, int by
|
|||
{
|
||||
#if defined(INTEL_SSE4_PCLMUL)
|
||||
gf_internal_t *h;
|
||||
int i, j, k;
|
||||
uint8_t *s8, *d8, *dtop;
|
||||
uint64_t *s64, *d64;
|
||||
gf_region_data rd;
|
||||
__m128i v, b, m, prim_poly, c, fr, w, result;
|
||||
|
||||
|
@ -492,7 +486,6 @@ gf_w64_clm_multiply_region(gf_t *gf, void *src, void *dest, uint64_t val, int by
|
|||
|
||||
if (xor) {
|
||||
while (d8 != dtop) {
|
||||
s64 = (uint64_t *) s8;
|
||||
b = _mm_load_si128((__m128i *) s8);
|
||||
result = _mm_clmulepi64_si128 (b, v, 0);
|
||||
c = _mm_insert_epi32 (_mm_srli_si128 (result, 8), 0, 0);
|
||||
|
@ -521,7 +514,6 @@ gf_w64_clm_multiply_region(gf_t *gf, void *src, void *dest, uint64_t val, int by
|
|||
}
|
||||
} else {
|
||||
while (d8 < dtop) {
|
||||
s64 = (uint64_t *) s8;
|
||||
b = _mm_load_si128((__m128i *) s8);
|
||||
result = _mm_clmulepi64_si128 (b, v, 0);
|
||||
c = _mm_insert_epi32 (_mm_srli_si128 (result, 8), 0, 0);
|
||||
|
@ -1374,9 +1366,8 @@ static
|
|||
void
|
||||
gf_w64_bytwo_b_sse_region_2_xor(gf_region_data *rd)
|
||||
{
|
||||
int i;
|
||||
uint64_t one64, amask;
|
||||
uint8_t *d8, *s8, tb;
|
||||
uint8_t *d8, *s8;
|
||||
__m128i pp, m1, m2, t1, t2, va, vb;
|
||||
gf_internal_t *h;
|
||||
|
||||
|
@ -1408,9 +1399,8 @@ static
|
|||
void
|
||||
gf_w64_bytwo_b_sse_region_2_noxor(gf_region_data *rd)
|
||||
{
|
||||
int i;
|
||||
uint64_t one64, amask;
|
||||
uint8_t *d8, *s8, tb;
|
||||
uint8_t *d8, *s8;
|
||||
__m128i pp, m1, m2, t1, t2, va;
|
||||
gf_internal_t *h;
|
||||
|
||||
|
@ -1443,7 +1433,6 @@ gf_w64_bytwo_b_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_64_t
|
|||
uint64_t itb, amask, one64;
|
||||
uint8_t *d8, *s8;
|
||||
__m128i pp, m1, m2, t1, t2, va, vb;
|
||||
struct gf_w32_bytwo_data *btd;
|
||||
gf_region_data rd;
|
||||
gf_internal_t *h;
|
||||
|
||||
|
@ -1718,8 +1707,8 @@ static
|
|||
gf_w64_split_4_64_lazy_sse_altmap_multiply_region(gf_t *gf, void *src, void *dest, uint64_t val, int bytes, int xor)
|
||||
{
|
||||
gf_internal_t *h;
|
||||
int i, m, j, k, tindex;
|
||||
uint64_t pp, v, s, *s64, *d64, *top;
|
||||
int i, j, k;
|
||||
uint64_t pp, v, *s64, *d64, *top;
|
||||
__m128i si, tables[16][8], p[8], v0, mask1;
|
||||
struct gf_split_4_64_lazy_data *ld;
|
||||
uint8_t btable[16];
|
||||
|
@ -1802,9 +1791,9 @@ static
|
|||
gf_w64_split_4_64_lazy_sse_multiply_region(gf_t *gf, void *src, void *dest, uint64_t val, int bytes, int xor)
|
||||
{
|
||||
gf_internal_t *h;
|
||||
int i, m, j, k, tindex;
|
||||
uint64_t pp, v, s, *s64, *d64, *top;
|
||||
__m128i si, tables[16][8], p[8], st[8], mask1, mask8, mask16, t1, t2;
|
||||
int i, j, k;
|
||||
uint64_t pp, v, *s64, *d64, *top;
|
||||
__m128i si, tables[16][8], p[8], st[8], mask1, mask8, mask16, t1;
|
||||
struct gf_split_4_64_lazy_data *ld;
|
||||
uint8_t btable[16];
|
||||
gf_region_data rd;
|
||||
|
|
26
src/gf_w8.c
26
src/gf_w8.c
|
@ -216,7 +216,7 @@ gf_w8_clm_multiply_2 (gf_t *gf, gf_val_32_t a8, gf_val_32_t b8)
|
|||
__m128i a, b;
|
||||
__m128i result;
|
||||
__m128i prim_poly;
|
||||
__m128i v, w;
|
||||
__m128i w;
|
||||
gf_internal_t * h = gf->scratch;
|
||||
|
||||
a = _mm_insert_epi32 (_mm_setzero_si128(), a8, 0);
|
||||
|
@ -262,7 +262,7 @@ gf_w8_clm_multiply_3 (gf_t *gf, gf_val_32_t a8, gf_val_32_t b8)
|
|||
__m128i a, b;
|
||||
__m128i result;
|
||||
__m128i prim_poly;
|
||||
__m128i v, w;
|
||||
__m128i w;
|
||||
gf_internal_t * h = gf->scratch;
|
||||
|
||||
a = _mm_insert_epi32 (_mm_setzero_si128(), a8, 0);
|
||||
|
@ -301,7 +301,7 @@ gf_w8_clm_multiply_4 (gf_t *gf, gf_val_32_t a8, gf_val_32_t b8)
|
|||
__m128i a, b;
|
||||
__m128i result;
|
||||
__m128i prim_poly;
|
||||
__m128i v, w;
|
||||
__m128i w;
|
||||
gf_internal_t * h = gf->scratch;
|
||||
|
||||
a = _mm_insert_epi32 (_mm_setzero_si128(), a8, 0);
|
||||
|
@ -377,7 +377,7 @@ gf_w8_clm_multiply_region_from_single_2(gf_t *gf, void *src, void *dest, gf_val_
|
|||
__m128i a, b;
|
||||
__m128i result;
|
||||
__m128i prim_poly;
|
||||
__m128i v, w;
|
||||
__m128i w;
|
||||
gf_internal_t * h = gf->scratch;
|
||||
|
||||
prim_poly = _mm_set_epi32(0, 0, 0, (uint32_t)(h->prim_poly & 0x1ffULL));
|
||||
|
@ -435,7 +435,7 @@ gf_w8_clm_multiply_region_from_single_3(gf_t *gf, void *src, void *dest, gf_val_
|
|||
__m128i a, b;
|
||||
__m128i result;
|
||||
__m128i prim_poly;
|
||||
__m128i v, w;
|
||||
__m128i w;
|
||||
gf_internal_t * h = gf->scratch;
|
||||
|
||||
prim_poly = _mm_set_epi32(0, 0, 0, (uint32_t)(h->prim_poly & 0x1ffULL));
|
||||
|
@ -497,7 +497,7 @@ gf_w8_clm_multiply_region_from_single_4(gf_t *gf, void *src, void *dest, gf_val_
|
|||
__m128i a, b;
|
||||
__m128i result;
|
||||
__m128i prim_poly;
|
||||
__m128i v, w;
|
||||
__m128i w;
|
||||
gf_internal_t * h = gf->scratch;
|
||||
|
||||
prim_poly = _mm_set_epi32(0, 0, 0, (uint32_t)(h->prim_poly & 0x1ffULL));
|
||||
|
@ -1058,9 +1058,8 @@ static
|
|||
void
|
||||
gf_w8_split_multiply_region_sse(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
|
||||
{
|
||||
uint8_t *s8, *d8, *bh, *bl, *sptr, *dptr, *top;
|
||||
__m128i tbl, loset, t1, r, va, mth, mtl;
|
||||
uint64_t altable[4];
|
||||
uint8_t *bh, *bl, *sptr, *dptr;
|
||||
__m128i loset, t1, r, va, mth, mtl;
|
||||
struct gf_w8_half_table_data *htd;
|
||||
gf_region_data rd;
|
||||
|
||||
|
@ -1676,7 +1675,6 @@ gf_w8_bytwo_p_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t v
|
|||
int i;
|
||||
uint8_t *s8, *d8;
|
||||
uint8_t vrev;
|
||||
uint64_t amask;
|
||||
__m128i pp, m1, m2, ta, prod, t1, t2, tp, one, v;
|
||||
struct gf_w8_bytwo_data *btd;
|
||||
gf_region_data rd;
|
||||
|
@ -1729,9 +1727,8 @@ static
|
|||
void
|
||||
gf_w8_bytwo_b_sse_region_2_noxor(gf_region_data *rd, struct gf_w8_bytwo_data *btd)
|
||||
{
|
||||
int i;
|
||||
uint8_t *d8, *s8, tb;
|
||||
__m128i pp, m1, m2, t1, t2, va, vb;
|
||||
uint8_t *d8, *s8;
|
||||
__m128i pp, m1, m2, t1, t2, va;
|
||||
|
||||
s8 = (uint8_t *) rd->s_start;
|
||||
d8 = (uint8_t *) rd->d_start;
|
||||
|
@ -1755,8 +1752,7 @@ static
|
|||
void
|
||||
gf_w8_bytwo_b_sse_region_2_xor(gf_region_data *rd, struct gf_w8_bytwo_data *btd)
|
||||
{
|
||||
int i;
|
||||
uint8_t *d8, *s8, tb;
|
||||
uint8_t *d8, *s8;
|
||||
__m128i pp, m1, m2, t1, t2, va, vb;
|
||||
|
||||
s8 = (uint8_t *) rd->s_start;
|
||||
|
|
|
@ -70,18 +70,15 @@ int main(int argc, char **argv)
|
|||
{
|
||||
signal(SIGSEGV, SigHandler);
|
||||
|
||||
int w, i, verbose, single, region, tested, top;
|
||||
int w, i, verbose, single, region, top;
|
||||
int s_start, d_start, bytes, xor, alignment_test;
|
||||
gf_t gf, gf_def;
|
||||
time_t t0;
|
||||
gf_internal_t *h;
|
||||
gf_general_t *a, *b, *c, *d, *ai, *bi;
|
||||
uint8_t a8, b8, c8, *mult4, *div4, *mult8, *div8;
|
||||
uint16_t a16, b16, c16, d16, *log16, *alog16;
|
||||
char as[50], bs[50], cs[50], ds[50], ais[50], bis[50];
|
||||
uint32_t mask;
|
||||
gf_general_t *a, *b, *c, *d;
|
||||
uint8_t a8, b8, c8, *mult4 = NULL, *mult8 = NULL;
|
||||
uint16_t a16, b16, c16, *log16 = NULL, *alog16 = NULL;
|
||||
char as[50], bs[50], cs[50], ds[50];
|
||||
uint32_t mask = 0;
|
||||
char *ra, *rb, *rc, *rd, *target;
|
||||
int align;
|
||||
|
@ -118,8 +115,6 @@ int main(int argc, char **argv)
|
|||
b = (gf_general_t *) malloc(sizeof(gf_general_t));
|
||||
c = (gf_general_t *) malloc(sizeof(gf_general_t));
|
||||
d = (gf_general_t *) malloc(sizeof(gf_general_t));
|
||||
ai = (gf_general_t *) malloc(sizeof(gf_general_t));
|
||||
bi = (gf_general_t *) malloc(sizeof(gf_general_t));
|
||||
|
||||
//15 bytes extra to make sure it's 16byte aligned
|
||||
ra = (char *) malloc(sizeof(char)*REGION_SIZE+15);
|
||||
|
@ -148,12 +143,10 @@ int main(int argc, char **argv)
|
|||
problem("No default for this value of w");
|
||||
if (w == 4) {
|
||||
mult4 = gf_w4_get_mult_table(&gf);
|
||||
div4 = gf_w4_get_div_table(&gf);
|
||||
}
|
||||
|
||||
if (w == 8) {
|
||||
mult8 = gf_w8_get_mult_table(&gf);
|
||||
div8 = gf_w8_get_div_table(&gf);
|
||||
}
|
||||
|
||||
if (w == 16) {
|
||||
|
@ -243,7 +236,6 @@ int main(int argc, char **argv)
|
|||
}
|
||||
}
|
||||
|
||||
tested = 0;
|
||||
gf_general_multiply(&gf, a, b, c);
|
||||
|
||||
/* If w is 4, 8 or 16, then there are inline multiplication/division methods.
|
||||
|
@ -288,7 +280,6 @@ int main(int argc, char **argv)
|
|||
/* If this is not composite, then first test against the default: */
|
||||
|
||||
if (h->mult_type != GF_MULT_COMPOSITE) {
|
||||
tested = 1;
|
||||
gf_general_multiply(&gf_def, a, b, d);
|
||||
|
||||
if (!gf_general_are_equal(c, d, w)) {
|
||||
|
@ -309,7 +300,6 @@ int main(int argc, char **argv)
|
|||
|
||||
if (gf_general_is_zero(a, w) || gf_general_is_zero(b, w) ||
|
||||
gf_general_is_one(a, w) || gf_general_is_one(b, w)) {
|
||||
tested = 1;
|
||||
if (((gf_general_is_zero(a, w) || gf_general_is_zero(b, w)) && !gf_general_is_zero(c, w)) ||
|
||||
(gf_general_is_one(a, w) && !gf_general_are_equal(b, c, w)) ||
|
||||
(gf_general_is_one(b, w) && !gf_general_are_equal(a, c, w))) {
|
||||
|
|
|
@ -62,7 +62,7 @@ void print_128(uint64_t *v)
|
|||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
int hex, al, bl, w;
|
||||
int hex, w;
|
||||
uint32_t a, b, c, top;
|
||||
uint64_t a64, b64, c64;
|
||||
uint64_t a128[2], b128[2], c128[2];
|
||||
|
|
|
@ -76,7 +76,7 @@ int main(int argc, char *argv[])
|
|||
int listing;
|
||||
char *gf_argv[50], *x;
|
||||
gf_t gf;
|
||||
char divs[200], ks[10], ls[10];
|
||||
char ls[10];
|
||||
char * w_str;
|
||||
|
||||
if (argc != 4) usage(NULL);
|
||||
|
|
|
@ -84,7 +84,6 @@ int gcd_one(gf_t *gf, int w, int n, gf_general_t *poly, gf_general_t *prod)
|
|||
{
|
||||
gf_general_t *a, *b, zero, factor, p;
|
||||
int i, j, da, db;
|
||||
char buf[30];
|
||||
|
||||
gf_general_set_zero(&zero, w);
|
||||
|
||||
|
@ -123,7 +122,6 @@ void x_to_q_to_i_minus_x(gf_t *gf, int w, int n, gf_general_t *poly, int logq, i
|
|||
gf_general_t *product;
|
||||
gf_general_t p, zero, factor;
|
||||
int j, k, lq;
|
||||
char buf[20];
|
||||
|
||||
gf_general_set_zero(&zero, w);
|
||||
product = (gf_general_t *) malloc(sizeof(gf_general_t) * n*2);
|
||||
|
@ -183,7 +181,7 @@ void x_to_q_to_i_minus_x(gf_t *gf, int w, int n, gf_general_t *poly, int logq, i
|
|||
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
int w, i, power, n, ap, success, j;
|
||||
int w, i, power, n, ap, success;
|
||||
gf_t gf;
|
||||
gf_general_t *poly, *prod;
|
||||
char *string, *ptr;
|
||||
|
|
Loading…
Reference in New Issue