remove unused variables from #if SSE blocks

Signed-off-by: Loic Dachary <loic@dachary.org>
master
Loic Dachary 2014-03-06 15:11:24 +01:00
parent 59cb51046b
commit 191b86b5d2
10 changed files with 76 additions and 141 deletions

View File

@ -90,7 +90,6 @@ int xor)
int i;
gf_val_128_t s128;
gf_val_128_t d128;
uint64_t c128[2];
gf_region_data rd;
__m128i a,b;
__m128i result0,result1;
@ -106,8 +105,6 @@ int xor)
if (val[1] == 1) { gf_multby_one(src, dest, bytes, xor); return; }
}
set_zero(c128, 0);
s128 = (gf_val_128_t) src;
d128 = (gf_val_128_t) dest;
@ -384,7 +381,7 @@ gf_w128_sse_bytwo_p_multiply(gf_t *gf, gf_val_128_t a128, gf_val_128_t b128, gf_
{
#if defined(INTEL_SSE4)
int i;
__m128i a, b, pp, one, prod, amask, l_middle_one, u_middle_one;
__m128i a, b, pp, prod, amask, u_middle_one;
/*John: pmask is always the highest bit set, and the rest zeros. amask changes, it's a countdown.*/
uint32_t topbit, middlebit, pmask; /* this is used as a boolean value */
gf_internal_t *h;
@ -400,7 +397,6 @@ gf_w128_sse_bytwo_p_multiply(gf_t *gf, gf_val_128_t a128, gf_val_128_t b128, gf_
pmask = 0x80000000;
amask = _mm_insert_epi32(prod, 0x80000000, 0x3);
u_middle_one = _mm_insert_epi32(prod, 1, 0x2);
l_middle_one = _mm_insert_epi32(prod, 1 << 31, 0x1);
for (i = 0; i < 64; i++) {
topbit = (_mm_extract_epi32(prod, 0x3) & pmask);
@ -605,7 +601,7 @@ void
gf_w128_split_4_128_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_128_t val, int bytes, int xor)
{
gf_internal_t *h;
int i, m, j, k, tindex;
int i, j, k;
uint64_t pp, v[2], s, *s64, *d64, *top;
__m128i p, tables[32][16];
struct gf_w128_split_4_128_data *ld;
@ -704,8 +700,8 @@ void
gf_w128_split_4_128_sse_altmap_multiply_region(gf_t *gf, void *src, void *dest, gf_val_128_t val, int bytes, int xor)
{
gf_internal_t *h;
int i, m, j, k, tindex;
uint64_t pp, v[2], s, *s64, *d64, *top;
int i, j, k;
uint64_t pp, v[2], *s64, *d64, *top;
__m128i si, tables[32][16], p[16], v0, mask1;
struct gf_w128_split_4_128_data *ld;
uint8_t btable[16];

View File

@ -450,7 +450,7 @@ gf_w16_clm_multiply_2 (gf_t *gf, gf_val_32_t a16, gf_val_32_t b16)
__m128i a, b;
__m128i result;
__m128i prim_poly;
__m128i v, w;
__m128i w;
gf_internal_t * h = gf->scratch;
a = _mm_insert_epi32 (_mm_setzero_si128(), a16, 0);
@ -497,7 +497,7 @@ gf_w16_clm_multiply_3 (gf_t *gf, gf_val_32_t a16, gf_val_32_t b16)
__m128i a, b;
__m128i result;
__m128i prim_poly;
__m128i v, w;
__m128i w;
gf_internal_t * h = gf->scratch;
a = _mm_insert_epi32 (_mm_setzero_si128(), a16, 0);
@ -537,7 +537,7 @@ gf_w16_clm_multiply_4 (gf_t *gf, gf_val_32_t a16, gf_val_32_t b16)
__m128i a, b;
__m128i result;
__m128i prim_poly;
__m128i v, w;
__m128i w;
gf_internal_t * h = gf->scratch;
a = _mm_insert_epi32 (_mm_setzero_si128(), a16, 0);
@ -1001,12 +1001,12 @@ gf_w16_split_4_16_lazy_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_v
{
#ifdef INTEL_SSSE3
uint64_t i, j, *s64, *d64, *top64;;
uint64_t a, c, prod;
uint64_t c, prod;
uint8_t low[4][16];
uint8_t high[4][16];
gf_region_data rd;
__m128i mask, ta, tb, ti, tpl, tph, tlow[4], thigh[4], tta, ttb, shuffler, unshuffler, lmask;
__m128i mask, ta, tb, ti, tpl, tph, tlow[4], thigh[4], tta, ttb, lmask;
if (val == 0) { gf_multby_zero(dest, bytes, xor); return; }
if (val == 1) { gf_multby_one(src, dest, bytes, xor); return; }
@ -1138,7 +1138,6 @@ gf_w16_split_4_16_lazy_sse_altmap_multiply_region(gf_t *gf, void *src, void *des
uint8_t low[4][16];
uint8_t high[4][16];
gf_region_data rd;
struct gf_single_table_data *std;
__m128i mask, ta, tb, ti, tpl, tph, tlow[4], thigh[4];
if (val == 0) { gf_multby_zero(dest, bytes, xor); return; }
@ -1553,7 +1552,6 @@ gf_w16_bytwo_p_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t
int i;
uint8_t *s8, *d8;
uint32_t vrev;
uint64_t amask;
__m128i pp, m1, m2, ta, prod, t1, t2, tp, one, v;
struct gf_w16_bytwo_data *btd;
gf_region_data rd;
@ -1614,9 +1612,8 @@ static
void
gf_w16_bytwo_b_sse_region_2_noxor(gf_region_data *rd, struct gf_w16_bytwo_data *btd)
{
int i;
uint8_t *d8, *s8, tb;
__m128i pp, m1, m2, t1, t2, va, vb;
uint8_t *d8, *s8;
__m128i pp, m1, m2, t1, t2, va;
s8 = (uint8_t *) rd->s_start;
d8 = (uint8_t *) rd->d_start;
@ -1640,8 +1637,7 @@ static
void
gf_w16_bytwo_b_sse_region_2_xor(gf_region_data *rd, struct gf_w16_bytwo_data *btd)
{
int i;
uint8_t *d8, *s8, tb;
uint8_t *d8, *s8;
__m128i pp, m1, m2, t1, t2, va, vb;
s8 = (uint8_t *) rd->s_start;

View File

@ -414,7 +414,7 @@ gf_w32_clm_multiply_2 (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32)
__m128i a, b;
__m128i result;
__m128i prim_poly;
__m128i v, w;
__m128i w;
gf_internal_t * h = gf->scratch;
@ -458,7 +458,7 @@ gf_w32_clm_multiply_3 (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32)
__m128i a, b;
__m128i result;
__m128i prim_poly;
__m128i v, w;
__m128i w;
gf_internal_t * h = gf->scratch;
@ -497,7 +497,7 @@ gf_w32_clm_multiply_4 (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32)
__m128i a, b;
__m128i result;
__m128i prim_poly;
__m128i v, w;
__m128i w;
gf_internal_t * h = gf->scratch;
@ -991,7 +991,6 @@ gf_w32_bytwo_p_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t
int i;
uint8_t *s8, *d8;
uint32_t vrev;
uint64_t amask;
__m128i pp, m1, m2, ta, prod, t1, t2, tp, one, v;
struct gf_w32_bytwo_data *btd;
gf_region_data rd;
@ -1182,9 +1181,8 @@ static
void
gf_w32_bytwo_b_sse_region_2_noxor(gf_region_data *rd, struct gf_w32_bytwo_data *btd)
{
int i;
uint8_t *d8, *s8, tb;
__m128i pp, m1, m2, t1, t2, va, vb;
uint8_t *d8, *s8;
__m128i pp, m1, m2, t1, t2, va;
s8 = (uint8_t *) rd->s_start;
d8 = (uint8_t *) rd->d_start;
@ -1208,8 +1206,7 @@ static
void
gf_w32_bytwo_b_sse_region_2_xor(gf_region_data *rd, struct gf_w32_bytwo_data *btd)
{
int i;
uint8_t *d8, *s8, tb;
uint8_t *d8, *s8;
__m128i pp, m1, m2, t1, t2, va, vb;
s8 = (uint8_t *) rd->s_start;
@ -1558,8 +1555,8 @@ void
gf_w32_split_2_32_lazy_sse_multiply_region(gf_t *gf, void *src, void *dest, uint32_t val, int bytes, int xor)
{
gf_internal_t *h;
int i, m, j, tindex;
uint32_t pp, v, v2, s, *s32, *d32, *top;
int i, tindex;
uint32_t pp, v, v2, *s32, *d32, *top;
__m128i vi, si, pi, shuffler, tables[16], adder, xi, mask1, mask2;
gf_region_data rd;
@ -1695,8 +1692,8 @@ gf_w32_split_4_32_lazy_sse_altmap_multiply_region(gf_t *gf, void *src, void *des
{
#ifdef INTEL_SSSE3
gf_internal_t *h;
int i, m, j, k, tindex;
uint32_t pp, v, s, *s32, *d32, *top, *realtop;
int i, j, k;
uint32_t pp, v, *s32, *d32, *top;
__m128i si, tables[8][4], p0, p1, p2, p3, mask1, v0, v1, v2, v3;
struct gf_split_4_32_lazy_data *ld;
uint8_t btable[16];
@ -1887,9 +1884,9 @@ gf_w32_split_4_32_lazy_sse_multiply_region(gf_t *gf, void *src, void *dest, uint
{
#ifdef INTEL_SSSE3
gf_internal_t *h;
int i, m, j, k, tindex;
uint32_t pp, v, s, *s32, *d32, *top, tmp_table[16];
__m128i vi, si, tables[8][4], p0, p1, p2, p3, mask1, v0, v1, v2, v3, mask8;
int i, j, k;
uint32_t pp, v, *s32, *d32, *top, tmp_table[16];
__m128i si, tables[8][4], p0, p1, p2, p3, mask1, v0, v1, v2, v3, mask8;
__m128i tv1, tv2, tv3, tv0;
uint8_t btable[16];
gf_region_data rd;

View File

@ -421,7 +421,7 @@ gf_w4_single_table_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_3
{
gf_region_data rd;
uint8_t *base, *sptr, *dptr, *top;
__m128i tl, loset, h4, r, va, th;
__m128i tl, loset, r, va, th;
struct gf_single_table_data *std;
@ -924,8 +924,7 @@ gf_w4_bytwo_p_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t v
int i;
uint8_t *s8, *d8;
uint8_t vrev;
uint64_t amask;
__m128i pp, m1, m2, ta, prod, t1, t2, tp, one, v;
__m128i pp, m1, ta, prod, t1, t2, tp, one, v;
struct gf_bytwo_data *btd;
gf_region_data rd;
@ -948,7 +947,6 @@ gf_w4_bytwo_p_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t v
pp = _mm_set1_epi8(btd->prim_poly&0xff);
m1 = _mm_set1_epi8((btd->mask1)&0xff);
m2 = _mm_set1_epi8((btd->mask2)&0xff);
one = _mm_set1_epi8(1);
while (d8 < (uint8_t *) rd.d_top) {
@ -1039,16 +1037,14 @@ static
void
gf_w4_bytwo_b_sse_region_2_noxor(gf_region_data *rd, struct gf_bytwo_data *btd)
{
int i;
uint8_t *d8, *s8, tb;
__m128i pp, m1, m2, t1, t2, va, vb;
uint8_t *d8, *s8;
__m128i pp, m1, t1, t2, va;
s8 = (uint8_t *) rd->s_start;
d8 = (uint8_t *) rd->d_start;
pp = _mm_set1_epi8(btd->prim_poly&0xff);
m1 = _mm_set1_epi8((btd->mask1)&0xff);
m2 = _mm_set1_epi8((btd->mask2)&0xff);
while (d8 < (uint8_t *) rd->d_top) {
va = _mm_load_si128 ((__m128i *)(s8));
@ -1065,16 +1061,14 @@ static
void
gf_w4_bytwo_b_sse_region_2_xor(gf_region_data *rd, struct gf_bytwo_data *btd)
{
int i;
uint8_t *d8, *s8, tb;
__m128i pp, m1, m2, t1, t2, va, vb;
uint8_t *d8, *s8;
__m128i pp, m1, t1, t2, va, vb;
s8 = (uint8_t *) rd->s_start;
d8 = (uint8_t *) rd->d_start;
pp = _mm_set1_epi8(btd->prim_poly&0xff);
m1 = _mm_set1_epi8((btd->mask1)&0xff);
m2 = _mm_set1_epi8((btd->mask2)&0xff);
while (d8 < (uint8_t *) rd->d_top) {
va = _mm_load_si128 ((__m128i *)(s8));
@ -1093,16 +1087,14 @@ static
void
gf_w4_bytwo_b_sse_region_4_noxor(gf_region_data *rd, struct gf_bytwo_data *btd)
{
int i;
uint8_t *d8, *s8, tb;
__m128i pp, m1, m2, t1, t2, va, vb;
uint8_t *d8, *s8;
__m128i pp, m1, t1, t2, va;
s8 = (uint8_t *) rd->s_start;
d8 = (uint8_t *) rd->d_start;
pp = _mm_set1_epi8(btd->prim_poly&0xff);
m1 = _mm_set1_epi8((btd->mask1)&0xff);
m2 = _mm_set1_epi8((btd->mask2)&0xff);
while (d8 < (uint8_t *) rd->d_top) {
va = _mm_load_si128 ((__m128i *)(s8));
@ -1120,16 +1112,14 @@ static
void
gf_w4_bytwo_b_sse_region_4_xor(gf_region_data *rd, struct gf_bytwo_data *btd)
{
int i;
uint8_t *d8, *s8, tb;
__m128i pp, m1, m2, t1, t2, va, vb;
uint8_t *d8, *s8;
__m128i pp, m1, t1, t2, va, vb;
s8 = (uint8_t *) rd->s_start;
d8 = (uint8_t *) rd->d_start;
pp = _mm_set1_epi8(btd->prim_poly&0xff);
m1 = _mm_set1_epi8((btd->mask1)&0xff);
m2 = _mm_set1_epi8((btd->mask2)&0xff);
while (d8 < (uint8_t *) rd->d_top) {
va = _mm_load_si128 ((__m128i *)(s8));
@ -1150,16 +1140,14 @@ static
void
gf_w4_bytwo_b_sse_region_3_noxor(gf_region_data *rd, struct gf_bytwo_data *btd)
{
int i;
uint8_t *d8, *s8, tb;
__m128i pp, m1, m2, t1, t2, va, vb;
uint8_t *d8, *s8;
__m128i pp, m1, t1, t2, va, vb;
s8 = (uint8_t *) rd->s_start;
d8 = (uint8_t *) rd->d_start;
pp = _mm_set1_epi8(btd->prim_poly&0xff);
m1 = _mm_set1_epi8((btd->mask1)&0xff);
m2 = _mm_set1_epi8((btd->mask2)&0xff);
while (d8 < (uint8_t *) rd->d_top) {
va = _mm_load_si128 ((__m128i *)(s8));
@ -1178,16 +1166,14 @@ static
void
gf_w4_bytwo_b_sse_region_3_xor(gf_region_data *rd, struct gf_bytwo_data *btd)
{
int i;
uint8_t *d8, *s8, tb;
__m128i pp, m1, m2, t1, t2, va, vb;
uint8_t *d8, *s8;
__m128i pp, m1, t1, t2, va, vb;
s8 = (uint8_t *) rd->s_start;
d8 = (uint8_t *) rd->d_start;
pp = _mm_set1_epi8(btd->prim_poly&0xff);
m1 = _mm_set1_epi8((btd->mask1)&0xff);
m2 = _mm_set1_epi8((btd->mask2)&0xff);
while (d8 < (uint8_t *) rd->d_top) {
va = _mm_load_si128 ((__m128i *)(s8));
@ -1206,16 +1192,14 @@ static
void
gf_w4_bytwo_b_sse_region_5_noxor(gf_region_data *rd, struct gf_bytwo_data *btd)
{
int i;
uint8_t *d8, *s8, tb;
__m128i pp, m1, m2, t1, t2, va, vb;
uint8_t *d8, *s8;
__m128i pp, m1, t1, t2, va, vb;
s8 = (uint8_t *) rd->s_start;
d8 = (uint8_t *) rd->d_start;
pp = _mm_set1_epi8(btd->prim_poly&0xff);
m1 = _mm_set1_epi8((btd->mask1)&0xff);
m2 = _mm_set1_epi8((btd->mask2)&0xff);
while (d8 < (uint8_t *) rd->d_top) {
va = _mm_load_si128 ((__m128i *)(s8));
@ -1235,16 +1219,14 @@ static
void
gf_w4_bytwo_b_sse_region_5_xor(gf_region_data *rd, struct gf_bytwo_data *btd)
{
int i;
uint8_t *d8, *s8, tb;
__m128i pp, m1, m2, t1, t2, va, vb;
uint8_t *d8, *s8;
__m128i pp, m1, t1, t2, va, vb;
s8 = (uint8_t *) rd->s_start;
d8 = (uint8_t *) rd->d_start;
pp = _mm_set1_epi8(btd->prim_poly&0xff);
m1 = _mm_set1_epi8((btd->mask1)&0xff);
m2 = _mm_set1_epi8((btd->mask2)&0xff);
while (d8 < (uint8_t *) rd->d_top) {
va = _mm_load_si128 ((__m128i *)(s8));
@ -1264,16 +1246,14 @@ static
void
gf_w4_bytwo_b_sse_region_7_noxor(gf_region_data *rd, struct gf_bytwo_data *btd)
{
int i;
uint8_t *d8, *s8, tb;
__m128i pp, m1, m2, t1, t2, va, vb;
uint8_t *d8, *s8;
__m128i pp, m1, t1, t2, va, vb;
s8 = (uint8_t *) rd->s_start;
d8 = (uint8_t *) rd->d_start;
pp = _mm_set1_epi8(btd->prim_poly&0xff);
m1 = _mm_set1_epi8((btd->mask1)&0xff);
m2 = _mm_set1_epi8((btd->mask2)&0xff);
while (d8 < (uint8_t *) rd->d_top) {
va = _mm_load_si128 ((__m128i *)(s8));
@ -1294,16 +1274,14 @@ static
void
gf_w4_bytwo_b_sse_region_7_xor(gf_region_data *rd, struct gf_bytwo_data *btd)
{
int i;
uint8_t *d8, *s8, tb;
__m128i pp, m1, m2, t1, t2, va, vb;
uint8_t *d8, *s8;
__m128i pp, m1, t1, t2, va, vb;
s8 = (uint8_t *) rd->s_start;
d8 = (uint8_t *) rd->d_start;
pp = _mm_set1_epi8(btd->prim_poly&0xff);
m1 = _mm_set1_epi8((btd->mask1)&0xff);
m2 = _mm_set1_epi8((btd->mask2)&0xff);
while (d8 < (uint8_t *) rd->d_top) {
va = _mm_load_si128 ((__m128i *)(s8));
@ -1324,16 +1302,14 @@ static
void
gf_w4_bytwo_b_sse_region_6_noxor(gf_region_data *rd, struct gf_bytwo_data *btd)
{
int i;
uint8_t *d8, *s8, tb;
__m128i pp, m1, m2, t1, t2, va, vb;
uint8_t *d8, *s8;
__m128i pp, m1, t1, t2, va, vb;
s8 = (uint8_t *) rd->s_start;
d8 = (uint8_t *) rd->d_start;
pp = _mm_set1_epi8(btd->prim_poly&0xff);
m1 = _mm_set1_epi8((btd->mask1)&0xff);
m2 = _mm_set1_epi8((btd->mask2)&0xff);
while (d8 < (uint8_t *) rd->d_top) {
va = _mm_load_si128 ((__m128i *)(s8));
@ -1353,16 +1329,14 @@ static
void
gf_w4_bytwo_b_sse_region_6_xor(gf_region_data *rd, struct gf_bytwo_data *btd)
{
int i;
uint8_t *d8, *s8, tb;
__m128i pp, m1, m2, t1, t2, va, vb;
uint8_t *d8, *s8;
__m128i pp, m1, t1, t2, va, vb;
s8 = (uint8_t *) rd->s_start;
d8 = (uint8_t *) rd->d_start;
pp = _mm_set1_epi8(btd->prim_poly&0xff);
m1 = _mm_set1_epi8((btd->mask1)&0xff);
m2 = _mm_set1_epi8((btd->mask2)&0xff);
while (d8 < (uint8_t *) rd->d_top) {
va = _mm_load_si128 ((__m128i *)(s8));
@ -1977,7 +1951,6 @@ int gf_w4_shift_init(gf_t *gf)
int gf_w4_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2)
{
int region_tbl_size;
int issse3 = 0;
#ifdef INTEL_SSSE3

View File

@ -93,14 +93,13 @@ void
gf_w64_clm_multiply_region_from_single_2(gf_t *gf, void *src, void *dest, gf_val_64_t val, int bytes, int
xor)
{
int i, size;
gf_val_64_t *s64, *d64, *top;
gf_region_data rd;
__m128i a, b;
__m128i result, r1;
__m128i prim_poly;
__m128i v, w;
__m128i w;
__m128i m1, m2, m3, m4;
gf_internal_t * h = gf->scratch;
@ -121,7 +120,6 @@ xor)
s64 = (gf_val_64_t *) rd.s_start;
d64 = (gf_val_64_t *) rd.d_start;
top = (gf_val_64_t *) rd.d_top;
size = bytes/sizeof(gf_val_64_t);
if (xor) {
while (d64 != top) {
@ -184,7 +182,6 @@ void
gf_w64_clm_multiply_region_from_single_4(gf_t *gf, void *src, void *dest, gf_val_64_t val, int bytes, int
xor)
{
int i, size;
gf_val_64_t *s64, *d64, *top;
gf_region_data rd;
@ -210,7 +207,6 @@ xor)
s64 = (gf_val_64_t *) rd.s_start;
d64 = (gf_val_64_t *) rd.d_start;
top = (gf_val_64_t *) rd.d_top;
size = bytes/sizeof(gf_val_64_t);
if (xor) {
while (d64 != top) {
@ -468,9 +464,7 @@ gf_w64_clm_multiply_region(gf_t *gf, void *src, void *dest, uint64_t val, int by
{
#if defined(INTEL_SSE4_PCLMUL)
gf_internal_t *h;
int i, j, k;
uint8_t *s8, *d8, *dtop;
uint64_t *s64, *d64;
gf_region_data rd;
__m128i v, b, m, prim_poly, c, fr, w, result;
@ -492,7 +486,6 @@ gf_w64_clm_multiply_region(gf_t *gf, void *src, void *dest, uint64_t val, int by
if (xor) {
while (d8 != dtop) {
s64 = (uint64_t *) s8;
b = _mm_load_si128((__m128i *) s8);
result = _mm_clmulepi64_si128 (b, v, 0);
c = _mm_insert_epi32 (_mm_srli_si128 (result, 8), 0, 0);
@ -521,7 +514,6 @@ gf_w64_clm_multiply_region(gf_t *gf, void *src, void *dest, uint64_t val, int by
}
} else {
while (d8 < dtop) {
s64 = (uint64_t *) s8;
b = _mm_load_si128((__m128i *) s8);
result = _mm_clmulepi64_si128 (b, v, 0);
c = _mm_insert_epi32 (_mm_srli_si128 (result, 8), 0, 0);
@ -1374,9 +1366,8 @@ static
void
gf_w64_bytwo_b_sse_region_2_xor(gf_region_data *rd)
{
int i;
uint64_t one64, amask;
uint8_t *d8, *s8, tb;
uint8_t *d8, *s8;
__m128i pp, m1, m2, t1, t2, va, vb;
gf_internal_t *h;
@ -1408,9 +1399,8 @@ static
void
gf_w64_bytwo_b_sse_region_2_noxor(gf_region_data *rd)
{
int i;
uint64_t one64, amask;
uint8_t *d8, *s8, tb;
uint8_t *d8, *s8;
__m128i pp, m1, m2, t1, t2, va;
gf_internal_t *h;
@ -1443,7 +1433,6 @@ gf_w64_bytwo_b_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_64_t
uint64_t itb, amask, one64;
uint8_t *d8, *s8;
__m128i pp, m1, m2, t1, t2, va, vb;
struct gf_w32_bytwo_data *btd;
gf_region_data rd;
gf_internal_t *h;
@ -1718,8 +1707,8 @@ static
gf_w64_split_4_64_lazy_sse_altmap_multiply_region(gf_t *gf, void *src, void *dest, uint64_t val, int bytes, int xor)
{
gf_internal_t *h;
int i, m, j, k, tindex;
uint64_t pp, v, s, *s64, *d64, *top;
int i, j, k;
uint64_t pp, v, *s64, *d64, *top;
__m128i si, tables[16][8], p[8], v0, mask1;
struct gf_split_4_64_lazy_data *ld;
uint8_t btable[16];
@ -1802,9 +1791,9 @@ static
gf_w64_split_4_64_lazy_sse_multiply_region(gf_t *gf, void *src, void *dest, uint64_t val, int bytes, int xor)
{
gf_internal_t *h;
int i, m, j, k, tindex;
uint64_t pp, v, s, *s64, *d64, *top;
__m128i si, tables[16][8], p[8], st[8], mask1, mask8, mask16, t1, t2;
int i, j, k;
uint64_t pp, v, *s64, *d64, *top;
__m128i si, tables[16][8], p[8], st[8], mask1, mask8, mask16, t1;
struct gf_split_4_64_lazy_data *ld;
uint8_t btable[16];
gf_region_data rd;

View File

@ -216,7 +216,7 @@ gf_w8_clm_multiply_2 (gf_t *gf, gf_val_32_t a8, gf_val_32_t b8)
__m128i a, b;
__m128i result;
__m128i prim_poly;
__m128i v, w;
__m128i w;
gf_internal_t * h = gf->scratch;
a = _mm_insert_epi32 (_mm_setzero_si128(), a8, 0);
@ -262,7 +262,7 @@ gf_w8_clm_multiply_3 (gf_t *gf, gf_val_32_t a8, gf_val_32_t b8)
__m128i a, b;
__m128i result;
__m128i prim_poly;
__m128i v, w;
__m128i w;
gf_internal_t * h = gf->scratch;
a = _mm_insert_epi32 (_mm_setzero_si128(), a8, 0);
@ -301,7 +301,7 @@ gf_w8_clm_multiply_4 (gf_t *gf, gf_val_32_t a8, gf_val_32_t b8)
__m128i a, b;
__m128i result;
__m128i prim_poly;
__m128i v, w;
__m128i w;
gf_internal_t * h = gf->scratch;
a = _mm_insert_epi32 (_mm_setzero_si128(), a8, 0);
@ -377,7 +377,7 @@ gf_w8_clm_multiply_region_from_single_2(gf_t *gf, void *src, void *dest, gf_val_
__m128i a, b;
__m128i result;
__m128i prim_poly;
__m128i v, w;
__m128i w;
gf_internal_t * h = gf->scratch;
prim_poly = _mm_set_epi32(0, 0, 0, (uint32_t)(h->prim_poly & 0x1ffULL));
@ -435,7 +435,7 @@ gf_w8_clm_multiply_region_from_single_3(gf_t *gf, void *src, void *dest, gf_val_
__m128i a, b;
__m128i result;
__m128i prim_poly;
__m128i v, w;
__m128i w;
gf_internal_t * h = gf->scratch;
prim_poly = _mm_set_epi32(0, 0, 0, (uint32_t)(h->prim_poly & 0x1ffULL));
@ -497,7 +497,7 @@ gf_w8_clm_multiply_region_from_single_4(gf_t *gf, void *src, void *dest, gf_val_
__m128i a, b;
__m128i result;
__m128i prim_poly;
__m128i v, w;
__m128i w;
gf_internal_t * h = gf->scratch;
prim_poly = _mm_set_epi32(0, 0, 0, (uint32_t)(h->prim_poly & 0x1ffULL));
@ -1058,9 +1058,8 @@ static
void
gf_w8_split_multiply_region_sse(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
{
uint8_t *s8, *d8, *bh, *bl, *sptr, *dptr, *top;
__m128i tbl, loset, t1, r, va, mth, mtl;
uint64_t altable[4];
uint8_t *bh, *bl, *sptr, *dptr;
__m128i loset, t1, r, va, mth, mtl;
struct gf_w8_half_table_data *htd;
gf_region_data rd;
@ -1676,7 +1675,6 @@ gf_w8_bytwo_p_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t v
int i;
uint8_t *s8, *d8;
uint8_t vrev;
uint64_t amask;
__m128i pp, m1, m2, ta, prod, t1, t2, tp, one, v;
struct gf_w8_bytwo_data *btd;
gf_region_data rd;
@ -1729,9 +1727,8 @@ static
void
gf_w8_bytwo_b_sse_region_2_noxor(gf_region_data *rd, struct gf_w8_bytwo_data *btd)
{
int i;
uint8_t *d8, *s8, tb;
__m128i pp, m1, m2, t1, t2, va, vb;
uint8_t *d8, *s8;
__m128i pp, m1, m2, t1, t2, va;
s8 = (uint8_t *) rd->s_start;
d8 = (uint8_t *) rd->d_start;
@ -1755,8 +1752,7 @@ static
void
gf_w8_bytwo_b_sse_region_2_xor(gf_region_data *rd, struct gf_w8_bytwo_data *btd)
{
int i;
uint8_t *d8, *s8, tb;
uint8_t *d8, *s8;
__m128i pp, m1, m2, t1, t2, va, vb;
s8 = (uint8_t *) rd->s_start;

View File

@ -70,18 +70,15 @@ int main(int argc, char **argv)
{
signal(SIGSEGV, SigHandler);
int w, i, verbose, single, region, tested, top;
int w, i, verbose, single, region, top;
int s_start, d_start, bytes, xor, alignment_test;
gf_t gf, gf_def;
time_t t0;
gf_internal_t *h;
gf_general_t *a, *b, *c, *d, *ai, *bi;
uint8_t a8, b8, c8, *mult4, *div4, *mult8, *div8;
uint16_t a16, b16, c16, d16, *log16, *alog16;
char as[50], bs[50], cs[50], ds[50], ais[50], bis[50];
uint32_t mask;
gf_general_t *a, *b, *c, *d;
uint8_t a8, b8, c8, *mult4 = NULL, *mult8 = NULL;
uint16_t a16, b16, c16, *log16 = NULL, *alog16 = NULL;
char as[50], bs[50], cs[50], ds[50];
uint32_t mask = 0;
char *ra, *rb, *rc, *rd, *target;
int align;
@ -118,8 +115,6 @@ int main(int argc, char **argv)
b = (gf_general_t *) malloc(sizeof(gf_general_t));
c = (gf_general_t *) malloc(sizeof(gf_general_t));
d = (gf_general_t *) malloc(sizeof(gf_general_t));
ai = (gf_general_t *) malloc(sizeof(gf_general_t));
bi = (gf_general_t *) malloc(sizeof(gf_general_t));
//15 bytes extra to make sure it's 16byte aligned
ra = (char *) malloc(sizeof(char)*REGION_SIZE+15);
@ -148,12 +143,10 @@ int main(int argc, char **argv)
problem("No default for this value of w");
if (w == 4) {
mult4 = gf_w4_get_mult_table(&gf);
div4 = gf_w4_get_div_table(&gf);
}
if (w == 8) {
mult8 = gf_w8_get_mult_table(&gf);
div8 = gf_w8_get_div_table(&gf);
}
if (w == 16) {
@ -243,7 +236,6 @@ int main(int argc, char **argv)
}
}
tested = 0;
gf_general_multiply(&gf, a, b, c);
/* If w is 4, 8 or 16, then there are inline multiplication/division methods.
@ -288,7 +280,6 @@ int main(int argc, char **argv)
/* If this is not composite, then first test against the default: */
if (h->mult_type != GF_MULT_COMPOSITE) {
tested = 1;
gf_general_multiply(&gf_def, a, b, d);
if (!gf_general_are_equal(c, d, w)) {
@ -309,7 +300,6 @@ int main(int argc, char **argv)
if (gf_general_is_zero(a, w) || gf_general_is_zero(b, w) ||
gf_general_is_one(a, w) || gf_general_is_one(b, w)) {
tested = 1;
if (((gf_general_is_zero(a, w) || gf_general_is_zero(b, w)) && !gf_general_is_zero(c, w)) ||
(gf_general_is_one(a, w) && !gf_general_are_equal(b, c, w)) ||
(gf_general_is_one(b, w) && !gf_general_are_equal(a, c, w))) {

View File

@ -62,7 +62,7 @@ void print_128(uint64_t *v)
int main(int argc, char **argv)
{
int hex, al, bl, w;
int hex, w;
uint32_t a, b, c, top;
uint64_t a64, b64, c64;
uint64_t a128[2], b128[2], c128[2];

View File

@ -76,7 +76,7 @@ int main(int argc, char *argv[])
int listing;
char *gf_argv[50], *x;
gf_t gf;
char divs[200], ks[10], ls[10];
char ls[10];
char * w_str;
if (argc != 4) usage(NULL);

View File

@ -84,7 +84,6 @@ int gcd_one(gf_t *gf, int w, int n, gf_general_t *poly, gf_general_t *prod)
{
gf_general_t *a, *b, zero, factor, p;
int i, j, da, db;
char buf[30];
gf_general_set_zero(&zero, w);
@ -123,7 +122,6 @@ void x_to_q_to_i_minus_x(gf_t *gf, int w, int n, gf_general_t *poly, int logq, i
gf_general_t *product;
gf_general_t p, zero, factor;
int j, k, lq;
char buf[20];
gf_general_set_zero(&zero, w);
product = (gf_general_t *) malloc(sizeof(gf_general_t) * n*2);
@ -183,7 +181,7 @@ void x_to_q_to_i_minus_x(gf_t *gf, int w, int n, gf_general_t *poly, int logq, i
main(int argc, char **argv)
{
int w, i, power, n, ap, success, j;
int w, i, power, n, ap, success;
gf_t gf;
gf_general_t *poly, *prod;
char *string, *ptr;