Simplify SIMD make scripts

ax_ext.m4 no longer performs any CPU checks. Instead it just checks
whether the compiler supports the SIMD flags.

Runtime detection will choose the right methods based on the CPU
instructions available.

Intel AVX support is still done through the build since it would
require a major refactoring of the code base to support it at runtime.
For now I added a configuration flag --enable-avx that can be used
to compile with AVX support.

Also use cpu intrinsics instead of __asm__
master
Bassam Tabbara 2016-09-06 23:48:39 -07:00
parent 4339569f14
commit ad11042132
9 changed files with 79 additions and 434 deletions

View File

@ -66,5 +66,14 @@ AC_ARG_ENABLE([valgrind],
[enable_valgrind=no])
AM_CONDITIONAL(ENABLE_VALGRIND, test "x$enable_valgrind" != xno)
# --enable-avx: opt-in switch that adds -mavx to SIMD_FLAGS.
# The compiler is probed for -mavx first; the probe records an explicit
# yes/no so that a request for AVX on a compiler without -mavx support
# stops configure with an error instead of silently adding the flag.
AC_ARG_ENABLE([avx], [AS_HELP_STRING([--enable-avx], [Build with AVX optimizations])])
AX_CHECK_COMPILE_FLAG([-mavx], [ax_cv_support_avx=yes], [ax_cv_support_avx=no])
AS_IF([test "x$enable_avx" = "xyes"],
[AS_IF([test "x$ax_cv_support_avx" = "xno"],
[AC_MSG_ERROR([AVX requested but compiler does not support -mavx])],
[SIMD_FLAGS="$SIMD_FLAGS -mavx"])
])
AC_CONFIG_FILES([Makefile src/Makefile tools/Makefile test/Makefile examples/Makefile])
AC_OUTPUT

View File

@ -1,40 +1,7 @@
#
# Updated by KMG to support -DINTEL_SSE for GF-Complete
# This macro is based on http://www.gnu.org/software/autoconf-archive/ax_ext.html
# but simplified to do compile time SIMD checks only
#
# ===========================================================================
# http://www.gnu.org/software/autoconf-archive/ax_ext.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_EXT
#
# DESCRIPTION
#
# Find supported SIMD extensions by requesting cpuid. When an SIMD
# extension is found, the -m"simdextensionname" is added to SIMD_FLAGS if
# compiler supports it. For example, if "sse2" is available, then "-msse2"
# is added to SIMD_FLAGS.
#
# This macro calls:
#
# AC_SUBST(SIMD_FLAGS)
#
# And defines:
#
# HAVE_MMX / HAVE_SSE / HAVE_SSE2 / HAVE_SSE3 / HAVE_SSSE3 / HAVE_SSE4.1 / HAVE_SSE4.2 / HAVE_AVX
#
# LICENSE
#
# Copyright (c) 2007 Christophe Tournayre <turn3r@users.sourceforge.net>
# Copyright (c) 2013 Michael Petch <mpetch@capp-sysware.com>
#
# Copying and distribution of this file, with or without modification, are
# permitted in any medium without royalty provided the copyright notice
# and this notice are preserved. This file is offered as-is, without any
# warranty.
#serial 12
AC_DEFUN([AX_EXT],
[
@ -45,263 +12,63 @@ AC_DEFUN([AX_EXT],
AC_DEFINE(HAVE_ARCH_AARCH64,,[targeting AArch64])
SIMD_FLAGS="$SIMD_FLAGS -DARCH_AARCH64"
AC_CACHE_CHECK([whether NEON is supported], [ax_cv_have_neon_ext],
[
# TODO: detect / cross-compile
ax_cv_have_neon_ext=yes
])
AC_CACHE_CHECK([whether cryptographic extension is supported], [ax_cv_have_arm_crypt_ext],
[
# TODO: detect / cross-compile
ax_cv_have_arm_crypt_ext=yes
])
if test "$ax_cv_have_arm_crypt_ext" = yes; then
AC_DEFINE(HAVE_ARM_CRYPT_EXT,,[Support ARM cryptographic extension])
fi
AC_CACHE_CHECK([whether NEON is enabled], [ax_cv_have_neon_ext], [ax_cv_have_neon_ext=yes])
if test "$ax_cv_have_neon_ext" = yes; then
AC_DEFINE(HAVE_NEON,,[Support NEON instructions])
AX_CHECK_COMPILE_FLAG(-march=armv8-a+simd, [SIMD_FLAGS="$SIMD_FLAGS -march=armv8-a+simd -DARM_NEON"], [ax_cv_have_neon_ext=no])
fi
if test "$ax_cv_have_arm_crypt_ext" = yes && test "$ax_cv_have_neon_ext" = yes; then
AX_CHECK_COMPILE_FLAG(-march=armv8-a+simd+crypto,
SIMD_FLAGS="$SIMD_FLAGS -march=armv8-a+simd+crypto -DARM_CRYPT -DARM_NEON", [])
elif test "$ax_cv_have_arm_crypt_ext" = yes; then
AX_CHECK_COMPILE_FLAG(-march=armv8-a+crypto,
SIMD_FLAGS="$SIMD_FLAGS -march=armv8-a+crypto -DARM_CRYPT", [])
elif test "$ax_cv_have_neon_ext" = yes; then
AX_CHECK_COMPILE_FLAG(-march=armv8-a+simd,
SIMD_FLAGS="$SIMD_FLAGS -march=armv8-a+simd -DARM_NEON", [])
fi
;;
;;
arm*)
AC_CACHE_CHECK([whether NEON is supported], [ax_cv_have_neon_ext],
[
# TODO: detect / cross-compile
ax_cv_have_neon_ext=yes
])
AC_CACHE_CHECK([whether NEON is enabled], [ax_cv_have_neon_ext], [ax_cv_have_neon_ext=yes])
if test "$ax_cv_have_neon_ext" = yes; then
AC_DEFINE(HAVE_NEON,,[Support NEON instructions])
AX_CHECK_COMPILE_FLAG(-mfpu=neon,
SIMD_FLAGS="$SIMD_FLAGS -mfpu=neon -DARM_NEON", [])
AX_CHECK_COMPILE_FLAG(-mfpu=neon, [SIMD_FLAGS="$SIMD_FLAGS -mfpu=neon -DARM_NEON"], [ax_cv_have_neon_ext=no])
fi
;;
;;
powerpc*)
AC_CACHE_CHECK([whether altivec is supported], [ax_cv_have_altivec_ext],
[
if test `/usr/sbin/sysctl -a 2>/dev/null| grep -c hw.optional.altivec` != 0; then
if test `/usr/sbin/sysctl -n hw.optional.altivec` = 1; then
ax_cv_have_altivec_ext=yes
fi
fi
])
if test "$ax_cv_have_altivec_ext" = yes; then
AC_DEFINE(HAVE_ALTIVEC,,[Support Altivec instructions])
AX_CHECK_COMPILE_FLAG(-faltivec, SIMD_FLAGS="$SIMD_FLAGS -faltivec", [])
fi
;;
AC_CACHE_CHECK([whether altivec is enabled], [ax_cv_have_altivec_ext], [ax_cv_have_altivec_ext=yes])
if test "$ax_cv_have_altivec_ext" = yes; then
AX_CHECK_COMPILE_FLAG(-faltivec, [SIMD_FLAGS="$SIMD_FLAGS -faltivec"], [ax_cv_have_altivec_ext=no])
fi
;;
i[[3456]]86*|x86_64*|amd64*)
AC_REQUIRE([AX_GCC_X86_CPUID])
AC_REQUIRE([AX_GCC_X86_AVX_XGETBV])
AX_GCC_X86_CPUID(0x00000001)
ecx=`echo $ax_cv_gcc_x86_cpuid_0x00000001 | cut -d ":" -f 3`
edx=`echo $ax_cv_gcc_x86_cpuid_0x00000001 | cut -d ":" -f 4`
AC_CACHE_CHECK([whether mmx is supported], [ax_cv_have_mmx_ext],
[
ax_cv_have_mmx_ext=no
if test "$((0x$edx>>23&0x01))" = 1; then
ax_cv_have_mmx_ext=yes
fi
])
AC_CACHE_CHECK([whether sse is supported], [ax_cv_have_sse_ext],
[
ax_cv_have_sse_ext=no
if test "$((0x$edx>>25&0x01))" = 1; then
ax_cv_have_sse_ext=yes
fi
])
AC_CACHE_CHECK([whether sse2 is supported], [ax_cv_have_sse2_ext],
[
ax_cv_have_sse2_ext=no
if test "$((0x$edx>>26&0x01))" = 1; then
ax_cv_have_sse2_ext=yes
fi
])
AC_CACHE_CHECK([whether sse3 is supported], [ax_cv_have_sse3_ext],
[
ax_cv_have_sse3_ext=no
if test "$((0x$ecx&0x01))" = 1; then
ax_cv_have_sse3_ext=yes
fi
])
AC_CACHE_CHECK([whether pclmuldq is supported], [ax_cv_have_pclmuldq_ext],
[
ax_cv_have_pclmuldq_ext=no
if test "$((0x$ecx>>1&0x01))" = 1; then
ax_cv_have_pclmuldq_ext=yes
fi
])
AC_CACHE_CHECK([whether ssse3 is supported], [ax_cv_have_ssse3_ext],
[
ax_cv_have_ssse3_ext=no
if test "$((0x$ecx>>9&0x01))" = 1; then
ax_cv_have_ssse3_ext=yes
fi
])
AC_CACHE_CHECK([whether sse4.1 is supported], [ax_cv_have_sse41_ext],
[
ax_cv_have_sse41_ext=no
if test "$((0x$ecx>>19&0x01))" = 1; then
ax_cv_have_sse41_ext=yes
fi
])
AC_CACHE_CHECK([whether sse4.2 is supported], [ax_cv_have_sse42_ext],
[
ax_cv_have_sse42_ext=no
if test "$((0x$ecx>>20&0x01))" = 1; then
ax_cv_have_sse42_ext=yes
fi
])
AC_CACHE_CHECK([whether avx is supported by processor], [ax_cv_have_avx_cpu_ext],
[
ax_cv_have_avx_cpu_ext=no
if test "$((0x$ecx>>28&0x01))" = 1; then
ax_cv_have_avx_cpu_ext=yes
fi
])
if test x"$ax_cv_have_avx_cpu_ext" = x"yes"; then
AX_GCC_X86_AVX_XGETBV(0x00000000)
xgetbv_eax="0"
if test x"$ax_cv_gcc_x86_avx_xgetbv_0x00000000" != x"unknown"; then
xgetbv_eax=`echo $ax_cv_gcc_x86_avx_xgetbv_0x00000000 | cut -d ":" -f 1`
fi
AC_CACHE_CHECK([whether avx is supported by operating system], [ax_cv_have_avx_ext],
[
ax_cv_have_avx_ext=no
if test "$((0x$ecx>>27&0x01))" = 1; then
if test "$((0x$xgetbv_eax&0x6))" = 6; then
ax_cv_have_avx_ext=yes
fi
fi
])
if test x"$ax_cv_have_avx_ext" = x"no"; then
AC_MSG_WARN([Your processor supports AVX, but your operating system doesn't])
fi
fi
if test "$ax_cv_have_mmx_ext" = yes; then
AX_CHECK_COMPILE_FLAG(-mmmx, ax_cv_support_mmx_ext=yes, [])
if test x"$ax_cv_support_mmx_ext" = x"yes"; then
SIMD_FLAGS="$SIMD_FLAGS -mmmx"
AC_DEFINE(HAVE_MMX,,[Support mmx instructions])
else
AC_MSG_WARN([Your processor supports mmx instructions but not your compiler, can you try another compiler?])
fi
fi
AC_CACHE_CHECK([whether sse is enabled], [ax_cv_have_sse_ext], [ax_cv_have_sse_ext=yes])
if test "$ax_cv_have_sse_ext" = yes; then
AX_CHECK_COMPILE_FLAG(-msse, ax_cv_support_sse_ext=yes, [])
if test x"$ax_cv_support_sse_ext" = x"yes"; then
SIMD_FLAGS="$SIMD_FLAGS -msse -DINTEL_SSE"
AC_DEFINE(HAVE_SSE,,[Support SSE (Streaming SIMD Extensions) instructions])
else
AC_MSG_WARN([Your processor supports sse instructions but not your compiler, can you try another compiler?])
fi
AX_CHECK_COMPILE_FLAG(-msse, [SIMD_FLAGS="$SIMD_FLAGS -msse -DINTEL_SSE"], [ax_cv_have_sse_ext=no])
fi
AC_CACHE_CHECK([whether sse2 is enabled], [ax_cv_have_sse2_ext], [ax_cv_have_sse2_ext=yes])
if test "$ax_cv_have_sse2_ext" = yes; then
AX_CHECK_COMPILE_FLAG(-msse2, ax_cv_support_sse2_ext=yes, [])
if test x"$ax_cv_support_sse2_ext" = x"yes"; then
SIMD_FLAGS="$SIMD_FLAGS -msse2 -DINTEL_SSE2"
AC_DEFINE(HAVE_SSE2,,[Support SSE2 (Streaming SIMD Extensions 2) instructions])
else
AC_MSG_WARN([Your processor supports sse2 instructions but not your compiler, can you try another compiler?])
fi
AX_CHECK_COMPILE_FLAG(-msse2, [SIMD_FLAGS="$SIMD_FLAGS -msse2 -DINTEL_SSE2"], [ax_cv_have_sse2_ext=no])
fi
AC_CACHE_CHECK([whether sse3 is enabled], [ax_cv_have_sse3_ext], [ax_cv_have_sse3_ext=yes])
if test "$ax_cv_have_sse3_ext" = yes; then
AX_CHECK_COMPILE_FLAG(-msse3, ax_cv_support_sse3_ext=yes, [])
if test x"$ax_cv_support_sse3_ext" = x"yes"; then
SIMD_FLAGS="$SIMD_FLAGS -msse3 -DINTEL_SSE3"
AC_DEFINE(HAVE_SSE3,,[Support SSE3 (Streaming SIMD Extensions 3) instructions])
else
AC_MSG_WARN([Your processor supports sse3 instructions but not your compiler, can you try another compiler?])
fi
fi
if test "$ax_cv_have_pclmuldq_ext" = yes; then
AX_CHECK_COMPILE_FLAG(-mpclmul, ax_cv_support_pclmuldq_ext=yes, [])
if test x"$ax_cv_support_pclmuldq_ext" = x"yes"; then
SIMD_FLAGS="$SIMD_FLAGS -mpclmul -DINTEL_SSE4_PCLMUL"
AC_DEFINE(HAVE_PCLMULDQ,,[Support (PCLMULDQ) Carry-Free Muliplication])
else
AC_MSG_WARN([Your processor supports pclmuldq instructions but not your compiler, can you try another compiler?])
fi
AX_CHECK_COMPILE_FLAG(-msse3, [SIMD_FLAGS="$SIMD_FLAGS -msse3 -DINTEL_SSE3"], [ax_cv_have_sse3_ext=no])
fi
AC_CACHE_CHECK([whether ssse3 is enabled], [ax_cv_have_ssse3_ext], [ax_cv_have_ssse3_ext=yes])
if test "$ax_cv_have_ssse3_ext" = yes; then
AX_CHECK_COMPILE_FLAG(-mssse3, ax_cv_support_ssse3_ext=yes, [])
if test x"$ax_cv_support_ssse3_ext" = x"yes"; then
SIMD_FLAGS="$SIMD_FLAGS -mssse3 -DINTEL_SSSE3"
AC_DEFINE(HAVE_SSSE3,,[Support SSSE3 (Supplemental Streaming SIMD Extensions 3) instructions])
else
AC_MSG_WARN([Your processor supports ssse3 instructions but not your compiler, can you try another compiler?])
fi
AX_CHECK_COMPILE_FLAG(-mssse3, [SIMD_FLAGS="$SIMD_FLAGS -mssse3 -DINTEL_SSSE3"], [ax_cv_have_ssse3_ext=no])
fi
AC_CACHE_CHECK([whether pclmuldq is enabled], [ax_cv_have_pclmuldq_ext], [ax_cv_have_pclmuldq_ext=yes])
if test "$ax_cv_have_pclmuldq_ext" = yes; then
AX_CHECK_COMPILE_FLAG(-mpclmul, [SIMD_FLAGS="$SIMD_FLAGS -mpclmul -DINTEL_SSE4_PCLMUL"], [ax_cv_have_pclmuldq_ext=no])
fi
AC_CACHE_CHECK([whether sse4.1 is enabled], [ax_cv_have_sse41_ext], [ax_cv_have_sse41_ext=yes])
if test "$ax_cv_have_sse41_ext" = yes; then
AX_CHECK_COMPILE_FLAG(-msse4.1, ax_cv_support_sse41_ext=yes, [])
if test x"$ax_cv_support_sse41_ext" = x"yes"; then
SIMD_FLAGS="$SIMD_FLAGS -msse4.1 -DINTEL_SSE4"
AC_DEFINE(HAVE_SSE4_1,,[Support SSSE4.1 (Streaming SIMD Extensions 4.1) instructions])
else
AC_MSG_WARN([Your processor supports sse4.1 instructions but not your compiler, can you try another compiler?])
fi
AX_CHECK_COMPILE_FLAG(-msse4.1, [SIMD_FLAGS="$SIMD_FLAGS -msse4.1 -DINTEL_SSE4"], [ax_cv_have_sse41_ext=no])
fi
AC_CACHE_CHECK([whether sse4.2 is enabled], [ax_cv_have_sse42_ext], [ax_cv_have_sse42_ext=yes])
if test "$ax_cv_have_sse42_ext" = yes; then
AX_CHECK_COMPILE_FLAG(-msse4.2, ax_cv_support_sse42_ext=yes, [])
if test x"$ax_cv_support_sse42_ext" = x"yes"; then
SIMD_FLAGS="$SIMD_FLAGS -msse4.2 -DINTEL_SSE4"
AC_DEFINE(HAVE_SSE4_2,,[Support SSSE4.2 (Streaming SIMD Extensions 4.2) instructions])
else
AC_MSG_WARN([Your processor supports sse4.2 instructions but not your compiler, can you try another compiler?])
fi
AX_CHECK_COMPILE_FLAG(-msse4.2, [SIMD_FLAGS="$SIMD_FLAGS -msse4.2 -DINTEL_SSE4"], [ax_cv_have_sse42_ext=no])
fi
if test "$ax_cv_have_avx_ext" = yes; then
AX_CHECK_COMPILE_FLAG(-mavx, ax_cv_support_avx_ext=yes, [])
if test x"$ax_cv_support_avx_ext" = x"yes"; then
SIMD_FLAGS="$SIMD_FLAGS -mavx"
AC_DEFINE(HAVE_AVX,,[Support AVX (Advanced Vector Extensions) instructions])
else
AC_MSG_WARN([Your processor supports avx instructions but not your compiler, can you try another compiler?])
fi
fi
;;
;;
esac
AC_SUBST(SIMD_FLAGS)

View File

@ -1,79 +0,0 @@
# ===========================================================================
# http://www.gnu.org/software/autoconf-archive/ax_gcc_x86_avx_xgetbv.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_GCC_X86_AVX_XGETBV(OP)
#
# DESCRIPTION
#
# On later x86 processors with AVX SIMD support, with gcc or a compiler
# that has a compatible syntax for inline assembly instructions, run a
# small program that executes the xgetbv instruction with input OP. This
# can be used to detect if the OS supports AVX instruction usage.
#
# On output, the values of the eax and edx registers are stored as
# hexadecimal strings as "eax:edx" in the cache variable
# ax_cv_gcc_x86_avx_xgetbv_OP.
#
# If the xgetbv instruction fails (because you are running a
# cross-compiler, or because you are not using gcc, or because you are on
# a processor that doesn't have this instruction),
# ax_cv_gcc_x86_avx_xgetbv_OP is set to the string "unknown".
#
# This macro mainly exists to be used in AX_EXT.
#
# LICENSE
#
# Copyright (c) 2013 Michael Petch <mpetch@capp-sysware.com>
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation, either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
#
# As a special exception, the respective Autoconf Macro's copyright owner
# gives unlimited permission to copy, distribute and modify the configure
# scripts that are the output of Autoconf when processing the Macro. You
# need not follow the terms of the GNU General Public License when using
# or distributing such scripts, even though portions of the text of the
# Macro appear in them. The GNU General Public License (GPL) does govern
# all other use of the material that constitutes the Autoconf Macro.
#
# This special exception to the GPL applies to versions of the Autoconf
# Macro released by the Autoconf Archive. When you make and distribute a
# modified version of the Autoconf Macro, you may extend this special
# exception to the GPL to apply to your modified version as well.
#serial 1
# AX_GCC_X86_AVX_XGETBV(OP)
# Builds and runs a small C program that executes xgetbv (emitted as raw
# opcode bytes) with OP loaded into ecx, then writes the resulting eax and
# edx values as "eax:edx" in hexadecimal to the file conftest_xgetbv.
# The file content becomes the value of the cache variable
# ax_cv_gcc_x86_avx_xgetbv_OP. The variable is set to "unknown" when the
# program cannot be built or run, and also in the cross-compiling case.
# The temporary file is removed in both the success and failure paths.
AC_DEFUN([AX_GCC_X86_AVX_XGETBV],
[AC_REQUIRE([AC_PROG_CC])
AC_LANG_PUSH([C])
AC_CACHE_CHECK(for x86-AVX xgetbv $1 output, ax_cv_gcc_x86_avx_xgetbv_$1,
[AC_RUN_IFELSE([AC_LANG_PROGRAM([#include <stdio.h>], [
int op = $1, eax, edx;
FILE *f;
/* Opcodes for xgetbv */
__asm__(".byte 0x0f, 0x01, 0xd0"
: "=a" (eax), "=d" (edx)
: "c" (op));
f = fopen("conftest_xgetbv", "w"); if (!f) return 1;
fprintf(f, "%x:%x\n", eax, edx);
fclose(f);
return 0;
])],
[ax_cv_gcc_x86_avx_xgetbv_$1=`cat conftest_xgetbv`; rm -f conftest_xgetbv],
[ax_cv_gcc_x86_avx_xgetbv_$1=unknown; rm -f conftest_xgetbv],
[ax_cv_gcc_x86_avx_xgetbv_$1=unknown])])
AC_LANG_POP([C])
])

View File

@ -1,79 +0,0 @@
# ===========================================================================
# http://www.gnu.org/software/autoconf-archive/ax_gcc_x86_cpuid.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_GCC_X86_CPUID(OP)
#
# DESCRIPTION
#
# On Pentium and later x86 processors, with gcc or a compiler that has a
# compatible syntax for inline assembly instructions, run a small program
# that executes the cpuid instruction with input OP. This can be used to
# detect the CPU type.
#
# On output, the values of the eax, ebx, ecx, and edx registers are stored
# as hexadecimal strings as "eax:ebx:ecx:edx" in the cache variable
# ax_cv_gcc_x86_cpuid_OP.
#
# If the cpuid instruction fails (because you are running a
# cross-compiler, or because you are not using gcc, or because you are on
# a processor that doesn't have this instruction), ax_cv_gcc_x86_cpuid_OP
# is set to the string "unknown".
#
# This macro mainly exists to be used in AX_GCC_ARCHFLAG.
#
# LICENSE
#
# Copyright (c) 2008 Steven G. Johnson <stevenj@alum.mit.edu>
# Copyright (c) 2008 Matteo Frigo
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation, either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
#
# As a special exception, the respective Autoconf Macro's copyright owner
# gives unlimited permission to copy, distribute and modify the configure
# scripts that are the output of Autoconf when processing the Macro. You
# need not follow the terms of the GNU General Public License when using
# or distributing such scripts, even though portions of the text of the
# Macro appear in them. The GNU General Public License (GPL) does govern
# all other use of the material that constitutes the Autoconf Macro.
#
# This special exception to the GPL applies to versions of the Autoconf
# Macro released by the Autoconf Archive. When you make and distribute a
# modified version of the Autoconf Macro, you may extend this special
# exception to the GPL to apply to your modified version as well.
#serial 7
# AX_GCC_X86_CPUID(OP)
# Builds and runs a small C program that executes the cpuid instruction
# with OP loaded into eax, then writes the resulting eax, ebx, ecx and edx
# values as "eax:ebx:ecx:edx" in hexadecimal to the file conftest_cpuid.
# The file content becomes the value of the cache variable
# ax_cv_gcc_x86_cpuid_OP. The variable is set to "unknown" when the
# program cannot be built or run, and also in the cross-compiling case.
# The temporary file is removed in both the success and failure paths.
AC_DEFUN([AX_GCC_X86_CPUID],
[AC_REQUIRE([AC_PROG_CC])
AC_LANG_PUSH([C])
AC_CACHE_CHECK(for x86 cpuid $1 output, ax_cv_gcc_x86_cpuid_$1,
[AC_RUN_IFELSE([AC_LANG_PROGRAM([#include <stdio.h>], [
int op = $1, eax, ebx, ecx, edx;
FILE *f;
__asm__("cpuid"
: "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
: "a" (op));
f = fopen("conftest_cpuid", "w"); if (!f) return 1;
fprintf(f, "%x:%x:%x:%x\n", eax, ebx, ecx, edx);
fclose(f);
return 0;
])],
[ax_cv_gcc_x86_cpuid_$1=`cat conftest_cpuid`; rm -f conftest_cpuid],
[ax_cv_gcc_x86_cpuid_$1=unknown; rm -f conftest_cpuid],
[ax_cv_gcc_x86_cpuid_$1=unknown])])
AC_LANG_POP([C])
])

View File

@ -22,20 +22,35 @@ int gf_cpu_supports_arm_neon = 0;
#if defined(__x86_64__)
#if defined(_MSC_VER)
/* __cpuidex is a compiler intrinsic declared in <intrin.h>. */
#include <intrin.h>
/*
 * cpuid(info, x): query CPUID leaf x with sub-leaf 0.
 * info[0..3] receive eax, ebx, ecx, edx respectively.
 * NOTE(review): MSVC defines _M_X64 rather than __x86_64__ -- confirm the
 * enclosing guard actually lets this branch be reached on that compiler.
 */
#define cpuid(info, x) __cpuidex(info, x, 0)
#elif defined(__GNUC__)
#include <cpuid.h>
/*
 * Query CPUID leaf InfoType with sub-leaf 0.
 * info[0..3] receive eax, ebx, ecx, edx respectively.
 * NOTE(review): this helper has external linkage under a very generic
 * name; consider making it static if no other translation unit uses it.
 */
void cpuid(int info[4], int InfoType){
__cpuid_count(InfoType, 0, info[0], info[1], info[2], info[3]);
}
#else
#error please add a way to detect CPU SIMD support at runtime
#endif
void gf_cpu_identify(void)
{
if (gf_cpu_identified) {
return;
}
int op = 1, eax, ebx, ecx, edx;
int reg[4];
__asm__("cpuid"
: "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
: "a" (op));
cpuid(reg, 1);
#if defined(INTEL_SSE4_PCLMUL)
if ((ecx & 1) != 0 && !getenv("GF_COMPLETE_DISABLE_SSE4_PCLMUL")) {
if ((reg[2] & 1) != 0 && !getenv("GF_COMPLETE_DISABLE_SSE4_PCLMUL")) {
gf_cpu_supports_intel_pclmul = 1;
#ifdef DEBUG_CPU_DETECTION
printf("#gf_cpu_supports_intel_pclmul\n");
@ -44,7 +59,7 @@ void gf_cpu_identify(void)
#endif
#if defined(INTEL_SSE4)
if (((ecx & (1<<20)) != 0 || (ecx & (1<<19)) != 0) && !getenv("GF_COMPLETE_DISABLE_SSE4")) {
if (((reg[2] & (1<<20)) != 0 || (reg[2] & (1<<19)) != 0) && !getenv("GF_COMPLETE_DISABLE_SSE4")) {
gf_cpu_supports_intel_sse4 = 1;
#ifdef DEBUG_CPU_DETECTION
printf("#gf_cpu_supports_intel_sse4\n");
@ -53,7 +68,7 @@ void gf_cpu_identify(void)
#endif
#if defined(INTEL_SSSE3)
if ((ecx & (1<<9)) != 0 && !getenv("GF_COMPLETE_DISABLE_SSSE3")) {
if ((reg[2] & (1<<9)) != 0 && !getenv("GF_COMPLETE_DISABLE_SSSE3")) {
gf_cpu_supports_intel_ssse3 = 1;
#ifdef DEBUG_CPU_DETECTION
printf("#gf_cpu_supports_intel_ssse3\n");
@ -62,7 +77,7 @@ void gf_cpu_identify(void)
#endif
#if defined(INTEL_SSE3)
if ((ecx & 1) != 0 && !getenv("GF_COMPLETE_DISABLE_SSE3")) {
if ((reg[2] & 1) != 0 && !getenv("GF_COMPLETE_DISABLE_SSE3")) {
gf_cpu_supports_intel_sse3 = 1;
#ifdef DEBUG_CPU_DETECTION
printf("#gf_cpu_supports_intel_sse3\n");
@ -71,7 +86,7 @@ void gf_cpu_identify(void)
#endif
#if defined(INTEL_SSE2)
if ((edx & (1<<26)) != 0 && !getenv("GF_COMPLETE_DISABLE_SSE2")) {
if ((reg[3] & (1<<26)) != 0 && !getenv("GF_COMPLETE_DISABLE_SSE2")) {
gf_cpu_supports_intel_sse2 = 1;
#ifdef DEBUG_CPU_DETECTION
printf("#gf_cpu_supports_intel_sse2\n");

View File

@ -1,7 +1,7 @@
# GF-Complete 'test' AM file
AM_CPPFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include
AM_CFLAGS = -O3 $(SIMD_FLAGS) -fPIC
AM_CFLAGS = -O3 -fPIC
bin_PROGRAMS = gf_unit

View File

@ -1,7 +1,7 @@
# GF-Complete 'tools' AM file
AM_CPPFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include
AM_CFLAGS = -O3 $(SIMD_FLAGS) -fPIC
AM_CFLAGS = -O3 -fPIC
bin_PROGRAMS = gf_mult gf_div gf_add gf_time gf_methods gf_poly gf_inline_time

View File

@ -27,6 +27,16 @@ test_functions() {
return ${failed}
}
# Build with DEBUG_CPU_DETECTION and record the CPU detection output.
# Reconfigures and rebuilds from clean with -DDEBUG_CPU_DETECTION, then runs
# gf_methods for w=32 and appends every output line containing '#' to the
# file named by ${results}.
# Returns 1 if the build fails, otherwise the number of failed steps
# (non-zero when gf_methods fails or prints no '#' lines).
test_detection() {
  failed=0

  { ./configure && make clean && make CFLAGS="-DDEBUG_CPU_DETECTION"; } || { echo "Compile FAILED" >> "${results}"; return 1; }
  # The word size is fixed at 32 here; there is no loop variable to report.
  { "${script_dir}/gf_methods" 32 -ACD -L | grep '#' >> "${results}"; } || { echo "gf_methods 32 FAILED" >> "${results}"; ((++failed)); }

  return ${failed}
}
compile_arm() {
failed=0
@ -167,7 +177,7 @@ runtime_intel_flags() {
{ ${script_dir}/gf_methods $i -ACD -X >> ${1}; } || { echo "gf_methods $i FAILED" >> ${1}; ((++failed)); }
done
echo "====SSE2 support..." >> ${1}
echo "====SSE2 support..." >> ${1}
export ax_cv_have_sse_ext=no
export ax_cv_have_sse2_ext=yes
export ax_cv_have_sse3_ext=no

View File

@ -224,6 +224,8 @@ run_test_simd_basic() {
{ run_test $arch $cpu "unit" && echo "SUCCESS"; } || { echo "FAILED"; ((++failed)); }
echo "=====running functions test"
{ run_test $arch $cpu "functions" && echo "SUCCESS"; } || { echo "FAILED"; ((++failed)); }
echo "=====running detection test"
{ run_test $arch $cpu "detection" && echo "SUCCESS"; } || { echo "FAILED"; ((++failed)); }
echo "=====running runtime test"
{ run_test $arch $cpu "runtime" && echo "SUCCESS"; } || { echo "FAILED"; ((++failed)); }
stop_qemu