Comment out unused CPU feature test code; align allocations to 512 bits (64 bytes) for AVX-512
This commit is contained in:
parent
89e8560cd2
commit
553d660d39
3 changed files with 92 additions and 92 deletions
|
@ -145,8 +145,8 @@ int UTF8GetPrev(const std::string &text, int curpos);
|
|||
/*----------------------------------------------------------------------------
|
||||
-- SIMD support
|
||||
----------------------------------------------------------------------------*/
|
||||
bool supportsSSE2();
|
||||
bool supportsAVX();
|
||||
// bool supportsSSE2();
|
||||
// bool supportsAVX();
|
||||
void *aligned_malloc(size_t alignment, size_t size);
|
||||
void aligned_free(void *block);
|
||||
|
||||
|
|
|
@ -260,19 +260,19 @@ void InitAStar(int mapWidth, int mapHeight)
|
|||
AStarMapMax = AStarMapWidth * AStarMapHeight;
|
||||
|
||||
// align the matrix, the open set, and the cost to move cache
|
||||
// on 32-byte boundary, in case the memset/memmove operations
|
||||
// on 64-byte boundary, in case the memset/memmove operations
|
||||
// of the libc we're using have a 128/256/512bit SIMD vector
|
||||
// instruction branch, since we might be clearing 8M of
|
||||
// memory for a 2048x2048 map
|
||||
AStarMatrixSize = sizeof(Node) * AStarMapMax;
|
||||
AStarMatrix = (Node *)aligned_malloc(32, AStarMatrixSize);
|
||||
AStarMatrix = (Node *)aligned_malloc(64, AStarMatrixSize);
|
||||
memset(AStarMatrix, 0, AStarMatrixSize);
|
||||
|
||||
OpenSetMaxSize = AStarMapMax / MAX_OPEN_SET_RATIO;
|
||||
OpenSet = (Open *)aligned_malloc(32, OpenSetMaxSize * sizeof(Open));
|
||||
OpenSet = (Open *)aligned_malloc(64, OpenSetMaxSize * sizeof(Open));
|
||||
|
||||
CostMoveToCacheSize = sizeof(int32_t) * AStarMapMax;
|
||||
CostMoveToCache = (int32_t*)aligned_malloc(32, CostMoveToCacheSize);
|
||||
CostMoveToCache = (int32_t*)aligned_malloc(64, CostMoveToCacheSize);
|
||||
memset(CostMoveToCache, CacheNotSet, CostMoveToCacheSize);
|
||||
|
||||
for (int i = 0; i < 9; ++i) {
|
||||
|
|
|
@ -503,110 +503,110 @@ void PrintOnStdOut(const char *format, ...)
|
|||
fflush(stdout);
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
Check SSE/AVX support.
|
||||
This can detect the instruction support of
|
||||
SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, SSE4a, SSE5, and AVX.
|
||||
----------------------------------------------------------------------------*/
|
||||
// /*----------------------------------------------------------------------------
|
||||
// Check SSE/AVX support.
|
||||
// This can detect the instruction support of
|
||||
// SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, SSE4a, SSE5, and AVX.
|
||||
// ----------------------------------------------------------------------------*/
|
||||
|
||||
#ifdef __GNUC__
|
||||
// #ifdef __GNUC__
|
||||
|
||||
static void __cpuid(int* cpuinfo, int info)
|
||||
{
|
||||
__asm__ __volatile__(
|
||||
"xchg %%ebx, %%edi;"
|
||||
"cpuid;"
|
||||
"xchg %%ebx, %%edi;"
|
||||
:"=a" (cpuinfo[0]), "=D" (cpuinfo[1]), "=c" (cpuinfo[2]), "=d" (cpuinfo[3])
|
||||
:"0" (info)
|
||||
);
|
||||
}
|
||||
// static void __cpuid(int* cpuinfo, int info)
|
||||
// {
|
||||
// __asm__ __volatile__(
|
||||
// "xchg %%ebx, %%edi;"
|
||||
// "cpuid;"
|
||||
// "xchg %%ebx, %%edi;"
|
||||
// :"=a" (cpuinfo[0]), "=D" (cpuinfo[1]), "=c" (cpuinfo[2]), "=d" (cpuinfo[3])
|
||||
// :"0" (info)
|
||||
// );
|
||||
// }
|
||||
|
||||
static unsigned long long _xgetbv(unsigned int index)
|
||||
{
|
||||
unsigned int eax, edx;
|
||||
__asm__ __volatile__(
|
||||
"xgetbv;"
|
||||
: "=a" (eax), "=d"(edx)
|
||||
: "c" (index)
|
||||
);
|
||||
return ((unsigned long long)edx << 32) | eax;
|
||||
}
|
||||
// static unsigned long long _xgetbv(unsigned int index)
|
||||
// {
|
||||
// unsigned int eax, edx;
|
||||
// __asm__ __volatile__(
|
||||
// "xgetbv;"
|
||||
// : "=a" (eax), "=d"(edx)
|
||||
// : "c" (index)
|
||||
// );
|
||||
// return ((unsigned long long)edx << 32) | eax;
|
||||
// }
|
||||
|
||||
#endif
|
||||
// #endif
|
||||
|
||||
struct SIMDSupport {
|
||||
bool sseSupportted = false;
|
||||
bool sse2Supportted = false;
|
||||
bool sse3Supportted = false;
|
||||
bool ssse3Supportted = false;
|
||||
bool sse4_1Supportted = false;
|
||||
bool sse4_2Supportted = false;
|
||||
bool sse4aSupportted = false;
|
||||
bool sse5Supportted = false;
|
||||
bool avxSupportted = false;
|
||||
};
|
||||
// struct SIMDSupport {
|
||||
// bool sseSupportted = false;
|
||||
// bool sse2Supportted = false;
|
||||
// bool sse3Supportted = false;
|
||||
// bool ssse3Supportted = false;
|
||||
// bool sse4_1Supportted = false;
|
||||
// bool sse4_2Supportted = false;
|
||||
// bool sse4aSupportted = false;
|
||||
// bool sse5Supportted = false;
|
||||
// bool avxSupportted = false;
|
||||
// };
|
||||
|
||||
static struct SIMDSupport checkSIMDSupport() {
|
||||
struct SIMDSupport s;
|
||||
// static struct SIMDSupport checkSIMDSupport() {
|
||||
// struct SIMDSupport s;
|
||||
|
||||
int cpuinfo[4];
|
||||
__cpuid(cpuinfo, 1);
|
||||
// int cpuinfo[4];
|
||||
// __cpuid(cpuinfo, 1);
|
||||
|
||||
// Check SSE, SSE2, SSE3, SSSE3, SSE4.1, and SSE4.2 support
|
||||
s.sseSupportted = cpuinfo[3] & (1 << 25) || false;
|
||||
s.sse2Supportted = cpuinfo[3] & (1 << 26) || false;
|
||||
s.sse3Supportted = cpuinfo[2] & (1 << 0) || false;
|
||||
s.ssse3Supportted = cpuinfo[2] & (1 << 9) || false;
|
||||
s.sse4_1Supportted = cpuinfo[2] & (1 << 19) || false;
|
||||
s.sse4_2Supportted = cpuinfo[2] & (1 << 20) || false;
|
||||
// // Check SSE, SSE2, SSE3, SSSE3, SSE4.1, and SSE4.2 support
|
||||
// s.sseSupportted = cpuinfo[3] & (1 << 25) || false;
|
||||
// s.sse2Supportted = cpuinfo[3] & (1 << 26) || false;
|
||||
// s.sse3Supportted = cpuinfo[2] & (1 << 0) || false;
|
||||
// s.ssse3Supportted = cpuinfo[2] & (1 << 9) || false;
|
||||
// s.sse4_1Supportted = cpuinfo[2] & (1 << 19) || false;
|
||||
// s.sse4_2Supportted = cpuinfo[2] & (1 << 20) || false;
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// // ----------------------------------------------------------------------
|
||||
|
||||
// Check AVX support
|
||||
// References
|
||||
// http://software.intel.com/en-us/blogs/2011/04/14/is-avx-enabled/
|
||||
// http://insufficientlycomplicated.wordpress.com/2011/11/07/detecting-intel-advanced-vector-extensions-avx-in-visual-studio/
|
||||
// // Check AVX support
|
||||
// // References
|
||||
// // http://software.intel.com/en-us/blogs/2011/04/14/is-avx-enabled/
|
||||
// // http://insufficientlycomplicated.wordpress.com/2011/11/07/detecting-intel-advanced-vector-extensions-avx-in-visual-studio/
|
||||
|
||||
s.avxSupportted = cpuinfo[2] & (1 << 28) || false;
|
||||
bool osxsaveSupported = cpuinfo[2] & (1 << 27) || false;
|
||||
if (osxsaveSupported && s.avxSupportted)
|
||||
{
|
||||
// _XCR_XFEATURE_ENABLED_MASK = 0
|
||||
unsigned long long xcrFeatureMask = _xgetbv(0);
|
||||
s.avxSupportted = (xcrFeatureMask & 0x6) == 0x6;
|
||||
}
|
||||
// s.avxSupportted = cpuinfo[2] & (1 << 28) || false;
|
||||
// bool osxsaveSupported = cpuinfo[2] & (1 << 27) || false;
|
||||
// if (osxsaveSupported && s.avxSupportted)
|
||||
// {
|
||||
// // _XCR_XFEATURE_ENABLED_MASK = 0
|
||||
// unsigned long long xcrFeatureMask = _xgetbv(0);
|
||||
// s.avxSupportted = (xcrFeatureMask & 0x6) == 0x6;
|
||||
// }
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// // ----------------------------------------------------------------------
|
||||
|
||||
// Check SSE4a and SSE5 support
|
||||
// // Check SSE4a and SSE5 support
|
||||
|
||||
// Get the number of valid extended IDs
|
||||
__cpuid(cpuinfo, 0x80000000);
|
||||
int numExtendedIds = cpuinfo[0];
|
||||
if (numExtendedIds >= 0x80000001)
|
||||
{
|
||||
__cpuid(cpuinfo, 0x80000001);
|
||||
s.sse4aSupportted = cpuinfo[2] & (1 << 6) || false;
|
||||
s.sse5Supportted = cpuinfo[2] & (1 << 11) || false;
|
||||
}
|
||||
// // Get the number of valid extended IDs
|
||||
// __cpuid(cpuinfo, 0x80000000);
|
||||
// int numExtendedIds = cpuinfo[0];
|
||||
// if (numExtendedIds >= 0x80000001)
|
||||
// {
|
||||
// __cpuid(cpuinfo, 0x80000001);
|
||||
// s.sse4aSupportted = cpuinfo[2] & (1 << 6) || false;
|
||||
// s.sse5Supportted = cpuinfo[2] & (1 << 11) || false;
|
||||
// }
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// // ----------------------------------------------------------------------
|
||||
|
||||
return s;
|
||||
}
|
||||
// return s;
|
||||
// }
|
||||
|
||||
bool supportsSSE2()
|
||||
{
|
||||
static struct SIMDSupport s = checkSIMDSupport();
|
||||
return s.sse2Supportted;
|
||||
}
|
||||
// bool supportsSSE2()
|
||||
// {
|
||||
// static struct SIMDSupport s = checkSIMDSupport();
|
||||
// return s.sse2Supportted;
|
||||
// }
|
||||
|
||||
bool supportsAVX()
|
||||
{
|
||||
static struct SIMDSupport s = checkSIMDSupport();
|
||||
return s.avxSupportted;
|
||||
}
|
||||
// bool supportsAVX()
|
||||
// {
|
||||
// static struct SIMDSupport s = checkSIMDSupport();
|
||||
// return s.avxSupportted;
|
||||
// }
|
||||
|
||||
void *aligned_malloc(size_t alignment, size_t size)
|
||||
{
|
||||
|
|
Loading…
Reference in a new issue