From f7c12a74f0d2d9048d50f14821fae7eb69edc139 Mon Sep 17 00:00:00 2001 From: Tim Felgentreff Date: Mon, 27 Jun 2022 18:26:13 +0200 Subject: [PATCH 01/13] improve performance of AiEnemyUnitsInDistance --- src/ai/ai_local.h | 4 ++-- src/ai/ai_resource.cpp | 26 ++++++++++++++------------ src/include/unit_find.h | 26 ++++++++++++++++++-------- 3 files changed, 34 insertions(+), 22 deletions(-) diff --git a/src/ai/ai_local.h b/src/ai/ai_local.h index e6a423213..67dddb12f 100644 --- a/src/ai/ai_local.h +++ b/src/ai/ai_local.h @@ -459,8 +459,8 @@ extern int AiFindWall(AiForce *force); /// Plan the an attack /// Send explorers around the map extern void AiSendExplorers(); -/// Enemy units in distance -extern int AiEnemyUnitsInDistance(const CPlayer &player, const CUnitType *type, +/// Check if there are enemy units in a given range (optionally of type) +extern bool AiEnemyUnitsInDistance(const CPlayer &player, const CUnitType *type, const Vec2i &pos, unsigned range); // diff --git a/src/ai/ai_resource.cpp b/src/ai/ai_resource.cpp index 34ed404dc..59e939716 100644 --- a/src/ai/ai_resource.cpp +++ b/src/ai/ai_resource.cpp @@ -169,18 +169,20 @@ static int AiCheckUnitTypeCosts(const CUnitType &type) return AiCheckCosts(type.Stats[AiPlayer->Player->Index].Costs); } +template class IsAEnemyUnitOf { public: explicit IsAEnemyUnitOf(const CPlayer &_player) : player(&_player) {} bool operator()(const CUnit *unit) const { - return unit->IsVisibleAsGoal(*player) && unit->IsEnemy(*player); + return (ignoreVisibility || unit->IsVisibleAsGoal(*player)) && unit->IsEnemy(*player); } private: const CPlayer *player; }; +template class IsAEnemyUnitWhichCanCounterAttackOf { public: @@ -189,7 +191,7 @@ public: {} bool operator()(const CUnit *unit) const { - return unit->IsVisibleAsGoal(*player) + return (ignoreVisibility || unit->IsVisibleAsGoal(*player)) && unit->IsEnemy(*player) && CanTarget(*unit->Type, *type); } @@ -199,42 +201,42 @@ private: }; /** -** Enemy units in distance. 
+** Check if there are enemy units in a given range. ** ** @param player Find enemies of this player ** @param type Optional unit type to check if enemy can target this ** @param pos location ** @param range Distance range to look. ** -** @return Number of enemy units. +** @return If there are any enemy units in the range */ -int AiEnemyUnitsInDistance(const CPlayer &player, - const CUnitType *type, const Vec2i &pos, unsigned range) +bool AiEnemyUnitsInDistance(const CPlayer &player, + const CUnitType *type, const Vec2i &pos, unsigned range) { const Vec2i offset(range, range); std::vector units; if (type == NULL) { - Select(pos - offset, pos + offset, units, IsAEnemyUnitOf(player)); + Select<1>(pos - offset, pos + offset, units, IsAEnemyUnitOf(player)); return static_cast(units.size()); } else { const Vec2i typeSize(type->TileWidth - 1, type->TileHeight - 1); - const IsAEnemyUnitWhichCanCounterAttackOf pred(player, *type); + const IsAEnemyUnitWhichCanCounterAttackOf pred(player, *type); - Select(pos - offset, pos + typeSize + offset, units, pred); + Select<1>(pos - offset, pos + typeSize + offset, units, pred); return static_cast(units.size()); } } /** -** Enemy units in distance. +** Check if there are enemy units in a given range. ** ** @param unit Find in distance for this unit. ** @param range Distance range to look. ** -** @return Number of enemy units. 
+** @return If there are any enemy units in the range */ -int AiEnemyUnitsInDistance(const CUnit &unit, unsigned range) +bool AiEnemyUnitsInDistance(const CUnit &unit, unsigned range) { return AiEnemyUnitsInDistance(*unit.Player, unit.Type, unit.tilePos, range); } diff --git a/src/include/unit_find.h b/src/include/unit_find.h index d3d8c4a32..690f52d64 100644 --- a/src/include/unit_find.h +++ b/src/include/unit_find.h @@ -221,12 +221,14 @@ void Select(const Vec2i <Pos, const Vec2i &rbPos, std::vector &units) void SelectFixed(const Vec2i <Pos, const Vec2i &rbPos, std::vector &units); void SelectAroundUnit(const CUnit &unit, int range, std::vector &around); -template +template void SelectFixed(const Vec2i <Pos, const Vec2i &rbPos, std::vector &units, Pred pred) { Assert(Map.Info.IsPointOnMap(ltPos)); Assert(Map.Info.IsPointOnMap(rbPos)); Assert(units.empty()); + units.reserve(selectMax << 1); + int max = selectMax || INT_MAX; for (Vec2i posIt = ltPos; posIt.y != rbPos.y + 1; ++posIt.y) { for (posIt.x = ltPos.x; posIt.x != rbPos.x + 1; ++posIt.x) { @@ -236,9 +238,17 @@ void SelectFixed(const Vec2i <Pos, const Vec2i &rbPos, std::vector &u for (size_t i = 0; i != cache.size(); ++i) { CUnit &unit = *cache[i]; - if (unit.CacheLock == 0 && pred(&unit)) { - unit.CacheLock = 1; - units.push_back(&unit); + if ((selectMax == 1 || unit.CacheLock == 0) && pred(&unit)) { + if (selectMax == 1) { + units.push_back(&unit); + return; + } else { + unit.CacheLock = 1; + units.push_back(&unit); + if (--max == 0) { + break; + } + } } } } @@ -248,25 +258,25 @@ void SelectFixed(const Vec2i <Pos, const Vec2i &rbPos, std::vector &u } } -template +template void SelectAroundUnit(const CUnit &unit, int range, std::vector &around, Pred pred) { const Vec2i offset(range, range); const Vec2i typeSize(unit.Type->TileWidth - 1, unit.Type->TileHeight - 1); - Select(unit.tilePos - offset, + Select(unit.tilePos - offset, unit.tilePos + typeSize + offset, around, 
MakeAndPredicate(IsNotTheSameUnitAs(unit), pred)); } -template +template void Select(const Vec2i <Pos, const Vec2i &rbPos, std::vector &units, Pred pred) { Vec2i minPos = ltPos; Vec2i maxPos = rbPos; Map.FixSelectionArea(minPos, maxPos); - SelectFixed(minPos, maxPos, units, pred); + SelectFixed(minPos, maxPos, units, pred); } template From 71608824e518604de90573d020755794cdb314ae Mon Sep 17 00:00:00 2001 From: Tim Felgentreff Date: Tue, 28 Jun 2022 06:42:27 +0200 Subject: [PATCH 02/13] reduce load on AI cycles by limiting how many forces may scan the map in one cycle --- src/ai/ai_force.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/ai/ai_force.cpp b/src/ai/ai_force.cpp index 404973640..0e9caa6f4 100644 --- a/src/ai/ai_force.cpp +++ b/src/ai/ai_force.cpp @@ -1137,7 +1137,13 @@ void AiForce::Update() void AiForceManager::Update() { - for (unsigned int f = 0; f < forces.size(); ++f) { + unsigned int fsize = forces.size(); + int maxPathing = 2; // reduce load by stopping after issuing a few map searches + for (unsigned int f = 0; f < fsize; ++f) { + if (maxPathing < 0) { + return; + } + AiForce &force = forces[f]; // Look if our defenders still have enemies in range. @@ -1160,6 +1166,7 @@ void AiForceManager::Update() if (force.Units[i]->MapDistanceTo(force.GoalPos) <= nearDist) { // Look if still enemies in attack range. 
const CUnit *dummy = NULL; + maxPathing--; if (!AiForceEnemyFinder(force, &dummy).found()) { force.ReturnToHome(); } @@ -1175,6 +1182,7 @@ void AiForceManager::Update() // Don't attack if there aren't our units near goal point std::vector nearGoal; const Vec2i offset(15, 15); + maxPathing--; Select(force.GoalPos - offset, force.GoalPos + offset, nearGoal, IsAnAlliedUnitOf(*force.Units[0]->Player)); if (nearGoal.empty()) { @@ -1206,6 +1214,7 @@ void AiForceManager::Update() } } else if (force.Attacking) { force.RemoveDeadUnit(); + maxPathing--; force.Update(); } } From 8d35af98b2b83ce44879d4de282f4d9a1edc878a Mon Sep 17 00:00:00 2001 From: Tim Felgentreff Date: Tue, 21 Jun 2022 22:28:19 +0200 Subject: [PATCH 03/13] change astar reset optimization to instead rely on memory alignment and the compiler optimizations for memset/memmove on aligned memory chunks with modern CPUs --- src/include/util.h | 8 +++ src/pathfinder/astar.cpp | 132 +++++++++++---------------------------- src/stratagus/util.cpp | 127 +++++++++++++++++++++++++++++++++++++ src/video/video.cpp | 2 +- 4 files changed, 174 insertions(+), 95 deletions(-) diff --git a/src/include/util.h b/src/include/util.h index b47e3c14c..692e42b78 100644 --- a/src/include/util.h +++ b/src/include/util.h @@ -142,6 +142,14 @@ void SetClipboard(std::string &str); int UTF8GetNext(const std::string &text, int curpos); int UTF8GetPrev(const std::string &text, int curpos); +/*---------------------------------------------------------------------------- +-- SIMD support +----------------------------------------------------------------------------*/ +bool supportsSSE2(); +bool supportsAVX(); +void *aligned_malloc(size_t alignment, size_t size); +void aligned_free(void *block); + //@} #endif /* __UTIL_H__ */ diff --git a/src/pathfinder/astar.cpp b/src/pathfinder/astar.cpp index 5cbd52c3c..2cfc3fc6f 100644 --- a/src/pathfinder/astar.cpp +++ b/src/pathfinder/astar.cpp @@ -51,10 +51,10 @@ 
----------------------------------------------------------------------------*/ struct Node { - int CostFromStart; /// Real costs to reach this point - short int CostToGoal; /// Estimated cost to goal - char InGoal; /// is this point in the goal - char Direction; /// Direction for trace back + int32_t CostFromStart; /// Real costs to reach this point + int16_t CostToGoal; /// Estimated cost to goal + int8_t InGoal; /// is this point in the goal + int8_t Direction; /// Direction for trace back }; struct Open { @@ -88,9 +88,6 @@ const int XY2Heading[3][3] = { {7, 6, 5}, {0, 0, 4}, {1, 2, 3}}; static Node *AStarMatrix; /// a list of close nodes, helps to speed up the matrix cleaning -static int *CloseSet; -static int CloseSetSize; -static int Threshold; static int OpenSetMaxSize; static int AStarMatrixSize; #define MAX_CLOSE_SET_RATIO 4 @@ -106,6 +103,7 @@ static bool AStarFixedEnemyUnitsUnpassable = false; static int AStarMapWidth; static int AStarMapHeight; +static int AStarMapMax; static int AStarGoalX; static int AStarGoalY; @@ -120,8 +118,9 @@ static Open *OpenSet; /// The size of the open node set static int OpenSetSize; -static int *CostMoveToCache; -static const int CacheNotSet = -5; +static int32_t *CostMoveToCache; +static int CostMoveToCacheSize; +static constexpr int CacheNotSet = -1; /*---------------------------------------------------------------------------- -- Profile @@ -257,19 +256,23 @@ void InitAStar(int mapWidth, int mapHeight) AStarMapWidth = mapWidth; AStarMapHeight = mapHeight; + AStarMapMax = AStarMapWidth * AStarMapHeight; - AStarMatrixSize = sizeof(Node) * AStarMapWidth * AStarMapHeight; - AStarMatrix = new Node[AStarMapWidth * AStarMapHeight]; + // align the matrix, the open set, and the cost to move cache + // on 32-byte boundary, in case the memset/memmove operations + // of the libc we're using has a 128/256/512bit SIMD vector + // instruction branch, since we might be clearing 8M of + // memory for a 2048x2048 map + AStarMatrixSize = 
sizeof(Node) * AStarMapMax; + AStarMatrix = (Node *)aligned_malloc(32, AStarMatrixSize); memset(AStarMatrix, 0, AStarMatrixSize); - Threshold = AStarMapWidth * AStarMapHeight / MAX_CLOSE_SET_RATIO; - CloseSetSize = Threshold; - CloseSet = new int[Threshold]; + OpenSetMaxSize = AStarMapMax / MAX_OPEN_SET_RATIO; + OpenSet = (Open *)aligned_malloc(32, OpenSetMaxSize * sizeof(Open)); - OpenSetMaxSize = AStarMapWidth * AStarMapHeight / MAX_OPEN_SET_RATIO; - OpenSet = new Open[OpenSetMaxSize]; - - CostMoveToCache = new int[AStarMapWidth * AStarMapHeight]; + CostMoveToCacheSize = sizeof(int32_t) * AStarMapMax; + CostMoveToCache = (int32_t*)aligned_malloc(32, CostMoveToCacheSize); + memset(CostMoveToCache, CacheNotSet, CostMoveToCacheSize); for (int i = 0; i < 9; ++i) { Heading2O[i] = Heading2Y[i] * AStarMapWidth; @@ -283,15 +286,12 @@ void InitAStar(int mapWidth, int mapHeight) */ void FreeAStar() { - delete[] AStarMatrix; + aligned_free(AStarMatrix); AStarMatrix = NULL; - delete[] CloseSet; - CloseSet = NULL; - CloseSetSize = 0; - delete[] OpenSet; + aligned_free(OpenSet); OpenSet = NULL; OpenSetSize = 0; - delete[] CostMoveToCache; + aligned_free(CostMoveToCache); CostMoveToCache = NULL; ProfilePrint(); @@ -312,62 +312,14 @@ static void CostMoveToCacheCleanUp(); static void AStarCleanUp() { ProfileBegin("AStarCleanUp"); - - if (CloseSetSize >= Threshold) { - AStarPrepare(); - CostMoveToCacheCleanUp(); - } else { - for (int i = 0; i < CloseSetSize; ++i) { - AStarMatrix[CloseSet[i]].CostFromStart = 0; - AStarMatrix[CloseSet[i]].InGoal = 0; - CostMoveToCache[CloseSet[i]] = CacheNotSet; - } - } + AStarPrepare(); + CostMoveToCacheCleanUp(); ProfileEnd("AStarCleanUp"); } static void CostMoveToCacheCleanUp() { - ProfileBegin("CostMoveToCacheCleanUp"); - int AStarMapMax = AStarMapWidth * AStarMapHeight; -#if 1 - int *ptr = CostMoveToCache; -#ifdef __x86_64__ - union { - intptr_t d; - int i[2]; - } conv; - conv.i[0] = CacheNotSet; - conv.i[1] = CacheNotSet; - - if 
(((uintptr_t)ptr) & 4) { - *ptr++ = CacheNotSet; - --AStarMapMax; - } -#endif - while (AStarMapMax > 3) { -#ifdef __x86_64__ - *((intptr_t *)ptr) = conv.d; - *((intptr_t *)(ptr + 2)) = conv.d; - ptr += 4; -#else - *ptr++ = CacheNotSet; - *ptr++ = CacheNotSet; - *ptr++ = CacheNotSet; - *ptr++ = CacheNotSet; -#endif - AStarMapMax -= 4; - }; - while (AStarMapMax) { - *ptr++ = CacheNotSet; - --AStarMapMax; - } -#else - for (int i = 0; i < AStarMapMax; ++i) { - CostMoveToCache[i] = CacheNotSet; - } -#endif - ProfileEnd("CostMoveToCacheCleanUp"); + memset(CostMoveToCache, CacheNotSet, CostMoveToCacheSize); } /** @@ -496,16 +448,6 @@ static int AStarFindNode(int eo) return -1; } -/** -** Add a node to the closed set -*/ -static void AStarAddToClose(int node) -{ - if (CloseSetSize < Threshold) { - CloseSet[CloseSetSize++] = node; - } -} - #define GetIndex(x, y) (x) + (y) * AStarMapWidth /* build-in costmoveto code */ @@ -605,12 +547,19 @@ static int CostMoveToCallBack_Default(unsigned int index, const CUnit &unit) */ static inline int CostMoveTo(unsigned int index, const CUnit &unit) { - int *c = &CostMoveToCache[index]; + int32_t *c = &CostMoveToCache[index]; if (*c != CacheNotSet) { - return *c; + // for performance reasons, CostMoveToCache uses -1 to + // indicate it is unset, but the algorithm is simpler + // if the range of costs is [-1, INT_MAX]. 
so we always + // store everything +1 + return *c - 1; } - *c = CostMoveToCallBack_Default(index, unit); - return *c; + *c = CostMoveToCallBack_Default(index, unit) + 1; +#ifdef DEBUG + Assert(c >= 0); +#endif + return *c - 1; } class AStarGoalMarker @@ -626,7 +575,6 @@ public: AStarMatrix[offset].InGoal = 1; *goal_reachable = true; } - AStarAddToClose(offset); } private: const CUnit &unit; @@ -957,7 +905,6 @@ int AStarFindPath(const Vec2i &startPos, const Vec2i &goalPos, int gw, int gh, AStarCleanUp(); OpenSetSize = 0; - CloseSetSize = 0; if (!AStarMarkGoal(goalPos, gw, gh, tilesizex, tilesizey, minrange, maxrange, unit)) { // goal is not reachable @@ -981,7 +928,6 @@ int AStarFindPath(const Vec2i &startPos, const Vec2i &goalPos, int gw, int gh, ProfileEnd("AStarFindPath"); return ret; } - AStarAddToClose(OpenSet[0].O); if (AStarMatrix[eo].InGoal) { ret = PF_REACHED; ProfileEnd("AStarFindPath"); @@ -1067,8 +1013,6 @@ int AStarFindPath(const Vec2i &startPos, const Vec2i &goalPos, int gw, int gh, ProfileEnd("AStarFindPath"); return ret; } - // we add the point to the close set - AStarAddToClose(eo); } else if (new_cost < AStarMatrix[eo].CostFromStart) { // Already visited node, but we have here a better path // I know, it's redundant (but simpler like this) diff --git a/src/stratagus/util.cpp b/src/stratagus/util.cpp index f2fa186a1..3f281ba7f 100644 --- a/src/stratagus/util.cpp +++ b/src/stratagus/util.cpp @@ -41,8 +41,10 @@ #include #include +#include #ifdef WIN32 #include +#include #endif #ifdef USE_STACKTRACE @@ -500,3 +502,128 @@ void PrintOnStdOut(const char *format, ...) va_end(valist); fflush(stdout); } + +/*---------------------------------------------------------------------------- + Check SSE/AVX support. + This can detect the instruction support of + SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, SSE4a, SSE5, and AVX. 
+ ----------------------------------------------------------------------------*/ + +#ifdef __GNUC__ + +static void __cpuid(int* cpuinfo, int info) +{ + __asm__ __volatile__( + "xchg %%ebx, %%edi;" + "cpuid;" + "xchg %%ebx, %%edi;" + :"=a" (cpuinfo[0]), "=D" (cpuinfo[1]), "=c" (cpuinfo[2]), "=d" (cpuinfo[3]) + :"0" (info) + ); +} + +static unsigned long long _xgetbv(unsigned int index) +{ + unsigned int eax, edx; + __asm__ __volatile__( + "xgetbv;" + : "=a" (eax), "=d"(edx) + : "c" (index) + ); + return ((unsigned long long)edx << 32) | eax; +} + +#endif + +struct SIMDSupport { + bool sseSupportted = false; + bool sse2Supportted = false; + bool sse3Supportted = false; + bool ssse3Supportted = false; + bool sse4_1Supportted = false; + bool sse4_2Supportted = false; + bool sse4aSupportted = false; + bool sse5Supportted = false; + bool avxSupportted = false; +}; + +static struct SIMDSupport checkSIMDSupport() { + struct SIMDSupport s; + + int cpuinfo[4]; + __cpuid(cpuinfo, 1); + + // Check SSE, SSE2, SSE3, SSSE3, SSE4.1, and SSE4.2 support + s.sseSupportted = cpuinfo[3] & (1 << 25) || false; + s.sse2Supportted = cpuinfo[3] & (1 << 26) || false; + s.sse3Supportted = cpuinfo[2] & (1 << 0) || false; + s.ssse3Supportted = cpuinfo[2] & (1 << 9) || false; + s.sse4_1Supportted = cpuinfo[2] & (1 << 19) || false; + s.sse4_2Supportted = cpuinfo[2] & (1 << 20) || false; + + // ---------------------------------------------------------------------- + + // Check AVX support + // References + // http://software.intel.com/en-us/blogs/2011/04/14/is-avx-enabled/ + // http://insufficientlycomplicated.wordpress.com/2011/11/07/detecting-intel-advanced-vector-extensions-avx-in-visual-studio/ + + s.avxSupportted = cpuinfo[2] & (1 << 28) || false; + bool osxsaveSupported = cpuinfo[2] & (1 << 27) || false; + if (osxsaveSupported && s.avxSupportted) + { + // _XCR_XFEATURE_ENABLED_MASK = 0 + unsigned long long xcrFeatureMask = _xgetbv(0); + s.avxSupportted = (xcrFeatureMask & 0x6) == 0x6; + } + 
+ // ---------------------------------------------------------------------- + + // Check SSE4a and SSE5 support + + // Get the number of valid extended IDs + __cpuid(cpuinfo, 0x80000000); + int numExtendedIds = cpuinfo[0]; + if (numExtendedIds >= 0x80000001) + { + __cpuid(cpuinfo, 0x80000001); + s.sse4aSupportted = cpuinfo[2] & (1 << 6) || false; + s.sse5Supportted = cpuinfo[2] & (1 << 11) || false; + } + + // ---------------------------------------------------------------------- + + return s; +} + +bool supportsSSE2() +{ + static struct SIMDSupport s = checkSIMDSupport(); + return s.sse2Supportted; +} + +bool supportsAVX() +{ + static struct SIMDSupport s = checkSIMDSupport(); + return s.avxSupportted; +} + +void *aligned_malloc(size_t alignment, size_t size) +{ +#ifdef WIN32 + return _aligned_malloc(size, alignment); +#elif _ISOC11_SOURCE + return aligned_alloc(alignment, size); +#else + return memalign(alignment, size); +#endif +} + +void aligned_free(void *block) +{ +#ifdef WIN32 + _aligned_free(block); +#else + free(block); +#endif +} \ No newline at end of file diff --git a/src/video/video.cpp b/src/video/video.cpp index 12e4b8a03..b978418d7 100644 --- a/src/video/video.cpp +++ b/src/video/video.cpp @@ -201,7 +201,7 @@ void SetClipping(int left, int top, int right, int bottom) { Assert(left <= right && top <= bottom && left >= 0 && left < Video.Width && top >= 0 && top < Video.Height && right >= 0 - && right < Video.Width && bottom >= 0 && bottom < Video.Height); + && right <= Video.Width && bottom >= 0 && bottom <= Video.Height); ClipX1 = left; ClipY1 = top; From 89e8560cd288aaf949252783ee025f9ad0cc515b Mon Sep 17 00:00:00 2001 From: Tim Felgentreff Date: Fri, 24 Jun 2022 20:18:27 +0200 Subject: [PATCH 04/13] make max iterations for astar configurable --- src/include/pathfinder.h | 2 ++ src/pathfinder/astar.cpp | 11 +++++------ src/pathfinder/script_pathfinder.cpp | 9 +++++++++ 3 files changed, 16 insertions(+), 6 deletions(-) diff --git 
a/src/include/pathfinder.h b/src/include/pathfinder.h index 1bdcb86e9..16843c177 100644 --- a/src/include/pathfinder.h +++ b/src/include/pathfinder.h @@ -199,6 +199,8 @@ extern int AStarMovingUnitCrossingCost; extern bool AStarKnowUnseenTerrain; /// Cost of using a square we haven't seen before. extern int AStarUnknownTerrainCost; +/// Maximum number of iterations of A* before giving up. +extern int AStarMaxSearchIterations; // // Convert heading into direction. diff --git a/src/pathfinder/astar.cpp b/src/pathfinder/astar.cpp index 2cfc3fc6f..90a71b77c 100644 --- a/src/pathfinder/astar.cpp +++ b/src/pathfinder/astar.cpp @@ -96,6 +96,7 @@ static int AStarMatrixSize; /// see pathfinder.h int AStarFixedUnitCrossingCost;// = MaxMapWidth * MaxMapHeight; int AStarMovingUnitCrossingCost = 5; +int AStarMaxSearchIterations = INT_MAX; bool AStarKnowUnseenTerrain = false; int AStarUnknownTerrainCost = 2; /// Used to temporary make enemy units unpassable (needs for correct path lenght calculating for automatic targeting alorithm) @@ -935,6 +936,8 @@ int AStarFindPath(const Vec2i &startPos, const Vec2i &goalPos, int gw, int gh, } Vec2i endPos; + int counter = AStarMaxSearchIterations; + // Begin search while (1) { // Find the best node of from the open set @@ -952,17 +955,13 @@ int AStarFindPath(const Vec2i &startPos, const Vec2i &goalPos, int gw, int gh, break; } -#if 0 // If we have looked too long, then exit. if (!counter--) { - // FIXME: Select a "good" point from the open set. - // Nearest point to goal. + // TODO: Select a "good" point from the open set. AstarDebugPrint("way too long\n"); - ret = PF_FAILED; ProfileEnd("AStarFindPath"); - return ret; + return PF_UNREACHABLE; } -#endif // Generate successors of this node. 
diff --git a/src/pathfinder/script_pathfinder.cpp b/src/pathfinder/script_pathfinder.cpp index 316a32191..6e19e5c35 100644 --- a/src/pathfinder/script_pathfinder.cpp +++ b/src/pathfinder/script_pathfinder.cpp @@ -93,6 +93,15 @@ static int CclAStar(lua_State *l) } else { AStarUnknownTerrainCost = i; } + } else if (!strcmp(value, "max-search-iterations")) { + ++j; + i = LuaToNumber(l, j + 1); + if (i <= 0) { + PrintFunction(); + fprintf(stdout, "Max A* search iterations must be strictly > 0\n"); + } else { + AStarMaxSearchIterations = i; + } } else { LuaError(l, "Unsupported tag: %s" _C_ value); } From 553d660d395df9f2e4188764a9efb4187f4fd077 Mon Sep 17 00:00:00 2001 From: Tim Felgentreff Date: Tue, 28 Jun 2022 07:00:17 +0200 Subject: [PATCH 05/13] comment unused cpu feature test code, align 512bits for AVX-512 --- src/include/util.h | 4 +- src/pathfinder/astar.cpp | 8 +- src/stratagus/util.cpp | 172 +++++++++++++++++++-------------------- 3 files changed, 92 insertions(+), 92 deletions(-) diff --git a/src/include/util.h b/src/include/util.h index 692e42b78..1aacecdb6 100644 --- a/src/include/util.h +++ b/src/include/util.h @@ -145,8 +145,8 @@ int UTF8GetPrev(const std::string &text, int curpos); /*---------------------------------------------------------------------------- -- SIMD support ----------------------------------------------------------------------------*/ -bool supportsSSE2(); -bool supportsAVX(); +// bool supportsSSE2(); +// bool supportsAVX(); void *aligned_malloc(size_t alignment, size_t size); void aligned_free(void *block); diff --git a/src/pathfinder/astar.cpp b/src/pathfinder/astar.cpp index 90a71b77c..f52f1f3ab 100644 --- a/src/pathfinder/astar.cpp +++ b/src/pathfinder/astar.cpp @@ -260,19 +260,19 @@ void InitAStar(int mapWidth, int mapHeight) AStarMapMax = AStarMapWidth * AStarMapHeight; // align the matrix, the open set, and the cost to move cache - // on 32-byte boundary, in case the memset/memmove operations + // on 64-byte boundary, in case 
the memset/memmove operations // of the libc we're using has a 128/256/512bit SIMD vector // instruction branch, since we might be clearing 8M of // memory for a 2048x2048 map AStarMatrixSize = sizeof(Node) * AStarMapMax; - AStarMatrix = (Node *)aligned_malloc(32, AStarMatrixSize); + AStarMatrix = (Node *)aligned_malloc(64, AStarMatrixSize); memset(AStarMatrix, 0, AStarMatrixSize); OpenSetMaxSize = AStarMapMax / MAX_OPEN_SET_RATIO; - OpenSet = (Open *)aligned_malloc(32, OpenSetMaxSize * sizeof(Open)); + OpenSet = (Open *)aligned_malloc(64, OpenSetMaxSize * sizeof(Open)); CostMoveToCacheSize = sizeof(int32_t) * AStarMapMax; - CostMoveToCache = (int32_t*)aligned_malloc(32, CostMoveToCacheSize); + CostMoveToCache = (int32_t*)aligned_malloc(64, CostMoveToCacheSize); memset(CostMoveToCache, CacheNotSet, CostMoveToCacheSize); for (int i = 0; i < 9; ++i) { diff --git a/src/stratagus/util.cpp b/src/stratagus/util.cpp index 3f281ba7f..6359b8860 100644 --- a/src/stratagus/util.cpp +++ b/src/stratagus/util.cpp @@ -503,110 +503,110 @@ void PrintOnStdOut(const char *format, ...) fflush(stdout); } -/*---------------------------------------------------------------------------- - Check SSE/AVX support. - This can detect the instruction support of - SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, SSE4a, SSE5, and AVX. - ----------------------------------------------------------------------------*/ +// /*---------------------------------------------------------------------------- +// Check SSE/AVX support. +// This can detect the instruction support of +// SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, SSE4a, SSE5, and AVX. 
+// ----------------------------------------------------------------------------*/ -#ifdef __GNUC__ +// #ifdef __GNUC__ -static void __cpuid(int* cpuinfo, int info) -{ - __asm__ __volatile__( - "xchg %%ebx, %%edi;" - "cpuid;" - "xchg %%ebx, %%edi;" - :"=a" (cpuinfo[0]), "=D" (cpuinfo[1]), "=c" (cpuinfo[2]), "=d" (cpuinfo[3]) - :"0" (info) - ); -} +// static void __cpuid(int* cpuinfo, int info) +// { +// __asm__ __volatile__( +// "xchg %%ebx, %%edi;" +// "cpuid;" +// "xchg %%ebx, %%edi;" +// :"=a" (cpuinfo[0]), "=D" (cpuinfo[1]), "=c" (cpuinfo[2]), "=d" (cpuinfo[3]) +// :"0" (info) +// ); +// } -static unsigned long long _xgetbv(unsigned int index) -{ - unsigned int eax, edx; - __asm__ __volatile__( - "xgetbv;" - : "=a" (eax), "=d"(edx) - : "c" (index) - ); - return ((unsigned long long)edx << 32) | eax; -} +// static unsigned long long _xgetbv(unsigned int index) +// { +// unsigned int eax, edx; +// __asm__ __volatile__( +// "xgetbv;" +// : "=a" (eax), "=d"(edx) +// : "c" (index) +// ); +// return ((unsigned long long)edx << 32) | eax; +// } -#endif +// #endif -struct SIMDSupport { - bool sseSupportted = false; - bool sse2Supportted = false; - bool sse3Supportted = false; - bool ssse3Supportted = false; - bool sse4_1Supportted = false; - bool sse4_2Supportted = false; - bool sse4aSupportted = false; - bool sse5Supportted = false; - bool avxSupportted = false; -}; +// struct SIMDSupport { +// bool sseSupportted = false; +// bool sse2Supportted = false; +// bool sse3Supportted = false; +// bool ssse3Supportted = false; +// bool sse4_1Supportted = false; +// bool sse4_2Supportted = false; +// bool sse4aSupportted = false; +// bool sse5Supportted = false; +// bool avxSupportted = false; +// }; -static struct SIMDSupport checkSIMDSupport() { - struct SIMDSupport s; +// static struct SIMDSupport checkSIMDSupport() { +// struct SIMDSupport s; - int cpuinfo[4]; - __cpuid(cpuinfo, 1); +// int cpuinfo[4]; +// __cpuid(cpuinfo, 1); - // Check SSE, SSE2, SSE3, SSSE3, SSE4.1, 
and SSE4.2 support - s.sseSupportted = cpuinfo[3] & (1 << 25) || false; - s.sse2Supportted = cpuinfo[3] & (1 << 26) || false; - s.sse3Supportted = cpuinfo[2] & (1 << 0) || false; - s.ssse3Supportted = cpuinfo[2] & (1 << 9) || false; - s.sse4_1Supportted = cpuinfo[2] & (1 << 19) || false; - s.sse4_2Supportted = cpuinfo[2] & (1 << 20) || false; +// // Check SSE, SSE2, SSE3, SSSE3, SSE4.1, and SSE4.2 support +// s.sseSupportted = cpuinfo[3] & (1 << 25) || false; +// s.sse2Supportted = cpuinfo[3] & (1 << 26) || false; +// s.sse3Supportted = cpuinfo[2] & (1 << 0) || false; +// s.ssse3Supportted = cpuinfo[2] & (1 << 9) || false; +// s.sse4_1Supportted = cpuinfo[2] & (1 << 19) || false; +// s.sse4_2Supportted = cpuinfo[2] & (1 << 20) || false; - // ---------------------------------------------------------------------- +// // ---------------------------------------------------------------------- - // Check AVX support - // References - // http://software.intel.com/en-us/blogs/2011/04/14/is-avx-enabled/ - // http://insufficientlycomplicated.wordpress.com/2011/11/07/detecting-intel-advanced-vector-extensions-avx-in-visual-studio/ +// // Check AVX support +// // References +// // http://software.intel.com/en-us/blogs/2011/04/14/is-avx-enabled/ +// // http://insufficientlycomplicated.wordpress.com/2011/11/07/detecting-intel-advanced-vector-extensions-avx-in-visual-studio/ - s.avxSupportted = cpuinfo[2] & (1 << 28) || false; - bool osxsaveSupported = cpuinfo[2] & (1 << 27) || false; - if (osxsaveSupported && s.avxSupportted) - { - // _XCR_XFEATURE_ENABLED_MASK = 0 - unsigned long long xcrFeatureMask = _xgetbv(0); - s.avxSupportted = (xcrFeatureMask & 0x6) == 0x6; - } +// s.avxSupportted = cpuinfo[2] & (1 << 28) || false; +// bool osxsaveSupported = cpuinfo[2] & (1 << 27) || false; +// if (osxsaveSupported && s.avxSupportted) +// { +// // _XCR_XFEATURE_ENABLED_MASK = 0 +// unsigned long long xcrFeatureMask = _xgetbv(0); +// s.avxSupportted = (xcrFeatureMask & 0x6) == 0x6; +// } 
- // ---------------------------------------------------------------------- +// // ---------------------------------------------------------------------- - // Check SSE4a and SSE5 support +// // Check SSE4a and SSE5 support - // Get the number of valid extended IDs - __cpuid(cpuinfo, 0x80000000); - int numExtendedIds = cpuinfo[0]; - if (numExtendedIds >= 0x80000001) - { - __cpuid(cpuinfo, 0x80000001); - s.sse4aSupportted = cpuinfo[2] & (1 << 6) || false; - s.sse5Supportted = cpuinfo[2] & (1 << 11) || false; - } +// // Get the number of valid extended IDs +// __cpuid(cpuinfo, 0x80000000); +// int numExtendedIds = cpuinfo[0]; +// if (numExtendedIds >= 0x80000001) +// { +// __cpuid(cpuinfo, 0x80000001); +// s.sse4aSupportted = cpuinfo[2] & (1 << 6) || false; +// s.sse5Supportted = cpuinfo[2] & (1 << 11) || false; +// } - // ---------------------------------------------------------------------- +// // ---------------------------------------------------------------------- - return s; -} +// return s; +// } -bool supportsSSE2() -{ - static struct SIMDSupport s = checkSIMDSupport(); - return s.sse2Supportted; -} +// bool supportsSSE2() +// { +// static struct SIMDSupport s = checkSIMDSupport(); +// return s.sse2Supportted; +// } -bool supportsAVX() -{ - static struct SIMDSupport s = checkSIMDSupport(); - return s.avxSupportted; -} +// bool supportsAVX() +// { +// static struct SIMDSupport s = checkSIMDSupport(); +// return s.avxSupportted; +// } void *aligned_malloc(size_t alignment, size_t size) { From 109bc10e0fdfaa54fb280132e7afed3f9d08db78 Mon Sep 17 00:00:00 2001 From: Tim Felgentreff Date: Tue, 28 Jun 2022 07:03:42 +0200 Subject: [PATCH 06/13] report benchmark results with cycles --- src/game/game.cpp | 6 ------ src/stratagus/mainloop.cpp | 9 +++++++++ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/game/game.cpp b/src/game/game.cpp index 8986dde14..7a6b4d93a 100644 --- a/src/game/game.cpp +++ b/src/game/game.cpp @@ -146,14 +146,8 @@ void 
StartMap(const std::string &filename, bool clean) UI.StatusLine.Set(NameLine); SetMessage("%s", _("Do it! Do it now!")); - long ticks = SDL_GetTicks(); // Play the game. GameMainLoop(); - if (Parameters::Instance.benchmark) { - ticks = SDL_GetTicks() - ticks; - double fps = FrameCounter * 1000.0 / ticks; - fprintf(stderr, "BENCHMARK RESULT: %f fps (%ldms for %ldframes)\n", fps, ticks, FrameCounter); - } // Clear screen Video.ClearScreen(); diff --git a/src/stratagus/mainloop.cpp b/src/stratagus/mainloop.cpp index 9900c0382..b08248863 100644 --- a/src/stratagus/mainloop.cpp +++ b/src/stratagus/mainloop.cpp @@ -52,6 +52,7 @@ #include "ui.h" #include "unit.h" #include "video.h" +#include "parameters.h" #include void DrawGuichanWidgets(); @@ -421,6 +422,8 @@ void GameMainLoop() CclCommand("if (GameStarting ~= nil) then GameStarting() end"); + long ticks = SDL_GetTicks(); + MultiPlayerReplayEachCycle(); SingleGameLoop(); @@ -445,6 +448,12 @@ void GameMainLoop() NetworkQuitGame(); EndReplayLog(); + if (Parameters::Instance.benchmark) { + ticks = SDL_GetTicks() - ticks; + double fps = FrameCounter * 1000.0 / ticks; + fprintf(stderr, "BENCHMARK RESULT: %f fps, %f fpc (%ldms for %ldframes in %ldcycles)\n", fps, GameCycle * 1000.0 / ticks, ticks, FrameCounter, GameCycle); + } + GameCycle = 0; CParticleManager::exit(); FlagRevealMap = MapRevealModes::cHidden; From beab00da2673eeb1ee2049f5efa362d42df17c6d Mon Sep 17 00:00:00 2001 From: Tim Felgentreff Date: Tue, 28 Jun 2022 21:38:23 +0200 Subject: [PATCH 07/13] no assert/debugprint in release builds --- src/include/stratagus.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/include/stratagus.h b/src/include/stratagus.h index 1c9ead09d..31315adeb 100644 --- a/src/include/stratagus.h +++ b/src/include/stratagus.h @@ -139,14 +139,22 @@ extern void PrintOnStdOut(const char *format, ...); /** ** Assert a condition. If cond is not true abort with file,line. 
*/ +#ifdef DEBUG #define Assert(cond) \ do { if (EnableAssert && !(cond)) { AbortAt(__FILE__, __LINE__, __func__, #cond); }} while (0) +#else +#define Assert(cond) +#endif /** ** Print debug information with function name. */ +#ifdef DEBUG #define DebugPrint(args) \ do { if (EnableDebugPrint) { PrintFunction(); PrintOnStdOut(args); } } while (0) +#else +#define DebugPrint(args) +#endif /*============================================================================ == Definitions From 13b20e108040ee6f6f481714480ce16c8123e6d6 Mon Sep 17 00:00:00 2001 From: Tim Felgentreff Date: Tue, 28 Jun 2022 21:38:25 +0200 Subject: [PATCH 08/13] XXX: do not crash on exit --- src/map/minimap.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/map/minimap.cpp b/src/map/minimap.cpp index 5d4a36a72..da79f1076 100644 --- a/src/map/minimap.cpp +++ b/src/map/minimap.cpp @@ -501,7 +501,7 @@ void CMinimap::Destroy() MinimapSurface = NULL; } if (MinimapFogSurface && MinimapFogSurface->format != NULL) { - SDL_FreeSurface(MinimapFogSurface); + // SDL_FreeSurface(MinimapFogSurface); MinimapSurface = NULL; } delete[] Minimap2MapX; From 1d8c81d09afa5ef30291e2243e23013cb781465f Mon Sep 17 00:00:00 2001 From: Tim Felgentreff Date: Tue, 28 Jun 2022 21:38:26 +0200 Subject: [PATCH 09/13] re-enable simd test functions --- CMakeLists.txt | 2 + src/include/util.h | 4 +- src/stratagus/util.cpp | 194 +++++++++++++++++++++++------------------ 3 files changed, 111 insertions(+), 89 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0b4d392ba..8f2fee2a1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -876,6 +876,8 @@ endif() if(LINUX) add_definitions(-DUSE_LINUX) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -ggdb") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -ggdb") endif() if(BEOS) diff --git a/src/include/util.h b/src/include/util.h index 1aacecdb6..692e42b78 100644 --- a/src/include/util.h +++ b/src/include/util.h @@ -145,8 +145,8 @@ int UTF8GetPrev(const std::string 
&text, int curpos); /*---------------------------------------------------------------------------- -- SIMD support ----------------------------------------------------------------------------*/ -// bool supportsSSE2(); -// bool supportsAVX(); +bool supportsSSE2(); +bool supportsAVX(); void *aligned_malloc(size_t alignment, size_t size); void aligned_free(void *block); diff --git a/src/stratagus/util.cpp b/src/stratagus/util.cpp index 6359b8860..8633b2ac5 100644 --- a/src/stratagus/util.cpp +++ b/src/stratagus/util.cpp @@ -503,110 +503,130 @@ void PrintOnStdOut(const char *format, ...) fflush(stdout); } -// /*---------------------------------------------------------------------------- -// Check SSE/AVX support. -// This can detect the instruction support of -// SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, SSE4a, SSE5, and AVX. -// ----------------------------------------------------------------------------*/ +/*---------------------------------------------------------------------------- + Check SSE/AVX support. + This can detect the instruction support of + SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, SSE4a, SSE5, and AVX. 
+ ----------------------------------------------------------------------------*/ -// #ifdef __GNUC__ +#ifdef __x86_64__ -// static void __cpuid(int* cpuinfo, int info) -// { -// __asm__ __volatile__( -// "xchg %%ebx, %%edi;" -// "cpuid;" -// "xchg %%ebx, %%edi;" -// :"=a" (cpuinfo[0]), "=D" (cpuinfo[1]), "=c" (cpuinfo[2]), "=d" (cpuinfo[3]) -// :"0" (info) -// ); -// } +#ifdef __GNUC__ -// static unsigned long long _xgetbv(unsigned int index) -// { -// unsigned int eax, edx; -// __asm__ __volatile__( -// "xgetbv;" -// : "=a" (eax), "=d"(edx) -// : "c" (index) -// ); -// return ((unsigned long long)edx << 32) | eax; -// } +static void __cpuid(int* cpuinfo, int info) +{ + __asm__ __volatile__( + "xchg %%ebx, %%edi;" + "cpuid;" + "xchg %%ebx, %%edi;" + :"=a" (cpuinfo[0]), "=D" (cpuinfo[1]), "=c" (cpuinfo[2]), "=d" (cpuinfo[3]) + :"0" (info) + ); +} -// #endif +static unsigned long long _my_xgetbv(unsigned int index) +{ + unsigned int eax, edx; + __asm__ __volatile__( + "xgetbv;" + : "=a" (eax), "=d"(edx) + : "c" (index) + ); + return ((unsigned long long)edx << 32) | eax; +} -// struct SIMDSupport { -// bool sseSupportted = false; -// bool sse2Supportted = false; -// bool sse3Supportted = false; -// bool ssse3Supportted = false; -// bool sse4_1Supportted = false; -// bool sse4_2Supportted = false; -// bool sse4aSupportted = false; -// bool sse5Supportted = false; -// bool avxSupportted = false; -// }; +#else // __GNUC__ -// static struct SIMDSupport checkSIMDSupport() { -// struct SIMDSupport s; +#define _my_xgetbv(index) _xgetbv(index) -// int cpuinfo[4]; -// __cpuid(cpuinfo, 1); +#endif // __GNUC__ -// // Check SSE, SSE2, SSE3, SSSE3, SSE4.1, and SSE4.2 support -// s.sseSupportted = cpuinfo[3] & (1 << 25) || false; -// s.sse2Supportted = cpuinfo[3] & (1 << 26) || false; -// s.sse3Supportted = cpuinfo[2] & (1 << 0) || false; -// s.ssse3Supportted = cpuinfo[2] & (1 << 9) || false; -// s.sse4_1Supportted = cpuinfo[2] & (1 << 19) || false; -// s.sse4_2Supportted = 
cpuinfo[2] & (1 << 20) || false; +struct SIMDSupport { + bool sseSupportted = false; + bool sse2Supportted = false; + bool sse3Supportted = false; + bool ssse3Supportted = false; + bool sse4_1Supportted = false; + bool sse4_2Supportted = false; + bool sse4aSupportted = false; + bool sse5Supportted = false; + bool avxSupportted = false; +}; -// // ---------------------------------------------------------------------- +static struct SIMDSupport checkSIMDSupport() { + struct SIMDSupport s; -// // Check AVX support -// // References -// // http://software.intel.com/en-us/blogs/2011/04/14/is-avx-enabled/ -// // http://insufficientlycomplicated.wordpress.com/2011/11/07/detecting-intel-advanced-vector-extensions-avx-in-visual-studio/ + int cpuinfo[4]; + __cpuid(cpuinfo, 1); -// s.avxSupportted = cpuinfo[2] & (1 << 28) || false; -// bool osxsaveSupported = cpuinfo[2] & (1 << 27) || false; -// if (osxsaveSupported && s.avxSupportted) -// { -// // _XCR_XFEATURE_ENABLED_MASK = 0 -// unsigned long long xcrFeatureMask = _xgetbv(0); -// s.avxSupportted = (xcrFeatureMask & 0x6) == 0x6; -// } + // Check SSE, SSE2, SSE3, SSSE3, SSE4.1, and SSE4.2 support + s.sseSupportted = cpuinfo[3] & (1 << 25) || false; + s.sse2Supportted = cpuinfo[3] & (1 << 26) || false; + s.sse3Supportted = cpuinfo[2] & (1 << 0) || false; + s.ssse3Supportted = cpuinfo[2] & (1 << 9) || false; + s.sse4_1Supportted = cpuinfo[2] & (1 << 19) || false; + s.sse4_2Supportted = cpuinfo[2] & (1 << 20) || false; -// // ---------------------------------------------------------------------- + // ---------------------------------------------------------------------- -// // Check SSE4a and SSE5 support + // Check AVX support + // References + // http://software.intel.com/en-us/blogs/2011/04/14/is-avx-enabled/ + // http://insufficientlycomplicated.wordpress.com/2011/11/07/detecting-intel-advanced-vector-extensions-avx-in-visual-studio/ -// // Get the number of valid extended IDs -// __cpuid(cpuinfo, 0x80000000); -// int 
numExtendedIds = cpuinfo[0]; -// if (numExtendedIds >= 0x80000001) -// { -// __cpuid(cpuinfo, 0x80000001); -// s.sse4aSupportted = cpuinfo[2] & (1 << 6) || false; -// s.sse5Supportted = cpuinfo[2] & (1 << 11) || false; -// } + s.avxSupportted = cpuinfo[2] & (1 << 28) || false; + bool osxsaveSupported = cpuinfo[2] & (1 << 27) || false; + if (osxsaveSupported && s.avxSupportted) + { + // _XCR_XFEATURE_ENABLED_MASK = 0 + unsigned long long xcrFeatureMask = _my_xgetbv(0); + s.avxSupportted = (xcrFeatureMask & 0x6) == 0x6; + } -// // ---------------------------------------------------------------------- + // ---------------------------------------------------------------------- -// return s; -// } + // Check SSE4a and SSE5 support -// bool supportsSSE2() -// { -// static struct SIMDSupport s = checkSIMDSupport(); -// return s.sse2Supportted; -// } + // Get the number of valid extended IDs + __cpuid(cpuinfo, 0x80000000); + int numExtendedIds = cpuinfo[0]; + if (numExtendedIds >= 0x80000001) + { + __cpuid(cpuinfo, 0x80000001); + s.sse4aSupportted = cpuinfo[2] & (1 << 6) || false; + s.sse5Supportted = cpuinfo[2] & (1 << 11) || false; + } -// bool supportsAVX() -// { -// static struct SIMDSupport s = checkSIMDSupport(); -// return s.avxSupportted; -// } + // ---------------------------------------------------------------------- + + return s; +} + +bool supportsSSE2() +{ + static struct SIMDSupport s = checkSIMDSupport(); + return s.sse2Supportted; +} + +bool supportsAVX() +{ + static struct SIMDSupport s = checkSIMDSupport(); + return s.avxSupportted; +} + +#else // __x86_64__ + +bool supportsSSE2() +{ + return false; +} + +bool supportsAVX() +{ + return false; +} + +#endif // __x86_64__ void *aligned_malloc(size_t alignment, size_t size) { @@ -626,4 +646,4 @@ void aligned_free(void *block) #else free(block); #endif -} \ No newline at end of file +} From 104d89de917031a0b78e1207ede0530ad1d08768 Mon Sep 17 00:00:00 2001 From: Tim Felgentreff Date: Tue, 28 Jun 2022 21:38:28 
+0200 Subject: [PATCH 10/13] re-order to fix compilation --- src/include/unit_find.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/include/unit_find.h b/src/include/unit_find.h index 690f52d64..d03d32e97 100644 --- a/src/include/unit_find.h +++ b/src/include/unit_find.h @@ -258,6 +258,16 @@ void SelectFixed(const Vec2i <Pos, const Vec2i &rbPos, std::vector &u } } +template +void Select(const Vec2i <Pos, const Vec2i &rbPos, std::vector &units, Pred pred) +{ + Vec2i minPos = ltPos; + Vec2i maxPos = rbPos; + + Map.FixSelectionArea(minPos, maxPos); + SelectFixed(minPos, maxPos, units, pred); +} + template void SelectAroundUnit(const CUnit &unit, int range, std::vector &around, Pred pred) { @@ -269,16 +279,6 @@ void SelectAroundUnit(const CUnit &unit, int range, std::vector &around MakeAndPredicate(IsNotTheSameUnitAs(unit), pred)); } -template -void Select(const Vec2i <Pos, const Vec2i &rbPos, std::vector &units, Pred pred) -{ - Vec2i minPos = ltPos; - Vec2i maxPos = rbPos; - - Map.FixSelectionArea(minPos, maxPos); - SelectFixed(minPos, maxPos, units, pred); -} - template CUnit *FindUnit_IfFixed(const Vec2i <Pos, const Vec2i &rbPos, Pred pred) { From afc5d8765bf8a760008c65006a375080d5a09e40 Mon Sep 17 00:00:00 2001 From: Tim Felgentreff Date: Tue, 28 Jun 2022 21:38:30 +0200 Subject: [PATCH 11/13] start skipping frames when we are too slow --- src/video/sdl.cpp | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/src/video/sdl.cpp b/src/video/sdl.cpp index db99a44b2..41b0835f9 100644 --- a/src/video/sdl.cpp +++ b/src/video/sdl.cpp @@ -719,6 +719,8 @@ const EventCallback *GetCallbacks() return Callbacks; } +static int SkipFrameMask = 0; + /** ** Wait for interactive input event for one frame. 
** @@ -738,6 +740,27 @@ void WaitEventsOneFrame() Uint32 ticks = SDL_GetTicks(); if (ticks > NextFrameTicks) { // We are too slow :( ++SlowFrameCounter; + if (SlowFrameCounter > FRAMES_PER_SECOND) { + unsigned long pct = (SlowFrameCounter * 100) / (FrameCounter ? FrameCounter : 1); + bool warn = false; + if (pct >= 40) { + warn = (SkipFrameMask > 0b1); + SkipFrameMask = 0b1; + } else if (pct >= 20) { + warn = (SkipFrameMask > 0b11); + SkipFrameMask = 0b11; + } else if (pct >= 10) { + warn = (SkipFrameMask == 0); + SkipFrameMask = 0b111; + } + if (warn) { + fprintf(stdout, "WARNING WARNING WARNING\n" + "Frames %lu, Slow frames %d = %lu%%, starting to skip every %d%s frame.\n", + FrameCounter, SlowFrameCounter, pct, SkipFrameMask + 1, SkipFrameMask == 1 ? "nd" : "th"); + fflush(stdout); + SlowFrameCounter = 0; + } + } } InputMouseTimeout(*GetCallbacks(), ticks); @@ -803,6 +826,9 @@ void RealizeVideoMemory() if (dummyRenderer) { return; } + if (SkipFrameMask && (FrameCounter & SkipFrameMask) == SkipFrameMask) { + return; + } if (NumRects) { //SDL_UpdateWindowSurfaceRects(TheWindow, Rects, NumRects); SDL_UpdateTexture(TheTexture, NULL, TheScreen->pixels, TheScreen->pitch); From 4936fa5d59956627a996c3dcdb3f351b15e46d02 Mon Sep 17 00:00:00 2001 From: Tim Felgentreff Date: Tue, 28 Jun 2022 21:38:32 +0200 Subject: [PATCH 12/13] fix typo --- src/stratagus/mainloop.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/stratagus/mainloop.cpp b/src/stratagus/mainloop.cpp index b08248863..e42516b82 100644 --- a/src/stratagus/mainloop.cpp +++ b/src/stratagus/mainloop.cpp @@ -451,7 +451,7 @@ void GameMainLoop() if (Parameters::Instance.benchmark) { ticks = SDL_GetTicks() - ticks; double fps = FrameCounter * 1000.0 / ticks; - fprintf(stderr, "BENCHMARK RESULT: %f fps, %f fpc (%ldms for %ldframes in %ldcycles)\n", fps, GameCycle * 1000.0 / ticks, ticks, FrameCounter, GameCycle); + fprintf(stderr, "BENCHMARK RESULT: %f fps, %f cps (%ldms for %ldframes in 
%ldcycles)\n", fps, GameCycle * 1000.0 / ticks, ticks, FrameCounter, GameCycle); } GameCycle = 0; From 379567fcb034f7f3181c473a2aaf19ebbd85f3fc Mon Sep 17 00:00:00 2001 From: Tim Felgentreff Date: Tue, 28 Jun 2022 21:38:35 +0200 Subject: [PATCH 13/13] more frame skipping, to work decently on rpi without acceleration --- src/video/sdl.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/video/sdl.cpp b/src/video/sdl.cpp index 41b0835f9..22abdc14e 100644 --- a/src/video/sdl.cpp +++ b/src/video/sdl.cpp @@ -744,18 +744,18 @@ void WaitEventsOneFrame() unsigned long pct = (SlowFrameCounter * 100) / (FrameCounter ? FrameCounter : 1); bool warn = false; if (pct >= 40) { - warn = (SkipFrameMask > 0b1); - SkipFrameMask = 0b1; + warn = (SkipFrameMask < 0b101); + SkipFrameMask = 0b101; } else if (pct >= 20) { - warn = (SkipFrameMask > 0b11); + warn = (SkipFrameMask < 0b11); SkipFrameMask = 0b11; } else if (pct >= 10) { - warn = (SkipFrameMask == 0); - SkipFrameMask = 0b111; + warn = (SkipFrameMask < 0b1); + SkipFrameMask = 0b1; } if (warn) { fprintf(stdout, "WARNING WARNING WARNING\n" - "Frames %lu, Slow frames %d = %lu%%, starting to skip every %d%s frame.\n", + "Frames %lu, Slow frames %d = %lu%%, starting to render only every %d%s frame.\n", FrameCounter, SlowFrameCounter, pct, SkipFrameMask + 1, SkipFrameMask == 1 ? "nd" : "th"); fflush(stdout); SlowFrameCounter = 0; @@ -826,7 +826,7 @@ void RealizeVideoMemory() if (dummyRenderer) { return; } - if (SkipFrameMask && (FrameCounter & SkipFrameMask) == SkipFrameMask) { + if (FrameCounter & SkipFrameMask) { return; } if (NumRects) {