Performance Improvement: Use a cache which caches result for getFacePositions.
authorLoic Blot <loic.blot@unix-experience.fr>
Sun, 15 Feb 2015 16:30:38 +0000 (17:30 +0100)
committerLoic Blot <loic.blot@unix-experience.fr>
Mon, 16 Feb 2015 10:27:44 +0000 (11:27 +0100)
This greatly reduce the number of std::list generated by caching the result, which is always constant for each radius selected.
In the callgrind map, you will see original:
  * 3.3M calls to std::list for 9700 calls to getFacePositions
In the modified version, you will see:
  * 3.3K calls to std::list for 6900 call to getFacePositions
Callgrind map is here: #2321

it's a huge performance improvement to l_find_node_near

src/clientiface.cpp
src/script/lua_api/l_env.cpp
src/util/numeric.cpp
src/util/numeric.h

index 9b952e36a31cb656066352817a7562c79d97e7a7..6180cf5da032a4f1280b21bef3a753b9cbe23d78 100644 (file)
@@ -59,7 +59,7 @@ void RemoteClient::ResendBlockIfOnWire(v3s16 p)
        }
 }
 
-void RemoteClient::GetNextBlocks(
+void RemoteClient::GetNextBlocks (
                ServerEnvironment *env,
                EmergeManager * emerge,
                float dtime,
@@ -182,18 +182,15 @@ void RemoteClient::GetNextBlocks(
        //bool queue_is_full = false;
 
        s16 d;
-       for(d = d_start; d <= d_max; d++)
-       {
+       for(d = d_start; d <= d_max; d++) {
                /*
                        Get the border/face dot coordinates of a "d-radiused"
                        box
                */
-               std::list<v3s16> list;
-               getFacePositions(list, d);
+               std::vector<v3s16> list = FacePositionCache::getFacePositions(d);
 
-               std::list<v3s16>::iterator li;
-               for(li=list.begin(); li!=list.end(); ++li)
-               {
+               std::vector<v3s16>::iterator li;
+               for(li = list.begin(); li != list.end(); ++li) {
                        v3s16 p = *li + center;
 
                        /*
index cd5d253ac50716071d856b2ff58e45716a107345..2445803d750495859faa35cc8a8dac04e40da725 100644 (file)
@@ -530,9 +530,8 @@ int ModApiEnvMod::l_find_node_near(lua_State *L)
        }
 
        for(int d=1; d<=radius; d++){
-               std::list<v3s16> list;
-               getFacePositions(list, d);
-               for(std::list<v3s16>::iterator i = list.begin();
+               std::vector<v3s16> list = FacePositionCache::getFacePositions(d);
+               for(std::vector<v3s16>::iterator i = list.begin();
                                i != list.end(); ++i){
                        v3s16 p = pos + (*i);
                        content_t c = env->getMap().getNodeNoEx(p).getContent();
index 6173515ee52489d45a7933217176233acb395c14..19f927134b5e33404a18f173b6ee174c66d60cb5 100644 (file)
@@ -20,79 +20,84 @@ with this program; if not, write to the Free Software Foundation, Inc.,
 #include "numeric.h"
 #include "mathconstants.h"
 
-#include "../log.h"
+#include "log.h"
 #include "../constants.h" // BS, MAP_BLOCKSIZE
 #include <string.h>
 #include <iostream>
 
+std::map<u16, std::vector<v3s16> > FacePositionCache::m_cache;
 // Calculate the borders of a "d-radius" cube
-void getFacePositions(std::list<v3s16> &list, u16 d)
+std::vector<v3s16> FacePositionCache::getFacePositions(u16 d)
 {
-       if(d == 0)
-       {
-               list.push_back(v3s16(0,0,0));
+       if (m_cache.find(d) != m_cache.end())
+               return m_cache[d];
+
+       generateFacePosition(d);
+       return m_cache[d];
+
+}
+
+void FacePositionCache::generateFacePosition(u16 d)
+{
+       m_cache[d] = std::vector<v3s16>();
+       if(d == 0) {
+               m_cache[d].push_back(v3s16(0,0,0));
                return;
        }
-       if(d == 1)
-       {
+       if(d == 1) {
                /*
                        This is an optimized sequence of coordinates.
                */
-               list.push_back(v3s16( 0, 1, 0)); // top
-               list.push_back(v3s16( 0, 0, 1)); // back
-               list.push_back(v3s16(-1, 0, 0)); // left
-               list.push_back(v3s16( 1, 0, 0)); // right
-               list.push_back(v3s16( 0, 0,-1)); // front
-               list.push_back(v3s16( 0,-1, 0)); // bottom
+               m_cache[d].push_back(v3s16( 0, 1, 0)); // top
+               m_cache[d].push_back(v3s16( 0, 0, 1)); // back
+               m_cache[d].push_back(v3s16(-1, 0, 0)); // left
+               m_cache[d].push_back(v3s16( 1, 0, 0)); // right
+               m_cache[d].push_back(v3s16( 0, 0,-1)); // front
+               m_cache[d].push_back(v3s16( 0,-1, 0)); // bottom
                // 6
-               list.push_back(v3s16(-1, 0, 1)); // back left
-               list.push_back(v3s16( 1, 0, 1)); // back right
-               list.push_back(v3s16(-1, 0,-1)); // front left
-               list.push_back(v3s16( 1, 0,-1)); // front right
-               list.push_back(v3s16(-1,-1, 0)); // bottom left
-               list.push_back(v3s16( 1,-1, 0)); // bottom right
-               list.push_back(v3s16( 0,-1, 1)); // bottom back
-               list.push_back(v3s16( 0,-1,-1)); // bottom front
-               list.push_back(v3s16(-1, 1, 0)); // top left
-               list.push_back(v3s16( 1, 1, 0)); // top right
-               list.push_back(v3s16( 0, 1, 1)); // top back
-               list.push_back(v3s16( 0, 1,-1)); // top front
+               m_cache[d].push_back(v3s16(-1, 0, 1)); // back left
+               m_cache[d].push_back(v3s16( 1, 0, 1)); // back right
+               m_cache[d].push_back(v3s16(-1, 0,-1)); // front left
+               m_cache[d].push_back(v3s16( 1, 0,-1)); // front right
+               m_cache[d].push_back(v3s16(-1,-1, 0)); // bottom left
+               m_cache[d].push_back(v3s16( 1,-1, 0)); // bottom right
+               m_cache[d].push_back(v3s16( 0,-1, 1)); // bottom back
+               m_cache[d].push_back(v3s16( 0,-1,-1)); // bottom front
+               m_cache[d].push_back(v3s16(-1, 1, 0)); // top left
+               m_cache[d].push_back(v3s16( 1, 1, 0)); // top right
+               m_cache[d].push_back(v3s16( 0, 1, 1)); // top back
+               m_cache[d].push_back(v3s16( 0, 1,-1)); // top front
                // 18
-               list.push_back(v3s16(-1, 1, 1)); // top back-left
-               list.push_back(v3s16( 1, 1, 1)); // top back-right
-               list.push_back(v3s16(-1, 1,-1)); // top front-left
-               list.push_back(v3s16( 1, 1,-1)); // top front-right
-               list.push_back(v3s16(-1,-1, 1)); // bottom back-left
-               list.push_back(v3s16( 1,-1, 1)); // bottom back-right
-               list.push_back(v3s16(-1,-1,-1)); // bottom front-left
-               list.push_back(v3s16( 1,-1,-1)); // bottom front-right
+               m_cache[d].push_back(v3s16(-1, 1, 1)); // top back-left
+               m_cache[d].push_back(v3s16( 1, 1, 1)); // top back-right
+               m_cache[d].push_back(v3s16(-1, 1,-1)); // top front-left
+               m_cache[d].push_back(v3s16( 1, 1,-1)); // top front-right
+               m_cache[d].push_back(v3s16(-1,-1, 1)); // bottom back-left
+               m_cache[d].push_back(v3s16( 1,-1, 1)); // bottom back-right
+               m_cache[d].push_back(v3s16(-1,-1,-1)); // bottom front-left
+               m_cache[d].push_back(v3s16( 1,-1,-1)); // bottom front-right
                // 26
                return;
        }
 
        // Take blocks in all sides, starting from y=0 and going +-y
-       for(s16 y=0; y<=d-1; y++)
-       {
+       for(s16 y=0; y<=d-1; y++) {
                // Left and right side, including borders
-               for(s16 z=-d; z<=d; z++)
-               {
-                       list.push_back(v3s16(d,y,z));
-                       list.push_back(v3s16(-d,y,z));
-                       if(y != 0)
-                       {
-                               list.push_back(v3s16(d,-y,z));
-                               list.push_back(v3s16(-d,-y,z));
+               for(s16 z=-d; z<=d; z++) {
+                       m_cache[d].push_back(v3s16(d,y,z));
+                       m_cache[d].push_back(v3s16(-d,y,z));
+                       if(y != 0) {
+                               m_cache[d].push_back(v3s16(d,-y,z));
+                               m_cache[d].push_back(v3s16(-d,-y,z));
                        }
                }
                // Back and front side, excluding borders
-               for(s16 x=-d+1; x<=d-1; x++)
-               {
-                       list.push_back(v3s16(x,y,d));
-                       list.push_back(v3s16(x,y,-d));
-                       if(y != 0)
-                       {
-                               list.push_back(v3s16(x,-y,d));
-                               list.push_back(v3s16(x,-y,-d));
+               for(s16 x=-d+1; x<=d-1; x++) {
+                       m_cache[d].push_back(v3s16(x,y,d));
+                       m_cache[d].push_back(v3s16(x,y,-d));
+                       if(y != 0) {
+                               m_cache[d].push_back(v3s16(x,-y,d));
+                               m_cache[d].push_back(v3s16(x,-y,-d));
                        }
                }
        }
@@ -100,10 +105,9 @@ void getFacePositions(std::list<v3s16> &list, u16 d)
        // Take the bottom and top face with borders
        // -d<x<d, y=+-d, -d<z<d
        for(s16 x=-d; x<=d; x++)
-       for(s16 z=-d; z<=d; z++)
-       {
-               list.push_back(v3s16(x,-d,z));
-               list.push_back(v3s16(x,d,z));
+       for(s16 z=-d; z<=d; z++) {
+               m_cache[d].push_back(v3s16(x,-d,z));
+               m_cache[d].push_back(v3s16(x,d,z));
        }
 }
 
index db1eb003ed70fd12bb5f94e149e8c54480801980..7fc3d216718107095a4dc75a0d0c6c813e8d07b5 100644 (file)
@@ -25,10 +25,23 @@ with this program; if not, write to the Free Software Foundation, Inc.,
 #include "../irr_v3d.h"
 #include "../irr_aabb3d.h"
 #include <list>
+#include <map>
+#include <vector>
 #include <algorithm>
 
-// Calculate the borders of a "d-radius" cube
-void getFacePositions(std::list<v3s16> &list, u16 d);
+
+/*
+ * This class permits to cache getFacePosition call results
+ * This reduces CPU usage and vector calls
+ */
+class FacePositionCache
+{
+public:
+       static std::vector<v3s16> getFacePositions(u16 d);
+private:
+       static void generateFacePosition(u16 d);
+       static std::map<u16, std::vector<v3s16> > m_cache;
+};
 
 class IndentationRaiser
 {