From 5cf4729ab0790084af0b0a9e9a1062ddefff3401 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=A9bastien=20Villemot?= <sebastien@dynare.org>
Date: Wed, 5 Oct 2022 16:38:16 +0200
Subject: [PATCH] use_dll: dynamically limit the number of concurrently running
 GCC processes to the number of available logical processors

Ref. #41
---
 src/DynareMain.cc |  7 ++++---
 src/ModelTree.cc  | 20 ++++++++++++++++++++
 src/ModelTree.hh  | 19 ++++++++++++++-----
 3 files changed, 38 insertions(+), 8 deletions(-)

diff --git a/src/DynareMain.cc b/src/DynareMain.cc
index aa7677ea..ff03f7ce 100644
--- a/src/DynareMain.cc
+++ b/src/DynareMain.cc
@@ -524,9 +524,10 @@ main(int argc, char **argv)
                            nointeractive, config_file, check_model_changes, minimal_workspace, compute_xrefs,
                            mexext, matlabroot, dynareroot, onlymodel, gui, notime);
 
-  /* Not technically needed since those are std::jthread, but ensures that the
-     preprocessor final message is printed after the end of compilation (and is
-     not printed in case of compilation failure). */
+  /* Ensures that the preprocessor final message is printed after the end of
+     compilation (and is not printed in case of compilation failure); also
+     avoids potential issues with destroying the thread synchronization
+     mechanism too soon. */
   ModelTree::joinMEXCompilationThreads();
 
   cout << "Preprocessing completed." << endl;
diff --git a/src/ModelTree.cc b/src/ModelTree.cc
index 59172a84..f7b0bf0a 100644
--- a/src/ModelTree.cc
+++ b/src/ModelTree.cc
@@ -35,8 +35,12 @@
 
 #include <regex>
 #include <utility>
+#include <algorithm>
 
 vector<jthread> ModelTree::mex_compilation_threads {};
+condition_variable ModelTree::mex_compilation_cv;
+mutex ModelTree::mex_compilation_mut;
+unsigned int ModelTree::mex_compilation_available_processors {max(jthread::hardware_concurrency(), 1U)};
 
 void
 ModelTree::copyHelper(const ModelTree &m)
@@ -1758,11 +1762,27 @@ ModelTree::compileMEX(const filesystem::path &output_dir, const string &funcname
   string cmd_str { cmd.str() };
   mex_compilation_threads.emplace_back([cmd_str]
   {
+    // Wait until a logical processor becomes available
+    unique_lock<mutex> lk {mex_compilation_mut};
+    mex_compilation_cv.wait(lk, []
+    {
+      return mex_compilation_available_processors > 0;
+    });
+    // Signal to other threads that we have grabbed a logical processor
+    mex_compilation_available_processors--;
+    lk.unlock();
+
+    // Actually run the compilation command
     if (system(cmd_str.c_str()))
       {
         cerr << "Compilation failed" << endl;
         exit(EXIT_FAILURE);
       }
+
+    // Signal to other threads that we have freed a logical processor
+    lk.lock();
+    mex_compilation_available_processors++;
+    mex_compilation_cv.notify_one();
   });
 }
 
diff --git a/src/ModelTree.hh b/src/ModelTree.hh
index bc1e7224..c22e4e19 100644
--- a/src/ModelTree.hh
+++ b/src/ModelTree.hh
@@ -30,6 +30,8 @@
 #include <optional>
 #include <cassert>
 #include <thread>
+#include <mutex>
+#include <condition_variable>
 
 #include "DataTree.hh"
 #include "EquationTags.hh"
@@ -336,6 +338,15 @@ private:
   // Stores threads for compiling MEX files in parallel
   static vector<jthread> mex_compilation_threads;
 
+  /* The following three variables implement the synchronization mechanism for
+     limiting the number of concurrent GCC processes.
+     TODO: Replace these three variables with std::counting_semaphore (from
+     C++20) when upgrading to GCC 11 (and adjust included headers
+     correspondingly). */
+  static condition_variable mex_compilation_cv;
+  static mutex mex_compilation_mut;
+  static unsigned int mex_compilation_available_processors;
+
   /* Compute a pseudo-Jacobian whose all elements are either zero or one,
      depending on whether the variable symbolically appears in the equation */
   jacob_map_t computeSymbolicJacobian() const;
@@ -485,11 +496,9 @@ private:
   static string findGccOnMacos(const string &mexext);
 #endif
   /* Compiles a MEX file. The compilation is done in a separate asynchronous
-     thread, so the call to this function is not blocking.
-     TODO: further improve the function so that when a MEX has multiple source
-     files, those get compiled in separate threads; this could however
-     require implementing a scheduler, so as to not run more threads than
-     there are logical cores. */
+     thread, so the call to this function is not blocking. The number of
+     concurrently running GCC processes is dynamically limited to the number of
+     available logical processors. */
   void compileMEX(const filesystem::path &output_dir, const string &funcname, const string &mexext, const vector<filesystem::path> &src_files, const filesystem::path &matlabroot, const filesystem::path &dynareroot) const;
 
 public:
-- 
GitLab