/*
 * Reviewer notes for the patch below (a unified git diff whose physical line
 * breaks have been collapsed, so the "+" / "-" markers appear inline).
 *
 * Purpose
 *   Replace the "one jthread per compilation" scheme (mex_compilation_threads,
 *   throttled by the mex_compilation_available_processors counter) with a
 *   fixed set of worker jthreads (mex_compilation_workers) that take jobs from
 *   the shared mex_compilation_queue.
 *
 * src/DynareMain.cc
 *   - Adds the <thread> and <algorithm> includes.
 *   - main(): when mod_file->use_dll is set, calls
 *     ModelTree::initializeMEXCompilationWorkers() with
 *     max(jthread::hardware_concurrency(), 1U). The argument is unsigned while
 *     the parameter is declared int, so an implicit conversion takes place.
 *   - main(): the unconditional joinMEXCompilationThreads() call becomes
 *     terminateMEXCompilationWorkers(), guarded by mod_file->use_dll.
 *
 * src/ModelTree.cc
 *   - compileMEX(): asserts that workers exist, then, holding
 *     mex_compilation_mut, appends (output_filename, prerequisites, cmd.str())
 *     to mex_compilation_queue, unlocks and calls notify_one(). It no longer
 *     spawns a thread itself.
 *   - initializeMEXCompilationWorkers(numworkers): asserts numworkers > 0 and
 *     that no workers exist yet, prints a message, and creates numworkers
 *     jthreads. Each worker, holding the mutex, scans the queue for the first
 *     entry whose prerequisites are all contained in mex_compilation_done
 *     (tested with includes() over two std::set ranges). It copies that
 *     entry, erases it from the queue, releases the lock and runs the command
 *     with system(). On a zero return it re-locks, inserts the output into
 *     mex_compilation_done, calls notify_all() and rescans from the start.
 *     When no entry is runnable it returns if a stop was requested, otherwise
 *     it waits on mex_compilation_cv and rescans after waking up.
 *   - terminateMEXCompilationWorkers(): waits on the condition variable until
 *     the queue is empty, calls request_stop() on every worker while still
 *     holding the lock, unlocks, calls notify_all() and joins every worker.
 *     An empty queue only means every job has been picked up; completion of
 *     the jobs still running is what the join() calls wait for.
 *
 * src/ModelTree.hh
 *   - Renames the static thread vector, removes the available-processors
 *     counter, declares mex_compilation_queue, and replaces
 *     joinMEXCompilationThreads() with the initialize/terminate pair. The
 *     compileMEX() comment is updated accordingly.
 *
 * NOTE(review): on a non-zero system() result the worker prints
 *   "Compilation failed" and calls exit(EXIT_FAILURE) from the worker thread.
 *   exit() runs the destructors of static objects, and the jthread vector,
 *   mutex, condition variable, queue and done-set are all static members that
 *   other threads may still be using or blocked on. It looks like a failed
 *   compilation could therefore hang or abort instead of exiting cleanly;
 *   consider recording the failure and letting the main thread exit. To be
 *   confirmed with a failing-compilation test.
 * NOTE(review): presumably every prerequisite is itself queued through
 *   compileMEX(); an entry whose prerequisites never reach
 *   mex_compilation_done would keep the queue non-empty, and
 *   terminateMEXCompilationWorkers() would then wait indefinitely. Verify
 *   against the callers.
 */
diff --git a/src/DynareMain.cc b/src/DynareMain.cc index ff03f7cefe98178189b66029d99dfec143ee8085..5abdb522e84b542a310f0d80f4ef5f9e9f6fafe9 100644 --- a/src/DynareMain.cc +++ b/src/DynareMain.cc @@ -23,6 +23,8 @@ #include <vector> #include <string> #include <regex> +#include <thread> +#include <algorithm> #include <cstdlib> @@ -494,6 +496,9 @@ main(int argc, char **argv) if (use_dll) mod_file->use_dll = true; + if (mod_file->use_dll) + ModelTree::initializeMEXCompilationWorkers(max(jthread::hardware_concurrency(), 1U)); + if (json == JsonOutputPointType::parsing) mod_file->writeJsonOutput(basename, json, json_output_mode, onlyjson); @@ -528,7 +533,8 @@ main(int argc, char **argv) compilation (and is not printed in case of compilation failure); also avoids potential issues with destroying the thread synchronization mechanism too soon. */ - ModelTree::joinMEXCompilationThreads(); + if (mod_file->use_dll) + ModelTree::terminateMEXCompilationWorkers(); cout << "Preprocessing completed." << endl; return EXIT_SUCCESS; diff --git a/src/ModelTree.cc b/src/ModelTree.cc index c2eb674cf8e07ea9d5060f1c090af2917305944a..e9cd191fe12f6b6a52f4cbd280a52b83d63ac345 100644 --- a/src/ModelTree.cc +++ b/src/ModelTree.cc @@ -37,10 +37,10 @@ #include <utility> #include <algorithm> -vector<jthread> ModelTree::mex_compilation_threads {}; +vector<jthread> ModelTree::mex_compilation_workers {}; condition_variable ModelTree::mex_compilation_cv; mutex ModelTree::mex_compilation_mut; -unsigned int ModelTree::mex_compilation_available_processors {max(jthread::hardware_concurrency(), 1U)}; +vector<tuple<filesystem::path, set<filesystem::path>, string>> ModelTree::mex_compilation_queue; set<filesystem::path> ModelTree::mex_compilation_done; void @@ -1626,6 +1626,8 @@ ModelTree::findGccOnMacos(const string &mexext) filesystem::path ModelTree::compileMEX(const filesystem::path &output_dir, const string &output_basename, const string &mexext, const vector<filesystem::path> &input_files, const 
filesystem::path &matlabroot, const filesystem::path &dynareroot, bool link) const { + assert(!mex_compilation_workers.empty()); + const string opt_flags = "-O3 -g0 --param ira-max-conflict-table-size=1 -fno-forward-propagate -fno-gcse -fno-dce -fno-dse -fno-tree-fre -fno-tree-pre -fno-tree-cselim -fno-tree-dse -fno-tree-dce -fno-tree-pta -fno-gcse-after-reload"; filesystem::path compiler; @@ -1769,37 +1771,10 @@ ModelTree::compileMEX(const filesystem::path &output_dir, const string &output_b return p.extension() == ".o"; }); - // std::ostringstream is not copyable, so capture a std::string - string cmd_str { cmd.str() }; - mex_compilation_threads.emplace_back([cmd_str, output_filename, prerequisites] - { - /* Wait until a logical processor becomes available and all prerequisites - are done */ - unique_lock<mutex> lk {mex_compilation_mut}; - mex_compilation_cv.wait(lk, [prerequisites] - { - return mex_compilation_available_processors > 0 && - includes(mex_compilation_done.begin(), mex_compilation_done.end(), - prerequisites.begin(), prerequisites.end()); - }); - // Signal to other threads that we have grabbed a logical processor - mex_compilation_available_processors--; - lk.unlock(); - - // Effectively compile - if (system(cmd_str.c_str())) - { - cerr << "Compilation failed" << endl; - exit(EXIT_FAILURE); - } - - /* Signal to other threads that we have freed a logical processor and - completed a possible prerequisite */ - lk.lock(); - mex_compilation_available_processors++; - mex_compilation_done.insert(output_filename); - mex_compilation_cv.notify_all(); - }); + unique_lock<mutex> lk {mex_compilation_mut}; + mex_compilation_queue.emplace_back(output_filename, prerequisites, cmd.str()); + lk.unlock(); + mex_compilation_cv.notify_one(); return output_filename; } @@ -1906,9 +1881,74 @@ ModelTree::writeBlockBytecodeAdditionalDerivatives([[maybe_unused]] BytecodeWrit } void -ModelTree::joinMEXCompilationThreads() +ModelTree::initializeMEXCompilationWorkers(int 
numworkers) +{ + assert(numworkers > 0); + assert(mex_compilation_workers.empty()); + + cout << "Spawning " << numworkers << " threads for compiling MEX files." << endl; + + for (int i {0}; i < numworkers; i++) + mex_compilation_workers.emplace_back([](stop_token stoken) + { + unique_lock<mutex> lk {mex_compilation_mut}; + + look_for_job: + for (auto it {mex_compilation_queue.begin()}; it != mex_compilation_queue.end(); ++it) + { + /* The following is a copy and not a reference, because we need it + after erasing it, and also after releasing the lock (at which + point the mex_compilation_queue may be modified by others). */ + const auto [output, prerequisites, cmd] {*it}; + if (includes(mex_compilation_done.begin(), mex_compilation_done.end(), + prerequisites.begin(), prerequisites.end())) + { + mex_compilation_queue.erase(it); + lk.unlock(); // After that point, the iterator may become invalid + if (system(cmd.c_str())) + { + cerr << "Compilation failed" << endl; + exit(EXIT_FAILURE); + } + lk.lock(); + mex_compilation_done.insert(output); + /* The object just compiled may be a prerequisite for several + other objects, so notify all waiting workers. Also needed to + notify the main thread when in + ModelTree::terminateMEXCompilationWorkers(). */ + mex_compilation_cv.notify_all(); + goto look_for_job; + } + } + + if (stoken.stop_requested()) + return; + + mex_compilation_cv.wait(lk); + + goto look_for_job; + }); +} + +void +ModelTree::terminateMEXCompilationWorkers() { - for (auto &it : mex_compilation_threads) + // Wait until the queue is empty + unique_lock<mutex> lk {mex_compilation_mut}; + mex_compilation_cv.wait(lk, [] { return mex_compilation_queue.empty(); }); + + /* Request stop while still holding the lock, so we are sure that workers are + either compiling or waiting right now. 
Otherwise there could theoretically + be a race condition where the condition variable is notified just after + the thread has checked for its stoken, and just before it begins waiting; + this would be deadlock. */ + for (auto &it : mex_compilation_workers) + it.request_stop(); + + lk.unlock(); + + mex_compilation_cv.notify_all(); + for (auto &it : mex_compilation_workers) it.join(); } diff --git a/src/ModelTree.hh b/src/ModelTree.hh index 924257816453690bfe797ab5e0ba746b8ce40679..1f3bfe737a420e679de1e684d86e0e1f7b045e0c 100644 --- a/src/ModelTree.hh +++ b/src/ModelTree.hh @@ -339,16 +339,19 @@ private: /*! Maps endogenous type specific IDs to equation numbers */ vector<int> endo2eq; - // Stores threads for compiling MEX files in parallel - static vector<jthread> mex_compilation_threads; + // Stores workers used for compiling MEX files in parallel + static vector<jthread> mex_compilation_workers; /* The following variables implement the thread synchronization mechanism for limiting the number of concurrent GCC processes and tracking dependencies between object files. */ static condition_variable mex_compilation_cv; static mutex mex_compilation_mut; - static unsigned int mex_compilation_available_processors; - static set<filesystem::path> mex_compilation_done; // Object/MEX files already compiled + /* Object/MEX files waiting to be compiled (with their prerequisites as 2nd + element and compilation command as the 3rd element) */ + static vector<tuple<filesystem::path, set<filesystem::path>, string>> mex_compilation_queue; + // Object/MEX files already compiled + static set<filesystem::path> mex_compilation_done; /* Compute a pseudo-Jacobian whose all elements are either zero or one, depending on whether the variable symbolically appears in the equation */ @@ -499,12 +502,11 @@ private: static string findGccOnMacos(const string &mexext); #endif /* Compiles a MEX file (if link=true) or an object file to be linked later - into a MEX file (if link=false). 
The compilation is done in a separate - asynchronous thread, so the call to this function is not blocking. The - number of concurrently running GCC processes is dynamically limited to the - number of available logical processors. The dependency of a linked MEX - file upon intermediary objects is nicely handled. Returns the name of the - output file (to be reused later as input file if link=false). */ + into a MEX file (if link=false). The compilation is done in separate + worker threads working in parallel, so the call to this function is not + blocking. The dependency of a linked MEX file upon intermediary objects is + nicely handled. Returns the name of the output file (to be reused later as + input file if link=false). */ filesystem::path compileMEX(const filesystem::path &output_dir, const string &output_basename, const string &mexext, const vector<filesystem::path> &input_files, const filesystem::path &matlabroot, const filesystem::path &dynareroot, bool link = true) const; public: @@ -552,8 +554,12 @@ public: If no such equation can be found, throws an ExprNode::MatchFailureExpression */ expr_t getRHSFromLHS(expr_t lhs) const; - // Calls join() on all MEX compilation threads - static void joinMEXCompilationThreads(); + // Initialize the MEX compilation workers + static void initializeMEXCompilationWorkers(int numworkers); + + /* Terminates all MEX compilation workers (after they have emptied the + waiting queue) */ + static void terminateMEXCompilationWorkers(); //! Returns all the equation tags associated to an equation map<string, string>