Dynare++: by default, use as many threads as there are logical CPUs

The former default was 2 threads, since Dynare++ was written at a time when
hyper-threading was becoming widespread (but multi-core processors were not yet).
parent edda6e30
Pipeline #686 failed with stages
in 76 minutes and 53 seconds
......@@ -1033,12 +1033,8 @@ default.
threads. Complex evaluations of Faa Di Bruno formulas, simulations and
numerical integration can be parallelized, and Dynare++ exploits this
advantage. You need hardware support for this, otherwise
there is no gain from the parallelization. As a rule of thumb, set the
number of threads to the number of processors. An exception is a
machine with Pentium 4 with Hyper Threading (abbreviated by HT). This
processor can run two threads concurrently. The same applies to
Dual-Core processors. Since these processors are present in most new
PC desktops/laptops, the default is 2.
there is no gain from the parallelization. The default value is the number of
logical processors present on the machine.
\item[\desc{\tt --ss-tol \it float}] This sets the tolerance of the
non-linear solver of deterministic steady state to {\it float}. It is
......
......@@ -23,8 +23,6 @@
#include <memory>
#include <cstdlib>
const int num_threads = 2; // does nothing if DEBUG defined
// evaluates unfolded (Dx)^k power, where x is a vector, D is a
// Cholesky factor (lower triangular)
class MomentFunction : public VectorFunction
......@@ -252,7 +250,7 @@ TestRunnable::smolyak_normal_moments(const GeneralMatrix &m, int imom, int level
WallTimer tim("\tSmolyak quadrature time: ");
GaussHermite gs;
SmolyakQuadrature quad(dim, level, gs);
quad.integrate(func, level, num_threads, smol_out);
quad.integrate(func, level, sthread::detach_thread_group::max_parallel_threads, smol_out);
std::cout << "\tNumber of Smolyak evaluations: " << quad.numEvals(level) << std::endl;
}
......@@ -281,7 +279,7 @@ TestRunnable::product_normal_moments(const GeneralMatrix &m, int imom, int level
WallTimer tim("\tProduct quadrature time: ");
GaussHermite gs;
ProductQuadrature quad(dim, gs);
quad.integrate(func, level, num_threads, prod_out);
quad.integrate(func, level, sthread::detach_thread_group::max_parallel_threads, prod_out);
std::cout << "\tNumber of product evaluations: " << quad.numEvals(level) << std::endl;
}
......@@ -309,7 +307,7 @@ TestRunnable::smolyak_product_cube(const VectorFunction &func, const Vector &res
{
WallTimer tim("\tSmolyak quadrature time: ");
SmolyakQuadrature quad(func.indim(), level, glq);
quad.integrate(func, level, num_threads, out);
quad.integrate(func, level, sthread::detach_thread_group::max_parallel_threads, out);
out.add(-1.0, res);
smol_error = out.getMax();
std::cout << "\tNumber of Smolyak evaluations: " << quad.numEvals(level) << std::endl;
......@@ -318,7 +316,7 @@ TestRunnable::smolyak_product_cube(const VectorFunction &func, const Vector &res
{
WallTimer tim("\tProduct quadrature time: ");
ProductQuadrature quad(func.indim(), glq);
quad.integrate(func, level, num_threads, out);
quad.integrate(func, level, sthread::detach_thread_group::max_parallel_threads, out);
out.add(-1.0, res);
prod_error = out.getMax();
std::cout << "\tNumber of product evaluations: " << quad.numEvals(level) << std::endl;
......@@ -338,7 +336,7 @@ TestRunnable::qmc_cube(const VectorFunction &func, double res, double tol, int l
WarnockPerScheme wps;
QMCarloCubeQuadrature qmc(func.indim(), level, wps);
// qmc.savePoints("warnock.txt", level);
qmc.integrate(func, level, num_threads, r);
qmc.integrate(func, level, sthread::detach_thread_group::max_parallel_threads, r);
error1 = std::max(res - r[0], r[0] - res);
std::cout << "\tQuasi-Monte Carlo (Warnock scrambling) error: " << std::setw(16) << std::setprecision(12) << error1 << std::endl;
}
......@@ -348,7 +346,7 @@ TestRunnable::qmc_cube(const VectorFunction &func, double res, double tol, int l
ReversePerScheme rps;
QMCarloCubeQuadrature qmc(func.indim(), level, rps);
// qmc.savePoints("reverse.txt", level);
qmc.integrate(func, level, num_threads, r);
qmc.integrate(func, level, sthread::detach_thread_group::max_parallel_threads, r);
error2 = std::max(res - r[0], r[0] - res);
std::cout << "\tQuasi-Monte Carlo (reverse scrambling) error: " << std::setw(16) << std::setprecision(12) << error2 << std::endl;
}
......@@ -358,7 +356,7 @@ TestRunnable::qmc_cube(const VectorFunction &func, double res, double tol, int l
IdentityPerScheme ips;
QMCarloCubeQuadrature qmc(func.indim(), level, ips);
// qmc.savePoints("identity.txt", level);
qmc.integrate(func, level, num_threads, r);
qmc.integrate(func, level, sthread::detach_thread_group::max_parallel_threads, r);
error3 = std::max(res - r[0], r[0] - res);
std::cout << "\tQuasi-Monte Carlo (no scrambling) error: " << std::setw(16) << std::setprecision(12) << error3 << std::endl;
}
......@@ -498,7 +496,6 @@ main()
nvmax = test->nvar;
}
tls.init(dmax, nvmax); // initialize library
sthread::detach_thread_group::max_parallel_threads = num_threads;
// launch the tests
int success = 0;
......
......@@ -26,7 +26,7 @@ const char *help_str
" --prefix <string> prefix of variables in Mat-4 file [\"dyn\"]\n"
" --seed <num> random number generator seed [934098]\n"
" --order <num> order of approximation [no default]\n"
" --threads <num> number of max parallel threads [2]\n"
" --threads <num> number of max parallel threads [nb. of logical CPUs]\n"
" --ss-tol <num> steady state calcs tolerance [1.e-13]\n"
" --check pesPES check model residuals [no checks]\n"
" lower/upper case switches off/on\n"
......
......@@ -5,9 +5,9 @@
namespace sthread
{
/* We set the default value for |max_parallel_threads| to 2, i.e.
uniprocessor machine with hyper-threading */
int detach_thread_group::max_parallel_threads = 2;
/* We set the default value for |max_parallel_threads| to the number of
logical CPUs. Note that std::thread::hardware_concurrency() returns 0
when that number is not well defined or not computable; in that case we
fall back to a single thread so that the limit is always strictly
positive. The unsigned result is converted to |int| explicitly. */
int detach_thread_group::max_parallel_threads
  = std::thread::hardware_concurrency() > 0
  ? static_cast<int>(std::thread::hardware_concurrency())
  : 1;
/* We cycle through all threads in the group, and in each cycle we wait
for the change in the |counter|. If the counter indicates less than
......
......@@ -220,8 +220,6 @@ extern "C" {
const int nSteps = 0; // Dynare++ solving steps, for time being default to 0 = deterministic steady state
const double sstol = 1.e-13; //NL solver tolerance from
sthread::detach_thread_group::max_parallel_threads = 2; //params.num_threads;
try
{
// make journal name and journal
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment