Verified commit 7c39b12b authored by Sébastien Villemot

Always compile MEX files with OpenMP (when they can take advantage of it)

As a consequence, remove the --enable-openmp option of the configure script.
parent 82cef48e
Pipeline #1453 passed with stages in 103 minutes and 9 seconds
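The same pattern runs through every file touched below: the `#ifdef USE_OMP` guards (and the `-DUSE_OMP` / `--enable-openmp` plumbing in the configure scripts) are dropped, the OpenMP pragmas are emitted unconditionally, and `-fopenmp` is added to the per-target `CXXFLAGS` in the relevant Makefile.am files. A minimal sketch of that pattern, in the spirit of the MEX sources but with a hypothetical `scale` function rather than an excerpt of Dynare code:
```
#include <vector>
#include <iostream>

// Illustrative only: an OpenMP "parallel for" that is always compiled in
// (no #ifdef USE_OMP guard), with the team size chosen by the caller, as in
// the num_threads(number_of_threads) clauses kept in some of the MEX files.
void scale(std::vector<double> &v, double factor, int number_of_threads)
{
#pragma omp parallel for num_threads(number_of_threads)
  for (int i = 0; i < static_cast<int>(v.size()); i++)
    v[i] *= factor;
}

int main()
{
  std::vector<double> v(1000, 1.0);
  scale(v, 2.0, 4);          // caller still controls the number of threads
  std::cout << v[0] << "\n"; // prints 2
}
```
Built with `g++ -fopenmp` the loop runs in parallel; without `-fopenmp` the pragma is simply ignored and the code stays correct, which is why dropping the guards is safe.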
@@ -127,11 +127,8 @@ To remove debugging information for MATLAB MEX functions, the analogous call wou
./configure MATLAB_MEX_CFLAGS="-O3" MATLAB_MEX_CXXFLAGS="-O3"
```
-If you want to give a try to the parallelized versions of some mex files (`A_times_B_kronecker_C` and `sparse_hessian_times_B_kronecker_C` used to get the reduced form of the second order approximation of the model) you can add the `--enable-openmp` flag, for instance:
-```
-./configure --with-matlab=/usr/local/MATLAB/R2019a MATLAB_VERSION=9.6 --enable-openmp
-```
-If the configuration goes well, the script will tell you which components are correctly configured and will be built.
+If the configuration goes well, the script will tell you which components are
+correctly configured and will be built.
Note that it is possible that some MEX files cannot be compiled, due to missing
build dependencies. If you find no way of installing the missing dependencies,
......
function set_dynare_threads(mexname,n)
-% This function sets the number of threads used by some MEX files when compiled
-% with OpenMP support (i.e with --enable-openmp is given to configure) or any
-% other parallel library.
+% This function sets the number of threads used by some MEX files using
+% OpenMP features or any other parallel library.
%
% INPUTS
% o mexname [string] Name of the mex file.
@@ -10,7 +9,7 @@ function set_dynare_threads(mexname,n)
% OUTPUTS
% none.
-% Copyright (C) 2009-2012 Dynare Team
+% Copyright (C) 2009-2019 Dynare Team
%
% This file is part of Dynare.
%
......
@@ -3,6 +3,7 @@ mex_PROGRAMS = block_kalman_filter
TOPDIR = $(top_srcdir)/../../sources/block_kalman_filter
block_kalman_filter_CPPFLAGS = $(AM_CPPFLAGS) -I$(TOPDIR)
+block_kalman_filter_CXXFLAGS = $(AM_CXXFLAGS) -fopenmp
nodist_block_kalman_filter_SOURCES = block_kalman_filter.cc
......
@@ -3,6 +3,8 @@ mex_PROGRAMS = sparse_hessian_times_B_kronecker_C A_times_B_kronecker_C
nodist_sparse_hessian_times_B_kronecker_C_SOURCES = sparse_hessian_times_B_kronecker_C.cc
nodist_A_times_B_kronecker_C_SOURCES = A_times_B_kronecker_C.cc
+sparse_hessian_times_B_kronecker_C_CXXFLAGS = $(AM_CXXFLAGS) -fopenmp
BUILT_SOURCES = $(nodist_sparse_hessian_times_B_kronecker_C_SOURCES) $(nodist_A_times_B_kronecker_C_SOURCES)
CLEANFILES = $(nodist_sparse_hessian_times_B_kronecker_C_SOURCES) $(nodist_A_times_B_kronecker_C_SOURCES)
......
@@ -2,6 +2,8 @@ mex_PROGRAMS = local_state_space_iteration_2
nodist_local_state_space_iteration_2_SOURCES = local_state_space_iteration_2.cc
+local_state_space_iteration_2_CXXFLAGS = $(AM_CXXFLAGS) -fopenmp
BUILT_SOURCES = $(nodist_local_state_space_iteration_2_SOURCES)
CLEANFILES = $(nodist_local_state_space_iteration_2_SOURCES)
......
@@ -129,14 +129,6 @@ else
BUILD_MS_SBVAR_MEX_MATLAB="no (missing GSL)"
fi
-AC_ARG_ENABLE([openmp], AS_HELP_STRING([--enable-openmp], [use OpenMP for parallelization of some MEX files]), [
-if test "$enable_openmp" = yes; then
-CPPFLAGS="$CPPFLAGS -DUSE_OMP"
-CFLAGS="$CFLAGS -fopenmp"
-CXXFLAGS="$CXXFLAGS -fopenmp"
-fi
-])
AC_ARG_WITH([m2html], AS_HELP_STRING([--with-m2html=DIR], [specify installation directory of M2HTML]), [
M2HTML=$withval
BUILD_M2HTML=yes
......
@@ -103,14 +103,6 @@ else
BUILD_MS_SBVAR_MEX_OCTAVE="no (missing GSL or MatIO library)"
fi
-AC_ARG_ENABLE([openmp], AS_HELP_STRING([--enable-openmp], [use OpenMP for parallelization of some MEX files]), [
-if test "x$enable_openmp" = "xyes"; then
-CPPFLAGS="$CPPFLAGS -DUSE_OMP"
-CFLAGS="$CFLAGS -fopenmp"
-CXXFLAGS="$CXXFLAGS -fopenmp"
-fi
-])
AC_MSG_NOTICE([
Dynare is now configured for building the following components...
......
@@ -3,6 +3,7 @@ mex_PROGRAMS = qmc_sequence
TOPDIR = $(top_srcdir)/../../sources/sobol
qmc_sequence_CPPFLAGS = $(AM_CPPFLAGS) -I$(TOPDIR)
+qmc_sequence_CXXFLAGS = $(AM_CXXFLAGS) -fopenmp
nodist_qmc_sequence_SOURCES = qmc_sequence.cc
......
@@ -19,9 +19,8 @@
#include <cmath>
#include <algorithm>
-#ifdef USE_OMP
-# include <omp.h>
-#endif
+#include <omp.h>
#include "block_kalman_filter.hh"
#define BLAS
@@ -372,7 +371,7 @@ BlockKalmanFilter::block_kalman_filter(int nlhs, mxArray *plhs[])
//v = Y(:,t) - a(mf)
int i_i = 0;
-//#pragma omp parallel for shared(v, i_i, d_index) num_threads(atoi(getenv("DYNARE_NUM_THREADS")))
+//#pragma omp parallel for shared(v, i_i, d_index)
for (auto i = d_index.begin(); i != d_index.end(); i++)
{
//mexPrintf("i_i=%d, omp_get_max_threads()=%d\n",i_i,omp_get_max_threads());
@@ -383,7 +382,7 @@ BlockKalmanFilter::block_kalman_filter(int nlhs, mxArray *plhs[])
//F = P(mf,mf) + H;
i_i = 0;
if (H_size == 1)
-//#pragma omp parallel for shared(iF, F, i_i) num_threads(atoi(getenv("DYNARE_NUM_THREADS")))
+//#pragma omp parallel for shared(iF, F, i_i)
for (auto i = d_index.begin(); i != d_index.end(); i++, i_i++)
{
int j_j = 0;
@@ -391,7 +390,7 @@ BlockKalmanFilter::block_kalman_filter(int nlhs, mxArray *plhs[])
iF[i_i + j_j * size_d_index] = F[i_i + j_j * size_d_index] = P[mf[*i] + mf[*j] * n] + H[0];
}
else
-//#pragma omp parallel for shared(iF, F, P, H, mf, i_i) num_threads(atoi(getenv("DYNARE_NUM_THREADS")))
+//#pragma omp parallel for shared(iF, F, P, H, mf, i_i)
for (auto i = d_index.begin(); i != d_index.end(); i++, i_i++)
{
int j_j = 0;
@@ -502,9 +501,7 @@ BlockKalmanFilter::block_kalman_filter(int nlhs, mxArray *plhs[])
mexPrintf("dgetri failure with error %d\n", static_cast<int>(info));
//lik(t) = log(dF)+transpose(v)*iF*v;
-#ifdef USE_OMP
-# pragma omp parallel for shared(v_pp) num_threads(atoi(getenv("DYNARE_NUM_THREADS")))
-#endif
+#pragma omp parallel for shared(v_pp)
for (int i = 0; i < size_d_index; i++)
{
double res = 0.0;
@@ -522,9 +519,7 @@ BlockKalmanFilter::block_kalman_filter(int nlhs, mxArray *plhs[])
if (missing_observations)
//K = P(:,mf)*iF;
-#ifdef USE_OMP
-# pragma omp parallel for shared(P_mf) num_threads(atoi(getenv("DYNARE_NUM_THREADS")))
-#endif
+#pragma omp parallel for shared(P_mf)
for (int i = 0; i < n; i++)
{
int j_j = 0;
@@ -538,9 +533,7 @@ BlockKalmanFilter::block_kalman_filter(int nlhs, mxArray *plhs[])
for (int j = 0; j < pp; j++)
P_mf[i + j * n] = P[i + mf[j] * n];
-#ifdef USE_OMP
-# pragma omp parallel for shared(K) num_threads(atoi(getenv("DYNARE_NUM_THREADS")))
-#endif
+#pragma omp parallel for shared(K)
for (int i = 0; i < n; i++)
for (int j = 0; j < size_d_index; j++)
{
@@ -552,9 +545,7 @@ BlockKalmanFilter::block_kalman_filter(int nlhs, mxArray *plhs[])
}
//a = T*(a+K*v);
-#ifdef USE_OMP
-# pragma omp parallel for shared(v_n) num_threads(atoi(getenv("DYNARE_NUM_THREADS")))
-#endif
+#pragma omp parallel for shared(v_n)
for (int i = pure_obs; i < n; i++)
{
double res = 0.0;
@@ -563,9 +554,7 @@ BlockKalmanFilter::block_kalman_filter(int nlhs, mxArray *plhs[])
v_n[i] = res + a[i];
}
-#ifdef USE_OMP
-# pragma omp parallel for shared(a) num_threads(atoi(getenv("DYNARE_NUM_THREADS")))
-#endif
+#pragma omp parallel for shared(a)
for (int i = 0; i < n; i++)
{
double res = 0.0;
@@ -578,24 +567,20 @@ BlockKalmanFilter::block_kalman_filter(int nlhs, mxArray *plhs[])
{
//P = T*(P-K*P(mf,:))*transpose(T)+QQ;
int i_i = 0;
-//#pragma omp parallel for shared(P_mf) num_threads(atoi(getenv("DYNARE_NUM_THREADS")))
+//#pragma omp parallel for shared(P_mf)
for (auto i = d_index.begin(); i != d_index.end(); i++, i_i++)
for (int j = pure_obs; j < n; j++)
P_mf[i_i + j * size_d_index] = P[mf[*i] + j * n];
}
else
//P = T*(P-K*P(mf,:))*transpose(T)+QQ;
-#ifdef USE_OMP
-# pragma omp parallel for shared(P_mf) num_threads(atoi(getenv("DYNARE_NUM_THREADS")))
-#endif
+#pragma omp parallel for shared(P_mf)
for (int i = 0; i < pp; i++)
for (int j = pure_obs; j < n; j++)
P_mf[i + j * pp] = P[mf[i] + j * n];
#ifdef BLAS
-# ifdef USE_OMP
-# pragma omp parallel for shared(K_P) num_threads(atoi(getenv("DYNARE_NUM_THREADS")))
-# endif
+# pragma omp parallel for shared(K_P)
for (int i = 0; i < n; i++)
for (int j = i; j < n; j++)
{
@@ -733,9 +718,7 @@ BlockKalmanFilter::block_kalman_filter(int nlhs, mxArray *plhs[])
}
# else
-# ifdef USE_OMP
-# pragma omp parallel for shared(K_P) num_threads(atoi(getenv("DYNARE_NUM_THREADS")))
-# endif
+# pragma omp parallel for shared(K_P)
for (int i = pure_obs; i < n; i++)
{
unsigned int i1 = i - pure_obs;
@@ -750,9 +733,7 @@ BlockKalmanFilter::block_kalman_filter(int nlhs, mxArray *plhs[])
}
}
-# ifdef USE_OMP
-# pragma omp parallel for shared(P_t_t1) num_threads(atoi(getenv("DYNARE_NUM_THREADS")))
-# endif
+# pragma omp parallel for shared(P_t_t1)
for (int i = pure_obs; i < n; i++)
{
unsigned int i1 = i - pure_obs;
@@ -766,9 +747,7 @@ BlockKalmanFilter::block_kalman_filter(int nlhs, mxArray *plhs[])
fill_n(tmp, 0, n * n_state);
-# ifdef USE_OMP
-# pragma omp parallel for shared(tmp) num_threads(atoi(getenv("DYNARE_NUM_THREADS")))
-# endif
+# pragma omp parallel for shared(tmp)
for (int i = 0; i < n; i++)
{
int max_k = i_nz_state_var[i];
@@ -785,9 +764,7 @@ BlockKalmanFilter::block_kalman_filter(int nlhs, mxArray *plhs[])
fill_n(P, 0, n * n);
int n_n_obs = -n * pure_obs;
-# ifdef USE_OMP
-# pragma omp parallel for shared(P) num_threads(atoi(getenv("DYNARE_NUM_THREADS")))
-# endif
+# pragma omp parallel for shared(P)
for (int i = 0; i < n; i++)
{
for (int j = i; j < n; j++)
@@ -802,9 +779,7 @@ BlockKalmanFilter::block_kalman_filter(int nlhs, mxArray *plhs[])
}
}
-# ifdef USE_OMP
-# pragma omp parallel for shared(P) num_threads(atoi(getenv("DYNARE_NUM_THREADS")))
-# endif
+# pragma omp parallel for shared(P)
for (int i = 0; i < n; i++)
{
for (int j = i; j < n; j++)
......
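One behavioural detail of the block_kalman_filter.cc hunks above: the old pragmas fixed the team size with `num_threads(atoi(getenv("DYNARE_NUM_THREADS")))`, while the new ones carry no `num_threads` clause, so these loops now pick up the standard OpenMP defaults (`OMP_NUM_THREADS`, `omp_set_num_threads`, and so on). A hedged sketch of how a caller could reproduce the old environment-variable behaviour; this is purely illustrative, and the diff does not show how (or whether) Dynare sets the default elsewhere:
```
#include <omp.h>
#include <cstdlib>
#include <iostream>

int main()
{
  // Mimic the removed behaviour by hand: if DYNARE_NUM_THREADS is set,
  // make it the default team size for subsequent parallel regions.
  if (const char *s = std::getenv("DYNARE_NUM_THREADS"))
    omp_set_num_threads(std::atoi(s));

  double sum = 0.0;
#pragma omp parallel for reduction(+:sum) // no num_threads clause: uses the default
  for (int i = 0; i < 1000; i++)
    sum += i;

  std::cout << sum << " (computed with up to "
            << omp_get_max_threads() << " threads)\n";
}
```
Unlike the earlier sketch, this one must be built with `-fopenmp`, since it calls the OpenMP runtime directly.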
@@ -1923,7 +1923,7 @@ dynSparseMatrix::Init_GE(int periods, int y_kmin, int y_kmax, int Size, map<pair
NbNZCol[i] = 0;
}
int nnz = 0;
-//pragma omp parallel for num_threads(atoi(getenv("DYNARE_NUM_THREADS"))) ordered private(it4, ti_y_kmin, ti_y_kmax, eq, var, lag) schedule(dynamic)
+//pragma omp parallel for ordered private(it4, ti_y_kmin, ti_y_kmax, eq, var, lag) schedule(dynamic)
for (t = 0; t < periods; t++)
{
ti_y_kmin = -min(t, y_kmin);
@@ -2710,7 +2710,7 @@ dynSparseMatrix::Sparse_mult_SAT_B(mxArray *A_m, mxArray *B_m)
//unsigned int nze_A = 0;
unsigned int C_col = 0;
C_j[C_col] = 0;
-//#pragma omp parallel for num_threads(atoi(getenv("DYNARE_NUM_THREADS")))
+//#pragma omp parallel for
for (unsigned int j = 0; j < n_B; j++)
{
for (unsigned int i = 0; i < n_A; i++)
@@ -5371,7 +5371,7 @@ dynSparseMatrix::Solve_ByteCode_Sparse_GaussianElimination(int Size, int blck, i
bc[nb_eq_todo++] = first;
first = first->NZE_C_N;
}
-//pragma omp parallel for num_threads(atoi(getenv("DYNARE_NUM_THREADS")))
+//pragma omp parallel for
for (int j = 0; j < nb_eq_todo; j++)
{
first = bc[j];
@@ -5704,7 +5704,7 @@ dynSparseMatrix::Solve_ByteCode_Symbolic_Sparse_GaussianElimination(int Size, bo
bc[nb_eq_todo++] = first;
first = first->NZE_C_N;
}
-//#pragma omp parallel for num_threads(atoi(getenv("DYNARE_NUM_THREADS"))) shared(nb_var_piva, first_piva, nopa, save_op) reduction(+:nop)
+//#pragma omp parallel for shared(nb_var_piva, first_piva, nopa, save_op) reduction(+:nop)
for (int j = 0; j < nb_eq_todo; j++)
{
t_save_op_s *save_op_s_l;
@@ -5873,7 +5873,7 @@ dynSparseMatrix::Solve_ByteCode_Symbolic_Sparse_GaussianElimination(int Size, bo
bc[nb_eq_todo++] = first;
first = first->NZE_C_N;
}
-//#pragma omp parallel for num_threads(atoi(getenv("DYNARE_NUM_THREADS"))) shared(nb_var_piva, first_piva, nopa, save_op) reduction(+:nop)
+//#pragma omp parallel for shared(nb_var_piva, first_piva, nopa, save_op) reduction(+:nop)
for (int j = 0; j < nb_eq_todo; j++)
{
NonZeroElem *first = bc[j];
......
@@ -27,9 +27,7 @@
#include <dynmex.h>
-#ifdef USE_OMP
-# include <omp.h>
-#endif
+#include <omp.h>
#define DEBUG_OMP 0
@@ -42,9 +40,7 @@ sparse_hessian_times_B_kronecker_B(const mwIndex *isparseA, const mwIndex *jspar
** This loop is splitted into two nested loops because we use the
** symmetric pattern of the hessian matrix.
*/
-#if USE_OMP
-# pragma omp parallel for num_threads(number_of_threads)
-#endif
+#pragma omp parallel for num_threads(number_of_threads)
for (mwIndex j1B = 0; j1B < static_cast<mwIndex>(nB); j1B++)
{
#if DEBUG_OMP
@@ -95,9 +91,7 @@ sparse_hessian_times_B_kronecker_C(const mwIndex *isparseA, const mwIndex *jspar
/*
** Loop over the columns of B⊗B (or of the result matrix D).
*/
-#if USE_OMP
-# pragma omp parallel for num_threads(number_of_threads)
-#endif
+#pragma omp parallel for num_threads(number_of_threads)
for (mwIndex jj = 0; jj < static_cast<mwIndex>(nB*nC); jj++) // column of B⊗C index.
{
// Uncomment the following line to check if all processors are used.
......
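The comments kept as context in the previous file describe the parallelization strategy of the Kronecker MEX: each thread handles whole columns of B⊗C (equivalently, columns of the result D), so no two threads ever write to the same output column, and the caller-supplied `number_of_threads` bounds the team size. A toy dense sketch of that idea, with a hypothetical `a_times_b_kron_c` and none of the sparse-hessian handling of the real MEX:
```
#include <vector>
#include <iostream>

// Toy dense version: D = A * (B kron C), parallelized over columns of B kron C.
// A is mA x (mB*mC); B is mB x nB; C is mC x nC; D is mA x (nB*nC). Column-major.
void a_times_b_kron_c(const std::vector<double> &A, int mA,
                      const std::vector<double> &B, int mB, int nB,
                      const std::vector<double> &C, int mC, int nC,
                      std::vector<double> &D, int number_of_threads)
{
#pragma omp parallel for num_threads(number_of_threads)
  for (int jj = 0; jj < nB * nC; jj++) // column of B kron C (and of D): private to one thread
    {
      int jB = jj / nC, jC = jj % nC;
      for (int kB = 0; kB < mB; kB++)
        for (int kC = 0; kC < mC; kC++)
          {
            // (B kron C)(kB*mC+kC, jj) = B(kB,jB) * C(kC,jC)
            double bc = B[kB + jB * mB] * C[kC + jC * mC];
            if (bc == 0.0)
              continue;
            for (int i = 0; i < mA; i++)
              D[i + jj * mA] += A[i + (kB * mC + kC) * mA] * bc;
          }
    }
}

int main()
{
  int mB = 2, nB = 2, mC = 2, nC = 2, mA = mB * mC;
  std::vector<double> B = {1, 3, 2, 4}; // column-major [[1,2],[3,4]]
  std::vector<double> C = B;
  std::vector<double> A(mA * mB * mC, 0.0);
  for (int i = 0; i < mA; i++)
    A[i + i * mA] = 1.0;                // A = identity, so D = B kron C
  std::vector<double> D(mA * nB * nC, 0.0);
  a_times_b_kron_c(A, mA, B, mB, nB, C, mC, nC, D, 2);
  std::cout << "D(0,0) = " << D[0] << "\n"; // prints 1 = B(0,0)*C(0,0)
}
```
Because the writes to D are partitioned by column index jj, the loop needs no synchronization, which is the property the original comments rely on.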
@@ -28,9 +28,7 @@
#include <dynmex.h>
#include <dynblas.h>
-#ifdef USE_OMP
-# include <omp.h>
-#endif
+#include <omp.h>
#define FIRST_ORDER_LOOP 1// Comment out this line to use mkl-blas instead of loops when computing ghx*yhat and ghu*epsilon
@@ -67,9 +65,7 @@ ss2Iteration_pruning(double *y2, double *y1, const double *yhat2, const double *
std::vector<int> jj1, jj2, jj3;// vector indices for ghuu
set_vector_of_indices(n, m, ii1, ii2, ii3);
set_vector_of_indices(q, m, jj1, jj2, jj3);
-#ifdef USE_OMP
-# pragma omp parallel for num_threads(number_of_threads)
-#endif
+#pragma omp parallel for num_threads(number_of_threads)
for (int particle = 0; particle < s; particle++)
{
int particle_ = particle*m;
@@ -155,9 +151,7 @@ ss2Iteration(double *y, const double *yhat, const double *epsilon,
std::vector<int> jj1, jj2, jj3;// vector indices for ghuu
set_vector_of_indices(n, m, ii1, ii2, ii3);
set_vector_of_indices(q, m, jj1, jj2, jj3);
-#ifdef USE_OMP
-# pragma omp parallel for num_threads(number_of_threads)
-#endif
+#pragma omp parallel for num_threads(number_of_threads)
for (int particle = 0; particle < s; particle++)
{
int particle_ = particle*m;
......
@@ -2,7 +2,7 @@
**
** Pseudo code of the algorithm is given at http://home.online.no/~pjacklam/notes/invnorm
**
-** Copyright © 2010-2017 Dynare Team
+** Copyright © 2010-2019 Dynare Team
**
** This file is part of Dynare.
**
@@ -26,6 +26,8 @@
#include <limits>
#include <algorithm>
+#include <omp.h>
#include <dynblas.h>
using namespace std;
@@ -33,11 +35,6 @@ using namespace std;
constexpr double lb = .02425;
constexpr double ub = .97575;
-#ifdef USE_OMP
-# include <omp.h>
-#endif
-#define DEBUG_OMP 0
template<typename T>
T
icdf(const T uniform)
@@ -125,9 +122,7 @@ template<typename T>
void
icdfm(int n, T *U)