From 7c39b12b7bc528b0bf8dce6c2fece8192f73f4f0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=A9bastien=20Villemot?= <sebastien@dynare.org>
Date: Tue, 25 Jun 2019 15:42:32 +0200
Subject: [PATCH] Always compile MEX files with OpenMP (when they can take
 advantage of it)

As a consequence, remove the --enable-openmp option from the configure script.
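
The change is mechanical and repeated across the C++ sources: the
USE_OMP preprocessor guard around OpenMP pragmas is dropped, and
-fopenmp is instead passed per target in the Automake fragments. As a
representative sketch (the clause shown comes from
sparse_hessian_times_B_kronecker_C.cc; the other MEX files follow the
same pattern):

```
// Before: the pragma was compiled in only when configure was run with
// --enable-openmp, which defined USE_OMP and appended -fopenmp:
#if USE_OMP
# pragma omp parallel for num_threads(number_of_threads)
#endif

// After: the pragma is always present, and -fopenmp is supplied
// unconditionally by the corresponding *.am file:
#pragma omp parallel for num_threads(number_of_threads)
```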
---
 README.md                                     |  7 +--
 matlab/set_dynare_threads.m                   |  7 +--
 mex/build/block_kalman_filter.am              |  1 +
 mex/build/kronecker.am                        |  2 +
 mex/build/local_state_space_iterations.am     |  2 +
 mex/build/matlab/configure.ac                 |  8 ---
 mex/build/octave/configure.ac                 |  8 ---
 mex/build/qmc_sequence.am                     |  1 +
 .../block_kalman_filter.cc                    | 61 ++++++-------------
 mex/sources/bytecode/SparseMatrix.cc          | 10 +--
 .../sparse_hessian_times_B_kronecker_C.cc     | 12 +---
 .../local_state_space_iteration_2.cc          | 12 +---
 mex/sources/sobol/gaussian.hh                 | 21 ++-----
 13 files changed, 46 insertions(+), 106 deletions(-)
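
Note that dropping the configure flag does not remove run-time control
over parallelism: the loops that take an explicit thread count keep
their num_threads(number_of_threads) clauses, whose value is still
managed on the MATLAB/Octave side (see matlab/set_dynare_threads.m),
and the remaining parallel regions obey the standard OpenMP runtime
settings (e.g. the OMP_NUM_THREADS environment variable).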

diff --git a/README.md b/README.md
index 395a780ac8..9c95021fa5 100644
--- a/README.md
+++ b/README.md
@@ -127,11 +127,8 @@ To remove debugging information for MATLAB MEX functions, the analogous call wou
 ./configure MATLAB_MEX_CFLAGS="-O3" MATLAB_MEX_CXXFLAGS="-O3"
 ```
 
-If you want to give a try to the parallelized versions of some mex files (`A_times_B_kronecker_C` and `sparse_hessian_times_B_kronecker_C` used to get the reduced form of the second order approximation of the model) you can add the `--enable-openmp` flag, for instance:
-```
-./configure --with-matlab=/usr/local/MATLAB/R2019a MATLAB_VERSION=9.6 --enable-openmp
-```
-If the configuration goes well, the script will tell you which components are correctly configured and will be built.
+If the configuration goes well, the script will tell you which components are
+correctly configured and will be built.
 
 Note that it is possible that some MEX files cannot be compiled, due to missing
 build dependencies. If you find no way of installing the missing dependencies,
diff --git a/matlab/set_dynare_threads.m b/matlab/set_dynare_threads.m
index 30dbf2febc..ca76186500 100644
--- a/matlab/set_dynare_threads.m
+++ b/matlab/set_dynare_threads.m
@@ -1,7 +1,6 @@
 function set_dynare_threads(mexname,n)
-% This function sets the number of threads used by some MEX files when compiled
-% with OpenMP support (i.e with --enable-openmp is given to configure) or any
-% other parallel library.
+% This function sets the number of threads used by some MEX files that
+% rely on OpenMP or another parallel library.
 %
 % INPUTS
 %  o mexname  [string]    Name of the mex file.
@@ -10,7 +9,7 @@ function set_dynare_threads(mexname,n)
 % OUTPUTS
 %  none.
 
-% Copyright (C) 2009-2012 Dynare Team
+% Copyright (C) 2009-2019 Dynare Team
 %
 % This file is part of Dynare.
 %
diff --git a/mex/build/block_kalman_filter.am b/mex/build/block_kalman_filter.am
index 381d3a874d..9379db0060 100644
--- a/mex/build/block_kalman_filter.am
+++ b/mex/build/block_kalman_filter.am
@@ -3,6 +3,7 @@ mex_PROGRAMS = block_kalman_filter
 TOPDIR = $(top_srcdir)/../../sources/block_kalman_filter
 
 block_kalman_filter_CPPFLAGS = $(AM_CPPFLAGS) -I$(TOPDIR)
+block_kalman_filter_CXXFLAGS = $(AM_CXXFLAGS) -fopenmp
 
 nodist_block_kalman_filter_SOURCES = block_kalman_filter.cc
 
diff --git a/mex/build/kronecker.am b/mex/build/kronecker.am
index b570bf72a8..bb2c138bfd 100644
--- a/mex/build/kronecker.am
+++ b/mex/build/kronecker.am
@@ -3,6 +3,8 @@ mex_PROGRAMS = sparse_hessian_times_B_kronecker_C A_times_B_kronecker_C
 nodist_sparse_hessian_times_B_kronecker_C_SOURCES = sparse_hessian_times_B_kronecker_C.cc
 nodist_A_times_B_kronecker_C_SOURCES = A_times_B_kronecker_C.cc
 
+sparse_hessian_times_B_kronecker_C_CXXFLAGS = $(AM_CXXFLAGS) -fopenmp
+
 BUILT_SOURCES = $(nodist_sparse_hessian_times_B_kronecker_C_SOURCES) $(nodist_A_times_B_kronecker_C_SOURCES)
 CLEANFILES = $(nodist_sparse_hessian_times_B_kronecker_C_SOURCES) $(nodist_A_times_B_kronecker_C_SOURCES)
 
diff --git a/mex/build/local_state_space_iterations.am b/mex/build/local_state_space_iterations.am
index 02641c8e42..59054b76d7 100644
--- a/mex/build/local_state_space_iterations.am
+++ b/mex/build/local_state_space_iterations.am
@@ -2,6 +2,8 @@ mex_PROGRAMS = local_state_space_iteration_2
 
 nodist_local_state_space_iteration_2_SOURCES = local_state_space_iteration_2.cc
 
+local_state_space_iteration_2_CXXFLAGS = $(AM_CXXFLAGS) -fopenmp
+
 BUILT_SOURCES = $(nodist_local_state_space_iteration_2_SOURCES)
 CLEANFILES = $(nodist_local_state_space_iteration_2_SOURCES)
 
diff --git a/mex/build/matlab/configure.ac b/mex/build/matlab/configure.ac
index 906aeee2e7..87cd5953c2 100644
--- a/mex/build/matlab/configure.ac
+++ b/mex/build/matlab/configure.ac
@@ -129,14 +129,6 @@ else
    BUILD_MS_SBVAR_MEX_MATLAB="no (missing GSL)"
 fi
 
-AC_ARG_ENABLE([openmp], AS_HELP_STRING([--enable-openmp], [use OpenMP for parallelization of some MEX files]), [
-  if test "$enable_openmp" = yes; then
-    CPPFLAGS="$CPPFLAGS -DUSE_OMP"
-    CFLAGS="$CFLAGS -fopenmp"
-    CXXFLAGS="$CXXFLAGS -fopenmp"
-  fi
-])
-
 AC_ARG_WITH([m2html], AS_HELP_STRING([--with-m2html=DIR], [specify installation directory of M2HTML]), [
 M2HTML=$withval
 BUILD_M2HTML=yes
diff --git a/mex/build/octave/configure.ac b/mex/build/octave/configure.ac
index 7bb01f7047..d13a0e83f9 100644
--- a/mex/build/octave/configure.ac
+++ b/mex/build/octave/configure.ac
@@ -103,14 +103,6 @@ else
    BUILD_MS_SBVAR_MEX_OCTAVE="no (missing GSL or MatIO library)"
 fi
 
-AC_ARG_ENABLE([openmp], AS_HELP_STRING([--enable-openmp], [use OpenMP for parallelization of some MEX files]), [
-  if test "x$enable_openmp" = "xyes"; then
-    CPPFLAGS="$CPPFLAGS -DUSE_OMP"
-    CFLAGS="$CFLAGS -fopenmp"
-    CXXFLAGS="$CXXFLAGS -fopenmp"
-  fi
-])
-
 AC_MSG_NOTICE([
 
 Dynare is now configured for building the following components...
diff --git a/mex/build/qmc_sequence.am b/mex/build/qmc_sequence.am
index d65ea11453..05084ec4d0 100644
--- a/mex/build/qmc_sequence.am
+++ b/mex/build/qmc_sequence.am
@@ -3,6 +3,7 @@ mex_PROGRAMS = qmc_sequence
 TOPDIR = $(top_srcdir)/../../sources/sobol
 
 qmc_sequence_CPPFLAGS = $(AM_CPPFLAGS) -I$(TOPDIR)
+qmc_sequence_CXXFLAGS = $(AM_CXXFLAGS) -fopenmp
 
 nodist_qmc_sequence_SOURCES = qmc_sequence.cc
 
diff --git a/mex/sources/block_kalman_filter/block_kalman_filter.cc b/mex/sources/block_kalman_filter/block_kalman_filter.cc
index e6a82d6730..26ddf92e26 100644
--- a/mex/sources/block_kalman_filter/block_kalman_filter.cc
+++ b/mex/sources/block_kalman_filter/block_kalman_filter.cc
@@ -19,9 +19,8 @@
 
 #include <cmath>
 #include <algorithm>
-#ifdef USE_OMP
-# include <omp.h>
-#endif
+#include <omp.h>
+
 #include "block_kalman_filter.hh"
 
 #define BLAS
@@ -372,7 +371,7 @@ BlockKalmanFilter::block_kalman_filter(int nlhs, mxArray *plhs[])
 
           //v = Y(:,t) - a(mf)
           int i_i = 0;
-          //#pragma omp parallel for shared(v, i_i, d_index) num_threads(atoi(getenv("DYNARE_NUM_THREADS")))
+          //#pragma omp parallel for shared(v, i_i, d_index)
           for (auto i = d_index.begin(); i != d_index.end(); i++)
             {
               //mexPrintf("i_i=%d, omp_get_max_threads()=%d\n",i_i,omp_get_max_threads());
@@ -383,7 +382,7 @@ BlockKalmanFilter::block_kalman_filter(int nlhs, mxArray *plhs[])
           //F  = P(mf,mf) + H;
           i_i = 0;
           if (H_size == 1)
-            //#pragma omp parallel for shared(iF, F, i_i) num_threads(atoi(getenv("DYNARE_NUM_THREADS")))
+            //#pragma omp parallel for shared(iF, F, i_i)
             for (auto i = d_index.begin(); i != d_index.end(); i++, i_i++)
               {
                 int j_j = 0;
@@ -391,7 +390,7 @@ BlockKalmanFilter::block_kalman_filter(int nlhs, mxArray *plhs[])
                   iF[i_i + j_j * size_d_index] = F[i_i + j_j * size_d_index] = P[mf[*i] + mf[*j] * n] + H[0];
               }
           else
-            //#pragma omp parallel for shared(iF, F, P, H, mf, i_i) num_threads(atoi(getenv("DYNARE_NUM_THREADS")))
+            //#pragma omp parallel for shared(iF, F, P, H, mf, i_i)
             for (auto i = d_index.begin(); i != d_index.end(); i++, i_i++)
               {
                 int j_j = 0;
@@ -502,9 +501,7 @@ BlockKalmanFilter::block_kalman_filter(int nlhs, mxArray *plhs[])
             mexPrintf("dgetri failure with error %d\n", static_cast<int>(info));
 
           //lik(t) = log(dF)+transpose(v)*iF*v;
-#ifdef USE_OMP
-# pragma omp parallel for shared(v_pp) num_threads(atoi(getenv("DYNARE_NUM_THREADS")))
-#endif
+#pragma omp parallel for shared(v_pp)
           for (int i = 0; i < size_d_index; i++)
             {
               double res = 0.0;
@@ -522,9 +519,7 @@ BlockKalmanFilter::block_kalman_filter(int nlhs, mxArray *plhs[])
 
           if (missing_observations)
               //K      = P(:,mf)*iF;
-#ifdef USE_OMP
-# pragma omp parallel for shared(P_mf) num_threads(atoi(getenv("DYNARE_NUM_THREADS")))
-#endif
+#pragma omp parallel for shared(P_mf)
             for (int i = 0; i < n; i++)
               {
                 int j_j = 0;
@@ -538,9 +533,7 @@ BlockKalmanFilter::block_kalman_filter(int nlhs, mxArray *plhs[])
               for (int j = 0; j < pp; j++)
                 P_mf[i + j * n] = P[i + mf[j] * n];
 
-#ifdef USE_OMP
-# pragma omp parallel for shared(K) num_threads(atoi(getenv("DYNARE_NUM_THREADS")))
-#endif
+#pragma omp parallel for shared(K)
           for (int i = 0; i < n; i++)
             for (int j = 0; j < size_d_index; j++)
               {
@@ -552,9 +545,7 @@ BlockKalmanFilter::block_kalman_filter(int nlhs, mxArray *plhs[])
               }
 
           //a      = T*(a+K*v);
-#ifdef USE_OMP
-# pragma omp parallel for shared(v_n) num_threads(atoi(getenv("DYNARE_NUM_THREADS")))
-#endif
+#pragma omp parallel for shared(v_n)
           for (int i = pure_obs; i < n; i++)
             {
               double res = 0.0;
@@ -563,9 +554,7 @@ BlockKalmanFilter::block_kalman_filter(int nlhs, mxArray *plhs[])
               v_n[i] = res + a[i];
             }
 
-#ifdef USE_OMP
-# pragma omp parallel for shared(a) num_threads(atoi(getenv("DYNARE_NUM_THREADS")))
-#endif
+#pragma omp parallel for shared(a)
           for (int i = 0; i < n; i++)
             {
               double res = 0.0;
@@ -578,24 +567,20 @@ BlockKalmanFilter::block_kalman_filter(int nlhs, mxArray *plhs[])
             {
               //P      = T*(P-K*P(mf,:))*transpose(T)+QQ;
               int i_i = 0;
-              //#pragma omp parallel for shared(P_mf) num_threads(atoi(getenv("DYNARE_NUM_THREADS")))
+              //#pragma omp parallel for shared(P_mf)
               for (auto i = d_index.begin(); i != d_index.end(); i++, i_i++)
                 for (int j = pure_obs; j < n; j++)
                   P_mf[i_i + j * size_d_index] = P[mf[*i] + j * n];
             }
           else
             //P      = T*(P-K*P(mf,:))*transpose(T)+QQ;
-#ifdef USE_OMP
-# pragma omp parallel for shared(P_mf) num_threads(atoi(getenv("DYNARE_NUM_THREADS")))
-#endif
+#pragma omp parallel for shared(P_mf)
             for (int i = 0; i < pp; i++)
               for (int j = pure_obs; j < n; j++)
                 P_mf[i + j * pp] = P[mf[i] + j * n];
 
 #ifdef BLAS
-# ifdef USE_OMP
-#  pragma omp parallel for shared(K_P) num_threads(atoi(getenv("DYNARE_NUM_THREADS")))
-# endif
+# pragma omp parallel for shared(K_P)
           for (int i = 0; i < n; i++)
             for (int j = i; j < n; j++)
               {
@@ -733,9 +718,7 @@ BlockKalmanFilter::block_kalman_filter(int nlhs, mxArray *plhs[])
             }
 
 # else
-#  ifdef USE_OMP
-#   pragma omp parallel for shared(K_P) num_threads(atoi(getenv("DYNARE_NUM_THREADS")))
-#  endif
+#  pragma omp parallel for shared(K_P)
           for (int i = pure_obs; i < n; i++)
             {
               unsigned int i1 = i - pure_obs;
@@ -750,9 +733,7 @@ BlockKalmanFilter::block_kalman_filter(int nlhs, mxArray *plhs[])
                 }
             }
 
-#  ifdef USE_OMP
-#   pragma omp parallel for shared(P_t_t1) num_threads(atoi(getenv("DYNARE_NUM_THREADS")))
-#  endif
+#  pragma omp parallel for shared(P_t_t1)
           for (int i = pure_obs; i < n; i++)
             {
               unsigned int i1 = i - pure_obs;
@@ -766,9 +747,7 @@ BlockKalmanFilter::block_kalman_filter(int nlhs, mxArray *plhs[])
 
           fill_n(tmp, 0, n * n_state);
 
-#  ifdef USE_OMP
-#   pragma omp parallel for shared(tmp) num_threads(atoi(getenv("DYNARE_NUM_THREADS")))
-#  endif
+#  pragma omp parallel for shared(tmp)
           for (int i = 0; i < n; i++)
             {
               int max_k = i_nz_state_var[i];
@@ -785,9 +764,7 @@ BlockKalmanFilter::block_kalman_filter(int nlhs, mxArray *plhs[])
           fill_n(P, 0, n * n);
 
           int n_n_obs = -n * pure_obs;
-#  ifdef USE_OMP
-#   pragma omp parallel for shared(P) num_threads(atoi(getenv("DYNARE_NUM_THREADS")))
-#  endif
+#  pragma omp parallel for shared(P)
           for (int i = 0; i < n; i++)
             {
               for (int j = i; j < n; j++)
@@ -802,9 +779,7 @@ BlockKalmanFilter::block_kalman_filter(int nlhs, mxArray *plhs[])
                 }
             }
 
-#  ifdef USE_OMP
-#   pragma omp parallel for shared(P) num_threads(atoi(getenv("DYNARE_NUM_THREADS")))
-#  endif
+#  pragma omp parallel for shared(P)
           for (int i = 0; i < n; i++)
             {
               for (int j = i; j < n; j++)
diff --git a/mex/sources/bytecode/SparseMatrix.cc b/mex/sources/bytecode/SparseMatrix.cc
index 2f076615de..1f4cb7fb67 100644
--- a/mex/sources/bytecode/SparseMatrix.cc
+++ b/mex/sources/bytecode/SparseMatrix.cc
@@ -1923,7 +1923,7 @@ dynSparseMatrix::Init_GE(int periods, int y_kmin, int y_kmax, int Size, map<pair
       NbNZCol[i] = 0;
     }
   int nnz = 0;
-  //pragma omp parallel for num_threads(atoi(getenv("DYNARE_NUM_THREADS"))) ordered private(it4, ti_y_kmin, ti_y_kmax, eq, var, lag) schedule(dynamic)
+  //pragma omp parallel for ordered private(it4, ti_y_kmin, ti_y_kmax, eq, var, lag) schedule(dynamic)
   for (t = 0; t < periods; t++)
     {
       ti_y_kmin = -min(t, y_kmin);
@@ -2710,7 +2710,7 @@ dynSparseMatrix::Sparse_mult_SAT_B(mxArray *A_m, mxArray *B_m)
   //unsigned int nze_A = 0;
   unsigned int C_col = 0;
   C_j[C_col] = 0;
-  //#pragma omp parallel for num_threads(atoi(getenv("DYNARE_NUM_THREADS")))
+  //#pragma omp parallel for
   for (unsigned int j = 0; j < n_B; j++)
     {
       for (unsigned int i = 0; i < n_A; i++)
@@ -5371,7 +5371,7 @@ dynSparseMatrix::Solve_ByteCode_Sparse_GaussianElimination(int Size, int blck, i
             bc[nb_eq_todo++] = first;
           first = first->NZE_C_N;
         }
-      //pragma omp parallel for num_threads(atoi(getenv("DYNARE_NUM_THREADS")))
+      //pragma omp parallel for
       for (int j = 0; j < nb_eq_todo; j++)
         {
           first = bc[j];
@@ -5704,7 +5704,7 @@ dynSparseMatrix::Solve_ByteCode_Symbolic_Sparse_GaussianElimination(int Size, bo
                     bc[nb_eq_todo++] = first;
                   first = first->NZE_C_N;
                 }
-              //#pragma omp parallel for num_threads(atoi(getenv("DYNARE_NUM_THREADS"))) shared(nb_var_piva, first_piva, nopa, save_op) reduction(+:nop)
+              //#pragma omp parallel for shared(nb_var_piva, first_piva, nopa, save_op) reduction(+:nop)
               for (int j = 0; j < nb_eq_todo; j++)
                 {
                   t_save_op_s *save_op_s_l;
@@ -5873,7 +5873,7 @@ dynSparseMatrix::Solve_ByteCode_Symbolic_Sparse_GaussianElimination(int Size, bo
                     bc[nb_eq_todo++] = first;
                   first = first->NZE_C_N;
                 }
-              //#pragma omp parallel for num_threads(atoi(getenv("DYNARE_NUM_THREADS"))) shared(nb_var_piva, first_piva, nopa, save_op) reduction(+:nop)
+              //#pragma omp parallel for shared(nb_var_piva, first_piva, nopa, save_op) reduction(+:nop)
               for (int j = 0; j < nb_eq_todo; j++)
                 {
                   NonZeroElem *first = bc[j];
diff --git a/mex/sources/kronecker/sparse_hessian_times_B_kronecker_C.cc b/mex/sources/kronecker/sparse_hessian_times_B_kronecker_C.cc
index 18ee522ee0..93265998cc 100644
--- a/mex/sources/kronecker/sparse_hessian_times_B_kronecker_C.cc
+++ b/mex/sources/kronecker/sparse_hessian_times_B_kronecker_C.cc
@@ -27,9 +27,7 @@
 
 #include <dynmex.h>
 
-#ifdef USE_OMP
-# include <omp.h>
-#endif
+#include <omp.h>
 
 #define DEBUG_OMP 0
 
@@ -42,9 +40,7 @@ sparse_hessian_times_B_kronecker_B(const mwIndex *isparseA, const mwIndex *jspar
  **   This loop is split into two nested loops because we use the
   **   symmetric pattern of the hessian matrix.
   */
-#if USE_OMP
-# pragma omp parallel for num_threads(number_of_threads)
-#endif
+#pragma omp parallel for num_threads(number_of_threads)
   for (mwIndex j1B = 0; j1B < static_cast<mwIndex>(nB); j1B++)
     {
 #if DEBUG_OMP
@@ -95,9 +91,7 @@ sparse_hessian_times_B_kronecker_C(const mwIndex *isparseA, const mwIndex *jspar
   /*
   **   Loop over the columns of B⊗B (or of the result matrix D).
   */
-#if USE_OMP
-# pragma omp parallel for num_threads(number_of_threads)
-#endif
+#pragma omp parallel for num_threads(number_of_threads)
   for (mwIndex jj = 0; jj < static_cast<mwIndex>(nB*nC); jj++) // column of B⊗C index.
     {
       // Uncomment the following line to check if all processors are used.
diff --git a/mex/sources/local_state_space_iterations/local_state_space_iteration_2.cc b/mex/sources/local_state_space_iterations/local_state_space_iteration_2.cc
index b700c39b17..2ecd1fc9f3 100644
--- a/mex/sources/local_state_space_iterations/local_state_space_iteration_2.cc
+++ b/mex/sources/local_state_space_iterations/local_state_space_iteration_2.cc
@@ -28,9 +28,7 @@
 #include <dynmex.h>
 #include <dynblas.h>
 
-#ifdef USE_OMP
-# include <omp.h>
-#endif
+#include <omp.h>
 
 #define FIRST_ORDER_LOOP 1// Comment out this line to use mkl-blas instead of loops when computing ghx*yhat and ghu*epsilon
 
@@ -67,9 +65,7 @@ ss2Iteration_pruning(double *y2, double *y1, const double *yhat2, const double *
   std::vector<int> jj1, jj2, jj3;// vector indices for ghuu
   set_vector_of_indices(n, m, ii1, ii2, ii3);
   set_vector_of_indices(q, m, jj1, jj2, jj3);
-#ifdef USE_OMP
-# pragma omp parallel for num_threads(number_of_threads)
-#endif
+#pragma omp parallel for num_threads(number_of_threads)
   for (int particle = 0; particle < s; particle++)
     {
       int particle_ = particle*m;
@@ -155,9 +151,7 @@ ss2Iteration(double *y, const double *yhat, const double *epsilon,
   std::vector<int> jj1, jj2, jj3;// vector indices for ghuu
   set_vector_of_indices(n, m, ii1, ii2, ii3);
   set_vector_of_indices(q, m, jj1, jj2, jj3);
-#ifdef USE_OMP
-# pragma omp parallel for num_threads(number_of_threads)
-#endif
+#pragma omp parallel for num_threads(number_of_threads)
   for (int particle = 0; particle < s; particle++)
     {
       int particle_ = particle*m;
diff --git a/mex/sources/sobol/gaussian.hh b/mex/sources/sobol/gaussian.hh
index ade3ef9e35..ea5377cb95 100644
--- a/mex/sources/sobol/gaussian.hh
+++ b/mex/sources/sobol/gaussian.hh
@@ -2,7 +2,7 @@
 **
 ** Pseudo code of the algorithm is given at http://home.online.no/~pjacklam/notes/invnorm
 **
-** Copyright © 2010-2017 Dynare Team
+** Copyright © 2010-2019 Dynare Team
 **
 ** This file is part of Dynare.
 **
@@ -26,6 +26,8 @@
 #include <limits>
 #include <algorithm>
 
+#include <omp.h>
+
 #include <dynblas.h>
 
 using namespace std;
@@ -33,11 +35,6 @@ using namespace std;
 constexpr double lb = .02425;
 constexpr double ub = .97575;
 
-#ifdef USE_OMP
-# include <omp.h>
-#endif
-#define DEBUG_OMP 0
-
 template<typename T>
 T
 icdf(const T uniform)
@@ -125,9 +122,7 @@ template<typename T>
 void
 icdfm(int n, T *U)
 {
-#if USE_OMP
-# pragma omp parallel for num_threads(omp_get_num_threads())
-#endif
+#pragma omp parallel for
   for (int i = 0; i < n; i++)
     U[i] = icdf(U[i]);
   return;
@@ -152,9 +147,7 @@ void
 usphere(int d, int n, T *U)
 {
   icdfm(n*d, U);
-#if USE_OMP
-# pragma omp parallel for num_threads(omp_get_num_threads())
-#endif
+#pragma omp parallel for
   for (int j = 0; j < n; j++)// sequence index.
     {
       int k = j*d;
@@ -173,9 +166,7 @@ void
 usphereRadius(int d, int n, double radius, T *U)
 {
   icdfm(n*d, U);
-#if USE_OMP
-# pragma omp parallel for num_threads(omp_get_num_threads())
-#endif
+#pragma omp parallel for
   for (int j = 0; j < n; j++)// sequence index.
     {
       int k = j*d;
-- 
GitLab