diff --git a/matlab/global_initialization.m b/matlab/global_initialization.m
index d019777169362bee4f1bd988c98985c55624fd30..2ee53d90f3d7a75d351085da5a8509ce5c1d7be0 100644
--- a/matlab/global_initialization.m
+++ b/matlab/global_initialization.m
@@ -62,6 +62,7 @@ options_.mode_check_neighbourhood_size = 0.5;
 % Default number of threads for parallelized mex files.
 options_.threads.kronecker.A_times_B_kronecker_C = 1;
 options_.threads.kronecker.sparse_hessian_times_B_kronecker_C = 1;
+options_.threads.local_state_space_iteration_2 = 1;
 
 % steady state
 options_.jacobian_flag = 1;
@@ -176,9 +177,9 @@ particle.algorithm = 'sequential_importance_particle_filter';
 % Set the Gaussian approximation method
 particle.approximation_method = 'unscented';
 % Set unscented parameters alpha, beta and kappa for gaussian approximation
-particle.unscented.alpha = 1 ;
-particle.unscented.beta = 2 ;
-particle.unscented.kappa = 1 ;
+particle.unscented.alpha = 1;
+particle.unscented.beta = 2;
+particle.unscented.kappa = 1;
 % Configuration of resampling in case of particles
 particle.resampling.status = 'systematic' ;
 % Choice of the resampling method
diff --git a/matlab/particle/auxiliary_particle_filter.m b/matlab/particle/auxiliary_particle_filter.m
index eb1130d8fdb2846e1e7b0c62149ab3628c5193c8..77777c85005c0aaa856964ac2429e2cb3ba001db 100644
--- a/matlab/particle/auxiliary_particle_filter.m
+++ b/matlab/particle/auxiliary_particle_filter.m
@@ -20,7 +20,7 @@ function [LIK,lik] = auxiliary_particle_filter(ReducedForm,Y,start,DynareOptions
 % NOTES
 %   The vector "lik" is used to evaluate the jacobian of the likelihood.
 
-% Copyright (C) 2009-2010 Dynare Team
+% Copyright (C) 2011, 2012 Dynare Team
 %
 % This file is part of Dynare.
 %
@@ -94,7 +94,7 @@ weights = ones(1,number_of_particles);
 StateVectors = bsxfun(@plus,StateVectorVarianceSquareRoot*randn(state_variance_rank,number_of_particles),StateVectorMean);
 for t=1:sample_size
     yhat = bsxfun(@minus,StateVectors,state_variables_steady_state);
-    tmp = local_state_space_iteration_2(yhat,zeros(number_of_structural_innovations,number_of_particles),ghx,ghu,constant,ghxx,ghuu,ghxu);
+    tmp = local_state_space_iteration_2(yhat,zeros(number_of_structural_innovations,number_of_particles),ghx,ghu,constant,ghxx,ghuu,ghxu,DynareOptions.threads.local_state_space_iteration_2);
     PredictedObservedMean = mean(tmp(mf1,:),2);
     PredictionError = bsxfun(@minus,Y(:,t),tmp(mf1,:));
     dPredictedObservedMean = bsxfun(@minus,tmp(mf1,:),PredictedObservedMean);
@@ -104,17 +104,17 @@ for t=1:sample_size
     sum_tau_tilde = sum(tau_tilde) ;
     lik(t) = log(sum_tau_tilde) ;
     tau_tilde = tau_tilde/sum_tau_tilde;
-    indx_resmpl = resample(tau_tilde) ;
-    yhat = yhat(:,indx_resmpl) ;
-    wtilde = wtilde(indx_resmpl) ;
+    indx_resmpl = resample(tau_tilde);
+    yhat = yhat(:,indx_resmpl);
+    wtilde = wtilde(indx_resmpl);
     epsilon = Q_lower_triangular_cholesky*randn(number_of_structural_innovations,number_of_particles);
-    tmp = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu);
-    StateVectors = tmp(mf0,:) ;
+    tmp = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,DynareOptions.threads.local_state_space_iteration_2);
+    StateVectors = tmp(mf0,:);
     PredictedObservedMean = mean(tmp(mf1,:),2);
     PredictionError = bsxfun(@minus,Y(:,t),tmp(mf1,:));
     dPredictedObservedMean = bsxfun(@minus,tmp(mf1,:),PredictedObservedMean);
     PredictedObservedVariance = (dPredictedObservedMean*dPredictedObservedMean')/number_of_particles+H;
-    lnw = exp(-.5*(const_lik+log(det(PredictedObservedVariance))+sum(PredictionError.*(PredictedObservedVariance\PredictionError),1))) ;
+    lnw = exp(-.5*(const_lik+log(det(PredictedObservedVariance))+sum(PredictionError.*(PredictedObservedVariance\PredictionError),1)));
     wtilde = lnw./wtilde;
     weights = wtilde/sum(wtilde);
 end
diff --git a/matlab/particle/local_state_space_iteration/local_state_space_iteration_2.m b/matlab/particle/local_state_space_iteration/local_state_space_iteration_2.m
index be4b07b84d9ff70364e74a13ddc1aedbfddb1444..778e2d075b4334fd7bb778faf431be7208315e69 100644
--- a/matlab/particle/local_state_space_iteration/local_state_space_iteration_2.m
+++ b/matlab/particle/local_state_space_iteration/local_state_space_iteration_2.m
@@ -1,4 +1,4 @@
-function [y,y_] = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,yhat_,ss)
+function [y,y_] = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,a,b,c)%yhat_,ss)
 
 %@info:
 %! @deftypefn {Function File} {@var{y}, @var{y_} =} local_state_equation_2 (@var{yhat},@var{epsilon}, @var{ghx}, @var{ghu}, @var{constant}, @var{ghxx}, @var{ghuu}, @var{ghxu}, @var{yhat_}, @var{ss})
@@ -78,15 +78,13 @@ function [y,y_] = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,gh
 % AUTHOR(S) stephane DOT adjemian AT univ DASH lemans DOT fr
 %           frederic DOT karame AT univ DASH evry DOT fr
 
-number_of_threads = 1;
-
-if nargin==8
-    pruning = 0;
+if nargin==9
+    pruning = 0; numthreads = a;
     if nargout>1
         error('local_state_space_iteration_2:: Numbers of input and output argument are inconsistent!')
     end
-elseif nargin==10
-    pruning = 1;
+elseif nargin==11
+    pruning = 1; numthreads = c; yhat_ = a; ss = b;
     if nargout~=2
         error('local_state_space_iteration_2:: Numbers of input and output argument are inconsistent!')
     end
@@ -94,6 +92,8 @@ else
     error('local_state_space_iteration_2:: Wrong number of input arguments!')
 end
 
+number_of_threads = numthreads;
+
 switch pruning
   case 0
     for i =1:size(yhat,2)
@@ -130,8 +130,8 @@ end
 %$
 %$ % Call the tested routine.
 %$ for i=1:10
-%$     y1 = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu);
-%$     [y2,y2_] = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,yhat_,ss);
+%$     y1 = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,1);
+%$     [y2,y2_] = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,yhat_,ss,1);
 %$ end
 %$
 %$ % Check the results.
@@ -168,12 +168,12 @@ end
 %$
 %$ % Call the tested routine.
 %$ try
-%$     y1 = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu);
+%$     y1 = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,1);
 %$ catch
 %$     t(1) = 0;
 %$ end
 %$ try
-%$     [y2,y2_] = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,yhat_,ss);
+%$     [y2,y2_] = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,yhat_,ss,1);
 %$ catch
 %$     t(2) = 0;
 %$ end
@@ -200,8 +200,8 @@ end
 %$     ss = ones(n,1);
 %$
 %$     % Call the tested routine (mex version).
-%$     y1a = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu);
-%$     [y2a,y2a_] = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,yhat_,ss);
+%$     y1a = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,1);
+%$     [y2a,y2a_] = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,yhat_,ss,1);
 %$
 %$     % Call the tested routine (matlab version)
 %$     path_to_mex = fileparts(which(['qmc_sequence.' mexext]));
@@ -210,8 +210,8 @@ end
 %$     tar('local_state_space_iteration_2.tar',['local_state_space_iteration_2.' mexext]);
 %$     cd(where_am_i_coming_from);
 %$     dynare_config([],0);
-%$     y1b = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu);
-%$     [y2b,y2b_] = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,yhat_,ss);
+%$     y1b = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,1);
+%$     [y2b,y2b_] = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,yhat_,ss,1);
 %$     cd(path_to_mex);
 %$     untar('local_state_space_iteration_2.tar');
 %$     delete('local_state_space_iteration_2.tar');
@@ -227,3 +227,49 @@ end
 %$     T = all(t);
 %$ end
 %@eof:3
+
+
+%@test:4
+%$ % TIMING TEST (parallelization with openmp)
+%$ old_path = pwd;
+%$ cd([fileparts(which('dynare')) '/../tests/']);
+%$ dynare('dsge_base2');
+%$ load dsge_base2;
+%$ cd(old_path);
+%$ dr = oo_.dr;
+%$ clear('oo_','options_','M_');
+%$ delete([fileparts(which('dynare')) '/../tests/dsge_base2.mat']);
+%$ istates = dr.nstatic+(1:dr.npred);
+%$ n = dr.npred;
+%$ q = size(dr.ghu,2);
+%$ yhat = zeros(n,10000000);
+%$ epsilon = zeros(q,10000000);
+%$ ghx = dr.ghx(istates,:);
+%$ ghu = dr.ghu(istates,:);
+%$ constant = dr.ys(istates,:)+dr.ghs2(istates,:);
+%$ ghxx = dr.ghxx(istates,:);
+%$ ghuu = dr.ghuu(istates,:);
+%$ ghxu = dr.ghxu(istates,:);
+%$ yhat_ = zeros(n,10000000);
+%$ ss = dr.ys(istates,:);
+%$
+%$ t = NaN(4,1);
+%$ tic, for i=1:10, y1 = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,1); end
+%$ t1 = toc;
+%$ tic, for i=1:10, y2 = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,2); end
+%$ t2 = toc;
+%$ t(1) = dyn_assert(y1,y2,1e-15); clear('y1');
+%$ tic, for i=1:10, y3 = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,3); end
+%$ t3 = toc;
+%$ t(2) = dyn_assert(y2,y3,1e-15); clear('y2');
+%$ tic, for i=1:10, y4 = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,4); end
+%$ t4 = toc;
+%$ t(3) = dyn_assert(y4,y3,1e-15); clear('y3','y4');
+%$ t(4) = (t1>t2) && (t2>t3) && (t3>t4);
+%$ if ~t(4)
+%$     disp('Timmings:')
+%$     [t1, t2, t3, t4]
+%$ end
+%$ % Check the results.
+%$ T = all(t);
+%@eof:4
diff --git a/matlab/particle/sequential_importance_particle_filter.m b/matlab/particle/sequential_importance_particle_filter.m
index db89ff4e4ea6ff19220640db9f6f1f95b93736f0..cbf675237030ba8b473837a6f222cd95a3d14023 100644
--- a/matlab/particle/sequential_importance_particle_filter.m
+++ b/matlab/particle/sequential_importance_particle_filter.m
@@ -120,7 +120,7 @@ StateVectors = bsxfun(@plus,StateVectorVarianceSquareRoot*randn(state_variance_r
 for t=1:sample_size
     yhat = bsxfun(@minus,StateVectors,state_variables_steady_state);
     epsilon = Q_lower_triangular_cholesky*randn(number_of_structural_innovations,number_of_particles);
-    tmp = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu);
+    tmp = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,DynareOptions.threads.local_state_space_iteration_2);
     PredictedObservedMean = mean(tmp(mf1,:),2);
     PredictionError = bsxfun(@minus,Y(:,t),tmp(mf1,:));
     dPredictedObservedMean = bsxfun(@minus,tmp(mf1,:),PredictedObservedMean);
diff --git a/matlab/set_dynare_threads.m b/matlab/set_dynare_threads.m
index a25bfbc3ca9ecca4fb9be965d5d9222d472fa219..c113b936951b1bf5fc13ae15e08b799176aa9a15 100644
--- a/matlab/set_dynare_threads.m
+++ b/matlab/set_dynare_threads.m
@@ -1,13 +1,13 @@
 function set_dynare_threads(mexname,n)
 % This function sets the number of threads used by some MEX files when compiled
-% with OpenMP support (i.e with --enable-openmp is given to configure) or any 
+% with OpenMP support (i.e with --enable-openmp is given to configure) or any
 % other parallel library.
 %
-% INPUTS 
-%  o mexname  [string]    Name of the mex file.  
+% INPUTS
+%  o mexname  [string]    Name of the mex file.
 %  o n        [integer]   scalar specifying the number of threads to be used.
 %
-% OUTPUTS 
+% OUTPUTS
 %  none.
 
 % Copyright (C) 2009-2010 Dynare Team
@@ -41,6 +41,8 @@ switch mexname
     options_.threads.kronecker.A_times_B_kronecker_C = n;
   case 'sparse_hessian_times_B_kronecker_C'
     options_.threads.kronecker.sparse_hessian_times_B_kronecker_C = n;
+  case 'local_state_space_iteration_2'
+    options_.threads.local_state_space_iteration_2 = n;
   otherwise
     message = [ mexname ' is not a known parallel mex file.' ];
     message_id  = 'Dynare:Threads:UnknownParallelMex';
diff --git a/mex/sources/local_state_space_iterations/local_state_space_iteration_2.cc b/mex/sources/local_state_space_iterations/local_state_space_iteration_2.cc
index ae1667b43669cb97e447ca6a5ca38b99a944ac3d..6943b782b52f5068e573cc4fb5cc4424ab9bf31e 100644
--- a/mex/sources/local_state_space_iterations/local_state_space_iteration_2.cc
+++ b/mex/sources/local_state_space_iterations/local_state_space_iteration_2.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010 Dynare Team
+ * Copyright (C) 2010-2012 Dynare Team
  *
  * This file is part of Dynare.
  *
@@ -18,7 +18,7 @@
  */
 
 /*
- * This mex file computes particles at time t+1 given particles and innovations at time t, 
+ * This mex file computes particles at time t+1 given particles and innovations at time t,
  * using a second order approximation of the nonlinear state space model.
  */
 
@@ -34,7 +34,7 @@
 
 using namespace std;
 
-//#define FIRST_ORDER_LOOP 1// Comment out this line to use mkl-blas instead of loops when computing ghx*yhat and ghu*epsilon
+#define FIRST_ORDER_LOOP 1// Comment out this line to use mkl-blas instead of loops when computing ghx*yhat and ghu*epsilon
 
 void set_vector_of_indices(const int n, const int r, vector<int> &v1, vector<int> &v2, vector<int> &v3)
 {
@@ -54,20 +54,23 @@ void set_vector_of_indices(const int n, const int r, vector<int> &v1, vector<int
     }
 }
 
-void ss2Iteration_pruning(double* y2, double* y1, const double* yhat2, const double* yhat1, const double *epsilon, 
-                  double* ghx, double* ghu, 
-                  const double* constant, const double* ghxx, const double* ghuu, const double* ghxu, const double* ss, 
-                  const blas_int m, const blas_int n, const blas_int q, const blas_int s)
+void ss2Iteration_pruning(double* y2, double* y1, const double* yhat2, const double* yhat1, const double *epsilon,
+                  double* ghx, double* ghu,
+                  const double* constant, const double* ghxx, const double* ghuu, const double* ghxu, const double* ss,
+			  const blas_int m, const blas_int n, const blas_int q, const blas_int s, const int number_of_threads)
 {
-  const char transpose[2] = "N";
-  const double one = 1.0;
-  const blas_int ONE = 1;
-  
+  #ifndef FIRST_ORDER_LOOP
+      const char transpose[2] = "N";
+      const double one = 1.0;
+      const blas_int ONE = 1;
+  #endif
   vector<int> ii1, ii2, ii3;// vector indices for ghxx
   vector<int> jj1, jj2, jj3;// vector indices for ghuu
   set_vector_of_indices(n, m, ii1, ii2, ii3);
   set_vector_of_indices(q, m, jj1, jj2, jj3);
-  
+  #ifdef USE_OMP
+  #pragma omp parallel for num_threads(number_of_threads)
+  #endif
   for (int particle = 0; particle<s; particle++)
   {
     int particle_ = particle*m;
@@ -75,35 +78,35 @@ void ss2Iteration_pruning(double* y2, double* y1, const double* yhat2, const dou
     int particle___ = particle*q;
     memcpy(&y2[particle_],&constant[0],m*sizeof(double));
     memcpy(&y1[particle_],&ss[0],m*sizeof(double));
-#ifndef FIRST_ORDER_LOOP
-    dgemv(transpose,&m,&n,&one,&ghx[0],&m,&yhat2[particle__],&ONE,&one,&y2[particle_],&ONE);
-    dgemv(transpose,&m,&q,&one,&ghu[0],&m,&epsilon[particle___],&ONE,&one,&y2[particle_],&ONE);
-#endif
+    #ifndef FIRST_ORDER_LOOP
+        dgemv(transpose,&m,&n,&one,&ghx[0],&m,&yhat2[particle__],&ONE,&one,&y2[particle_],&ONE);
+	dgemv(transpose,&m,&q,&one,&ghu[0],&m,&epsilon[particle___],&ONE,&one,&y2[particle_],&ONE);
+    #endif
     for (int variable = 0; variable<m; variable++)
       {
         int variable_ = variable + particle_;
         // +ghx*yhat2+ghu*u
-#ifdef FIRST_ORDER_LOOP
-        for (int column = 0, column_=0; column<q; column++, column_ += m)
-          {
-            int i1 = variable+column_;
-            int i2 = column+particle__;
-            int i3 = column+particle___;
-            y2[variable_] += ghx[i1]*yhat2[i2];
-            y2[variable_] += ghu[i1]*epsilon[i3];
-          }
-        for (int column = q, column_=q*m; column<n; column++, column_ += m)
-          {
-            y2[variable_] += ghx[variable+column_]*yhat2[column+particle__];
-          }
-#endif
+        #ifdef FIRST_ORDER_LOOP
+            for (int column = 0, column_=0; column<q; column++, column_ += m)
+	      {
+		int i1 = variable+column_;
+		int i2 = column+particle__;
+		int i3 = column+particle___;
+		y2[variable_] += ghx[i1]*yhat2[i2];
+		y2[variable_] += ghu[i1]*epsilon[i3];
+	      }
+	    for (int column = q, column_=q*m; column<n; column++, column_ += m)
+	      {
+		y2[variable_] += ghx[variable+column_]*yhat2[column+particle__];
+	      }
+        #endif
         // +ghxx*kron(yhat1,yhat1)
         for(int i=0; i<n*(n+1)/2; i++)
           {
             int i1 = particle__+ii1[i];
             int i2 = particle__+ii2[i];
             if(i1==i2)
-              { 
+              {
                 y2[variable_] += .5*ghxx[variable+ii3[i]]*yhat1[i1]*yhat1[i1];
               }
             else
@@ -129,77 +132,80 @@ void ss2Iteration_pruning(double* y2, double* y1, const double* yhat2, const dou
         for (int v = particle__, i = 0; v<particle__+n; v++)
           for (int s = particle___; s<particle___+q; s++, i += m)
             y2[variable_] += ghxu[variable+i]*epsilon[s]*yhat2[v];
-#ifdef FIRST_ORDER_LOOP
-        for (int column = 0, column_=0; column<q; column++, column_ += m)
-          {
-            int i1 = variable+column_;
-            int i2 = column+particle__;
-            int i3 = column+particle___;
-            y1[variable_] += ghx[i1]*yhat1[i2];
-            y1[variable_] += ghu[i1]*epsilon[i3];
-          }
-        for (int column = q, column_=q*m; column<n; column++, column_ += m)
-          {
-            y1[variable_] += ghx[variable+column_]*yhat1[column+particle__];
-          }
-#endif
+            #ifdef FIRST_ORDER_LOOP
+                for (int column = 0, column_=0; column<q; column++, column_ += m)
+		  {
+		    int i1 = variable+column_;
+		    int i2 = column+particle__;
+		    int i3 = column+particle___;
+		    y1[variable_] += ghx[i1]*yhat1[i2];
+		    y1[variable_] += ghu[i1]*epsilon[i3];
+		  }
+		for (int column = q, column_=q*m; column<n; column++, column_ += m)
+		  {
+		    y1[variable_] += ghx[variable+column_]*yhat1[column+particle__];
+		  }
+            #endif
       }
-#ifndef FIRST_ORDER_LOOP
-    dgemv(transpose,&m,&n,&one,&ghx[0],&m,&yhat1[particle__],&ONE,&one,&y1[particle_],&ONE);
-    dgemv(transpose,&m,&q,&one,&ghu[0],&m,&epsilon[particle___],&ONE,&one,&y1[particle_],&ONE);
-#endif
+      #ifndef FIRST_ORDER_LOOP
+          dgemv(transpose,&m,&n,&one,&ghx[0],&m,&yhat1[particle__],&ONE,&one,&y1[particle_],&ONE);
+	  dgemv(transpose,&m,&q,&one,&ghu[0],&m,&epsilon[particle___],&ONE,&one,&y1[particle_],&ONE);
+      #endif
   }
 }
 
-void ss2Iteration(double* y, const double* yhat, const double *epsilon, 
-                  double* ghx, double* ghu, 
-                  const double* constant, const double* ghxx, const double* ghuu, const double* ghxu, 
-                  const blas_int m, const blas_int n, const blas_int q, const blas_int s)
+void ss2Iteration(double* y, const double* yhat, const double *epsilon,
+                  double* ghx, double* ghu,
+                  const double* constant, const double* ghxx, const double* ghuu, const double* ghxu,
+                  const blas_int m, const blas_int n, const blas_int q, const blas_int s, const int number_of_threads)
 {
-  const char transpose[2] = "N";
-  const double one = 1.0;
-  const blas_int ONE = 1;
-  
+  #ifndef FIRST_ORDER_LOOP
+      const char transpose[2] = "N";
+      const double one = 1.0;
+      const blas_int ONE = 1;
+  #endif
   vector<int> ii1, ii2, ii3;// vector indices for ghxx
   vector<int> jj1, jj2, jj3;// vector indices for ghuu
   set_vector_of_indices(n, m, ii1, ii2, ii3);
   set_vector_of_indices(q, m, jj1, jj2, jj3);
-  
+  #ifdef USE_OMP
+  #pragma omp parallel for num_threads(number_of_threads)
+  #endif
   for (int particle = 0; particle<s; particle++)
   {
     int particle_ = particle*m;
     int particle__ = particle*n;
     int particle___ = particle*q;
     memcpy(&y[particle_],&constant[0],m*sizeof(double));
-#ifndef FIRST_ORDER_LOOP
-    dgemv(transpose,&m,&n,&one,&ghx[0],&m,&yhat[particle__],&ONE,&one,&y[particle_],&ONE);
-    dgemv(transpose,&m,&q,&one,&ghu[0],&m,&epsilon[particle___],&ONE,&one,&y[particle_],&ONE);
-#endif
+    #ifndef FIRST_ORDER_LOOP
+        dgemv(transpose,&m,&n,&one,&ghx[0],&m,&yhat[particle__],&ONE,&one,&y[particle_],&ONE);
+        dgemv(transpose,&m,&q,&one,&ghu[0],&m,&epsilon[particle___],&ONE,&one,&y[particle_],&ONE);
+    #endif
     for (int variable = 0; variable<m; variable++)
       {
         int variable_ = variable + particle_;
         // +ghx*yhat+ghu*u
-#ifdef FIRST_ORDER_LOOP
-        for (int column = 0, column_=0; column<q; column++, column_ += m)
-          {
-            int i1 = variable+column_;
-            int i2 = column+particle__;
-            int i3 = column+particle___;
-            y[variable_] += ghx[i1]*yhat[i2];
-            y[variable_] += ghu[i1]*epsilon[i3];
-          }
-        for (int column = q, column_=q*m; column<n; column++, column_ += m)
-          {
-            y[variable_] += ghx[variable+column_]*yhat[column+particle__];
-          }
-#endif
+        #ifdef FIRST_ORDER_LOOP
+            for (int column = 0, column_=0; column<q; column++, column_ += m)
+	      {
+		int i1 = variable+column_;
+		int i2 = column+particle__;
+		int i3 = column+particle___;
+		y[variable_] += ghx[i1]*yhat[i2];
+		y[variable_] += ghu[i1]*epsilon[i3];
+	      }
+	    for (int column = q, column_=q*m; column<n; column++, column_ += m)
+	      {
+		y[variable_] += ghx[variable+column_]*yhat[column+particle__];
+	      }
+        #endif
         // +ghxx*kron(yhat,yhat)
-        for(int i=0; i<n*(n+1)/2; i++)
+	for(int i=0; i<n*(n+1)/2; i++)
           {
             int i1 = particle__+ii1[i];
             int i2 = particle__+ii2[i];
             if(i1==i2)
-              { 
+              {
                 y[variable_] += .5*ghxx[variable+ii3[i]]*yhat[i1]*yhat[i1];
               }
             else
@@ -238,25 +244,25 @@ void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
   ** prhs[0] yhat          [double]  n*s array, time t particles.
   ** prhs[1] epsilon       [double]  q*s array, time t innovations.
   ** prhs[2] ghx           [double]  m*n array, first order reduced form.
-  ** prhs[3] ghu           [double]  m*q array, first order reduced form. 
+  ** prhs[3] ghu           [double]  m*q array, first order reduced form.
   ** prhs[4] constant      [double]  m*1 array, deterministic steady state + second order correction for the union of the states and observed variables.
   ** prhs[5] ghxx          [double]  m*n^2 array, second order reduced form.
   ** prhs[6] ghuu          [double]  m*q^2 array, second order reduced form.
   ** prhs[7] ghxu          [double]  m*nq array, second order reduced form.
-  ** prhs[8] yhat_         [double]  [OPTIONAL] n*s array, time t particles (pruning additional latent variables). 
+  ** prhs[8] yhat_         [double]  [OPTIONAL] n*s array, time t particles (pruning additional latent variables).
   ** prhs[9] ss            [double]  [OPTIONAL] m*1 array, steady state for the union of the states and the observed variables (needed for the pruning mode).
   **
   ** plhs[0] y             [double]  n*s array, time t+1 particles.
-  ** plhs[1] y_            [double]  n*s array, time t+1 particles for the pruning latent variables. 
+  ** plhs[1] y_            [double]  n*s array, time t+1 particles for the pruning latent variables.
   **
   */
 
   // Check the number of input and output.
-  if ((nrhs != 8) && (nrhs != 10))
+  if ((nrhs != 9) && (nrhs != 11))
     {
       mexErrMsgTxt("Eight or ten input arguments are required.");
     }
-  if (nlhs > 2) 
+  if (nlhs > 2)
     {
       mexErrMsgTxt("Too many output arguments.");
     }
@@ -265,15 +271,17 @@ void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
   mwSize s = mxGetN(prhs[0]);// Number of particles.
   mwSize q = mxGetM(prhs[1]);// Number of innovations.
   mwSize m = mxGetM(prhs[2]);// Number of elements in the union of states and observed variables.
+  //mexPrintf("\n s (the number of column of yhat) is equal to %d.", s);
+  //mexPrintf("\n The number of column of epsilon is %d.", mxGetN(prhs[1]));
   // Check the dimensions.
   if (
-      (s != mxGetN(prhs[1]))   || // Number of columns for epsilon 
+      (s != mxGetN(prhs[1]))   || // Number of columns for epsilon
       (n != mxGetN(prhs[2]))   || // Number of columns for ghx
       (m != mxGetM(prhs[3]))   || // Number of rows for ghu
       (q != mxGetN(prhs[3]))   || // Number of columns for ghu
       (m != mxGetM(prhs[4]))   || // Number of rows for 2nd order constant correction + deterministic steady state
       (m != mxGetM(prhs[5]))   || // Number of rows for ghxx
-      (n*n != mxGetN(prhs[5])) || // Number of columns for ghxx 
+      (n*n != mxGetN(prhs[5])) || // Number of columns for ghxx
       (m != mxGetM(prhs[6]))   || // Number of rows for ghuu
       (q*q != mxGetN(prhs[6])) || // Number of columns for ghuu
       (m != mxGetM(prhs[7]))   || // Number of rows for ghxu
@@ -282,7 +290,7 @@ void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
     {
       mexErrMsgTxt("Input dimension mismatch!.");
     }
-  if (nrhs>8)
+  if (nrhs>9)
     {
       if (
           (n != mxGetM(prhs[8]))   || // Number of rows for yhat_
@@ -303,25 +311,27 @@ void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
   double *ghuu = mxGetPr(prhs[6]);
   double *ghxu = mxGetPr(prhs[7]);
   double *yhat_, *ss;
-  if (nrhs>8)
+  if (nrhs>9)
     {
       yhat_ = mxGetPr(prhs[8]);
       ss = mxGetPr(prhs[9]);
-    } 
-  if (nrhs==8)
+    }
+  if (nrhs==9)
     {
+      int numthreads = (int) mxGetScalar(prhs[8]);
       double *y;
       plhs[0] = mxCreateDoubleMatrix(m, s, mxREAL);
       y = mxGetPr(plhs[0]);
-      ss2Iteration(y, yhat, epsilon, ghx, ghu, constant, ghxx, ghuu, ghxu, (int) m, (int) n, (int) q, (int) s);
+      ss2Iteration(y, yhat, epsilon, ghx, ghu, constant, ghxx, ghuu, ghxu, (int) m, (int) n, (int) q, (int) s, numthreads);
     }
   else
     {
+      int numthreads = (int) mxGetScalar(prhs[10]);
       double *y, *y_;
       plhs[0] = mxCreateDoubleMatrix(m, s, mxREAL);
       plhs[1] = mxCreateDoubleMatrix(m, s, mxREAL);
       y = mxGetPr(plhs[0]);
       y_ = mxGetPr(plhs[1]);
-      ss2Iteration_pruning(y, y_, yhat, yhat_, epsilon, ghx, ghu, constant, ghxx, ghuu, ghxu, ss, (int) m, (int) n, (int) q, (int) s);
+      ss2Iteration_pruning(y, y_, yhat, yhat_, epsilon, ghx, ghu, constant, ghxx, ghuu, ghxu, ss, (int) m, (int) n, (int) q, (int) s, numthreads);
     }
 }
diff --git a/tests/particle/dsge_base.mod b/tests/particle/dsge_base.mod
index f99c52c97ac91cd935cf7f06c270f215b09737d8..3c173e0137b79f24abf5a3598956235ecb607558 100644
--- a/tests/particle/dsge_base.mod
+++ b/tests/particle/dsge_base.mod
@@ -63,7 +63,14 @@ end;
 
 varobs y_obs l_obs i_obs;
 
-options_.particle_filter.algorithm = 'sequential_importance_particle_filter';
-options_.particle_filter.initialization = 1;
+options_.particle.status = 1;
+options_.particle.algorithm = 'sequential_importance_particle_filter';
+options_.particle.initialization = 1;
+particle.number_of_particles = 500;
 
-estimation(datafile=data_benchmark,order=2,nobs=100,mh_replic=0,mode_compute=8,mode_check);
+set_dynare_threads('local_state_space_iteration_2',2);
+
+//estimation(datafile=data_benchmark,order=2,nobs=100,mh_replic=0,mode_compute=6);
+//estimation(datafile=data_benchmark,order=2,nobs=100,mh_replic=0,mode_compute=8,mode_file=dsge_base_mode);
+//estimation(datafile=data_benchmark,order=2,nobs=100,mh_replic=0,mode_compute=8,mode_file=dsge_base_mode);
+estimation(datafile=data_benchmark,order=2,nobs=100,mh_replic=0,mode_compute=4,mode_file=dsge_base_mode,mode_check);
\ No newline at end of file