diff --git a/matlab/global_initialization.m b/matlab/global_initialization.m index d019777169362bee4f1bd988c98985c55624fd30..2ee53d90f3d7a75d351085da5a8509ce5c1d7be0 100644 --- a/matlab/global_initialization.m +++ b/matlab/global_initialization.m @@ -62,6 +62,7 @@ options_.mode_check_neighbourhood_size = 0.5; % Default number of threads for parallelized mex files. options_.threads.kronecker.A_times_B_kronecker_C = 1; options_.threads.kronecker.sparse_hessian_times_B_kronecker_C = 1; +options_.threads.local_state_space_iteration_2 = 1; % steady state options_.jacobian_flag = 1; @@ -176,9 +177,9 @@ particle.algorithm = 'sequential_importance_particle_filter'; % Set the Gaussian approximation method particle.approximation_method = 'unscented'; % Set unscented parameters alpha, beta and kappa for gaussian approximation -particle.unscented.alpha = 1 ; -particle.unscented.beta = 2 ; -particle.unscented.kappa = 1 ; +particle.unscented.alpha = 1; +particle.unscented.beta = 2; +particle.unscented.kappa = 1; % Configuration of resampling in case of particles particle.resampling.status = 'systematic' ; % Choice of the resampling method diff --git a/matlab/particle/auxiliary_particle_filter.m b/matlab/particle/auxiliary_particle_filter.m index eb1130d8fdb2846e1e7b0c62149ab3628c5193c8..77777c85005c0aaa856964ac2429e2cb3ba001db 100644 --- a/matlab/particle/auxiliary_particle_filter.m +++ b/matlab/particle/auxiliary_particle_filter.m @@ -20,7 +20,7 @@ function [LIK,lik] = auxiliary_particle_filter(ReducedForm,Y,start,DynareOptions % NOTES % The vector "lik" is used to evaluate the jacobian of the likelihood. -% Copyright (C) 2009-2010 Dynare Team +% Copyright (C) 2011, 2012 Dynare Team % % This file is part of Dynare. % @@ -94,7 +94,7 @@ weights = ones(1,number_of_particles); StateVectors = bsxfun(@plus,StateVectorVarianceSquareRoot*randn(state_variance_rank,number_of_particles),StateVectorMean); for t=1:sample_size yhat = bsxfun(@minus,StateVectors,state_variables_steady_state); - tmp = local_state_space_iteration_2(yhat,zeros(number_of_structural_innovations,number_of_particles),ghx,ghu,constant,ghxx,ghuu,ghxu); + tmp = local_state_space_iteration_2(yhat,zeros(number_of_structural_innovations,number_of_particles),ghx,ghu,constant,ghxx,ghuu,ghxu,DynareOptions.threads.local_state_space_iteration_2); PredictedObservedMean = mean(tmp(mf1,:),2); PredictionError = bsxfun(@minus,Y(:,t),tmp(mf1,:)); dPredictedObservedMean = bsxfun(@minus,tmp(mf1,:),PredictedObservedMean); @@ -104,17 +104,17 @@ for t=1:sample_size sum_tau_tilde = sum(tau_tilde) ; lik(t) = log(sum_tau_tilde) ; tau_tilde = tau_tilde/sum_tau_tilde; - indx_resmpl = resample(tau_tilde) ; - yhat = yhat(:,indx_resmpl) ; - wtilde = wtilde(indx_resmpl) ; + indx_resmpl = resample(tau_tilde); + yhat = yhat(:,indx_resmpl); + wtilde = wtilde(indx_resmpl); epsilon = Q_lower_triangular_cholesky*randn(number_of_structural_innovations,number_of_particles); - tmp = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu); - StateVectors = tmp(mf0,:) ; + tmp = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,DynareOptions.threads.local_state_space_iteration_2); + StateVectors = tmp(mf0,:); PredictedObservedMean = mean(tmp(mf1,:),2); PredictionError = bsxfun(@minus,Y(:,t),tmp(mf1,:)); dPredictedObservedMean = bsxfun(@minus,tmp(mf1,:),PredictedObservedMean); PredictedObservedVariance = (dPredictedObservedMean*dPredictedObservedMean')/number_of_particles+H; - lnw = exp(-.5*(const_lik+log(det(PredictedObservedVariance))+sum(PredictionError.*(PredictedObservedVariance\PredictionError),1))) ; + lnw = exp(-.5*(const_lik+log(det(PredictedObservedVariance))+sum(PredictionError.*(PredictedObservedVariance\PredictionError),1))); wtilde = lnw./wtilde; weights = wtilde/sum(wtilde); end diff --git a/matlab/particle/local_state_space_iteration/local_state_space_iteration_2.m b/matlab/particle/local_state_space_iteration/local_state_space_iteration_2.m index be4b07b84d9ff70364e74a13ddc1aedbfddb1444..778e2d075b4334fd7bb778faf431be7208315e69 100644 --- a/matlab/particle/local_state_space_iteration/local_state_space_iteration_2.m +++ b/matlab/particle/local_state_space_iteration/local_state_space_iteration_2.m @@ -1,4 +1,4 @@ -function [y,y_] = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,yhat_,ss) +function [y,y_] = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,a,b,c)%yhat_,ss) %@info: %! @deftypefn {Function File} {@var{y}, @var{y_} =} local_state_equation_2 (@var{yhat},@var{epsilon}, @var{ghx}, @var{ghu}, @var{constant}, @var{ghxx}, @var{ghuu}, @var{ghxu}, @var{yhat_}, @var{ss}) @@ -78,15 +78,13 @@ function [y,y_] = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,gh % AUTHOR(S) stephane DOT adjemian AT univ DASH lemans DOT fr % frederic DOT karame AT univ DASH evry DOT fr -number_of_threads = 1; - -if nargin==8 - pruning = 0; +if nargin==9 + pruning = 0; numthreads = a; if nargout>1 error('local_state_space_iteration_2:: Numbers of input and output argument are inconsistent!') end -elseif nargin==10 - pruning = 1; +elseif nargin==11 + pruning = 1; numthreads = c; yhat_ = a; ss = b; if nargout~=2 error('local_state_space_iteration_2:: Numbers of input and output argument are inconsistent!') end @@ -94,6 +92,8 @@ else error('local_state_space_iteration_2:: Wrong number of input arguments!') end +number_of_threads = numthreads; + switch pruning case 0 for i =1:size(yhat,2) @@ -130,8 +130,8 @@ end %$ %$ % Call the tested routine. %$ for i=1:10 -%$ y1 = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu); -%$ [y2,y2_] = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,yhat_,ss); +%$ y1 = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,1); +%$ [y2,y2_] = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,yhat_,ss,1); %$ end %$ %$ % Check the results. @@ -168,12 +168,12 @@ end %$ %$ % Call the tested routine. %$ try -%$ y1 = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu); +%$ y1 = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,1); %$ catch %$ t(1) = 0; %$ end %$ try -%$ [y2,y2_] = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,yhat_,ss); +%$ [y2,y2_] = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,yhat_,ss,1); %$ catch %$ t(2) = 0; %$ end @@ -200,8 +200,8 @@ end %$ ss = ones(n,1); %$ %$ % Call the tested routine (mex version). -%$ y1a = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu); -%$ [y2a,y2a_] = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,yhat_,ss); +%$ y1a = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,1); +%$ [y2a,y2a_] = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,yhat_,ss,1); %$ %$ % Call the tested routine (matlab version) %$ path_to_mex = fileparts(which(['qmc_sequence.' mexext])); @@ -210,8 +210,8 @@ end %$ tar('local_state_space_iteration_2.tar',['local_state_space_iteration_2.' mexext]); %$ cd(where_am_i_coming_from); %$ dynare_config([],0); -%$ y1b = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu); -%$ [y2b,y2b_] = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,yhat_,ss); +%$ y1b = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,1); +%$ [y2b,y2b_] = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,yhat_,ss,1); %$ cd(path_to_mex); %$ untar('local_state_space_iteration_2.tar'); %$ delete('local_state_space_iteration_2.tar'); @@ -227,3 +227,49 @@ end %$ T = all(t); %$ end %@eof:3 + + +%@test:4 +%$ % TIMING TEST (parallelization with openmp) +%$ old_path = pwd; +%$ cd([fileparts(which('dynare')) '/../tests/']); +%$ dynare('dsge_base2'); +%$ load dsge_base2; +%$ cd(old_path); +%$ dr = oo_.dr; +%$ clear('oo_','options_','M_'); +%$ delete([fileparts(which('dynare')) '/../tests/dsge_base2.mat']); +%$ istates = dr.nstatic+(1:dr.npred); +%$ n = dr.npred; +%$ q = size(dr.ghu,2); +%$ yhat = zeros(n,10000000); +%$ epsilon = zeros(q,10000000); +%$ ghx = dr.ghx(istates,:); +%$ ghu = dr.ghu(istates,:); +%$ constant = dr.ys(istates,:)+dr.ghs2(istates,:); +%$ ghxx = dr.ghxx(istates,:); +%$ ghuu = dr.ghuu(istates,:); +%$ ghxu = dr.ghxu(istates,:); +%$ yhat_ = zeros(n,10000000); +%$ ss = dr.ys(istates,:); +%$ +%$ t = NaN(4,1); +%$ tic, for i=1:10, y1 = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,1); end +%$ t1 = toc; +%$ tic, for i=1:10, y2 = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,2); end +%$ t2 = toc; +%$ t(1) = dyn_assert(y1,y2,1e-15); clear('y1'); +%$ tic, for i=1:10, y3 = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,3); end +%$ t3 = toc; +%$ t(2) = dyn_assert(y2,y3,1e-15); clear('y2'); +%$ tic, for i=1:10, y4 = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,4); end +%$ t4 = toc; +%$ t(3) = dyn_assert(y4,y3,1e-15); clear('y3','y4'); +%$ t(4) = (t1>t2) && (t2>t3) && (t3>t4); +%$ if ~t(4) +%$ disp('Timmings:') +%$ [t1, t2, t3, t4] +%$ end +%$ % Check the results. +%$ T = all(t); +%@eof:4 diff --git a/matlab/particle/sequential_importance_particle_filter.m b/matlab/particle/sequential_importance_particle_filter.m index db89ff4e4ea6ff19220640db9f6f1f95b93736f0..cbf675237030ba8b473837a6f222cd95a3d14023 100644 --- a/matlab/particle/sequential_importance_particle_filter.m +++ b/matlab/particle/sequential_importance_particle_filter.m @@ -120,7 +120,7 @@ StateVectors = bsxfun(@plus,StateVectorVarianceSquareRoot*randn(state_variance_r for t=1:sample_size yhat = bsxfun(@minus,StateVectors,state_variables_steady_state); epsilon = Q_lower_triangular_cholesky*randn(number_of_structural_innovations,number_of_particles); - tmp = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu); + tmp = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,DynareOptions.threads.local_state_space_iteration_2); PredictedObservedMean = mean(tmp(mf1,:),2); PredictionError = bsxfun(@minus,Y(:,t),tmp(mf1,:)); dPredictedObservedMean = bsxfun(@minus,tmp(mf1,:),PredictedObservedMean); diff --git a/matlab/set_dynare_threads.m b/matlab/set_dynare_threads.m index a25bfbc3ca9ecca4fb9be965d5d9222d472fa219..c113b936951b1bf5fc13ae15e08b799176aa9a15 100644 --- a/matlab/set_dynare_threads.m +++ b/matlab/set_dynare_threads.m @@ -1,13 +1,13 @@ function set_dynare_threads(mexname,n) % This function sets the number of threads used by some MEX files when compiled -% with OpenMP support (i.e with --enable-openmp is given to configure) or any +% with OpenMP support (i.e with --enable-openmp is given to configure) or any % other parallel library. % -% INPUTS -% o mexname [string] Name of the mex file. +% INPUTS +% o mexname [string] Name of the mex file. % o n [integer] scalar specifying the number of threads to be used. % -% OUTPUTS +% OUTPUTS % none. % Copyright (C) 2009-2010 Dynare Team @@ -41,6 +41,8 @@ switch mexname options_.threads.kronecker.A_times_B_kronecker_C = n; case 'sparse_hessian_times_B_kronecker_C' options_.threads.kronecker.sparse_hessian_times_B_kronecker_C = n; + case 'local_state_space_iteration_2' + options_.threads.local_state_space_iteration_2 = n; otherwise message = [ mexname ' is not a known parallel mex file.' ]; message_id = 'Dynare:Threads:UnknownParallelMex'; diff --git a/mex/sources/local_state_space_iterations/local_state_space_iteration_2.cc b/mex/sources/local_state_space_iterations/local_state_space_iteration_2.cc index ae1667b43669cb97e447ca6a5ca38b99a944ac3d..6943b782b52f5068e573cc4fb5cc4424ab9bf31e 100644 --- a/mex/sources/local_state_space_iterations/local_state_space_iteration_2.cc +++ b/mex/sources/local_state_space_iterations/local_state_space_iteration_2.cc @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010 Dynare Team + * Copyright (C) 2010-2012 Dynare Team * * This file is part of Dynare. * @@ -18,7 +18,7 @@ */ /* - * This mex file computes particles at time t+1 given particles and innovations at time t, + * This mex file computes particles at time t+1 given particles and innovations at time t, * using a second order approximation of the nonlinear state space model. */ @@ -34,7 +34,7 @@ using namespace std; -//#define FIRST_ORDER_LOOP 1// Comment out this line to use mkl-blas instead of loops when computing ghx*yhat and ghu*epsilon +#define FIRST_ORDER_LOOP 1// Comment out this line to use mkl-blas instead of loops when computing ghx*yhat and ghu*epsilon void set_vector_of_indices(const int n, const int r, vector<int> &v1, vector<int> &v2, vector<int> &v3) { @@ -54,20 +54,23 @@ void set_vector_of_indices(const int n, const int r, vector<int> &v1, vector<int } } -void ss2Iteration_pruning(double* y2, double* y1, const double* yhat2, const double* yhat1, const double *epsilon, - double* ghx, double* ghu, - const double* constant, const double* ghxx, const double* ghuu, const double* ghxu, const double* ss, - const blas_int m, const blas_int n, const blas_int q, const blas_int s) +void ss2Iteration_pruning(double* y2, double* y1, const double* yhat2, const double* yhat1, const double *epsilon, + double* ghx, double* ghu, + const double* constant, const double* ghxx, const double* ghuu, const double* ghxu, const double* ss, + const blas_int m, const blas_int n, const blas_int q, const blas_int s, const int number_of_threads) { - const char transpose[2] = "N"; - const double one = 1.0; - const blas_int ONE = 1; - + #ifndef FIRST_ORDER_LOOP + const char transpose[2] = "N"; + const double one = 1.0; + const blas_int ONE = 1; + #endif vector<int> ii1, ii2, ii3;// vector indices for ghxx vector<int> jj1, jj2, jj3;// vector indices for ghuu set_vector_of_indices(n, m, ii1, ii2, ii3); set_vector_of_indices(q, m, jj1, jj2, jj3); - + #ifdef USE_OMP + #pragma omp parallel for num_threads(number_of_threads) + #endif for (int particle = 0; particle<s; particle++) { int particle_ = particle*m; @@ -75,35 +78,35 @@ void ss2Iteration_pruning(double* y2, double* y1, const double* yhat2, const dou int particle___ = particle*q; memcpy(&y2[particle_],&constant[0],m*sizeof(double)); memcpy(&y1[particle_],&ss[0],m*sizeof(double)); -#ifndef FIRST_ORDER_LOOP - dgemv(transpose,&m,&n,&one,&ghx[0],&m,&yhat2[particle__],&ONE,&one,&y2[particle_],&ONE); - dgemv(transpose,&m,&q,&one,&ghu[0],&m,&epsilon[particle___],&ONE,&one,&y2[particle_],&ONE); -#endif + #ifndef FIRST_ORDER_LOOP + dgemv(transpose,&m,&n,&one,&ghx[0],&m,&yhat2[particle__],&ONE,&one,&y2[particle_],&ONE); + dgemv(transpose,&m,&q,&one,&ghu[0],&m,&epsilon[particle___],&ONE,&one,&y2[particle_],&ONE); + #endif for (int variable = 0; variable<m; variable++) { int variable_ = variable + particle_; // +ghx*yhat2+ghu*u -#ifdef FIRST_ORDER_LOOP - for (int column = 0, column_=0; column<q; column++, column_ += m) - { - int i1 = variable+column_; - int i2 = column+particle__; - int i3 = column+particle___; - y2[variable_] += ghx[i1]*yhat2[i2]; - y2[variable_] += ghu[i1]*epsilon[i3]; - } - for (int column = q, column_=q*m; column<n; column++, column_ += m) - { - y2[variable_] += ghx[variable+column_]*yhat2[column+particle__]; - } -#endif + #ifdef FIRST_ORDER_LOOP + for (int column = 0, column_=0; column<q; column++, column_ += m) + { + int i1 = variable+column_; + int i2 = column+particle__; + int i3 = column+particle___; + y2[variable_] += ghx[i1]*yhat2[i2]; + y2[variable_] += ghu[i1]*epsilon[i3]; + } + for (int column = q, column_=q*m; column<n; column++, column_ += m) + { + y2[variable_] += ghx[variable+column_]*yhat2[column+particle__]; + } + #endif // +ghxx*kron(yhat1,yhat1) for(int i=0; i<n*(n+1)/2; i++) { int i1 = particle__+ii1[i]; int i2 = particle__+ii2[i]; if(i1==i2) - { + { y2[variable_] += .5*ghxx[variable+ii3[i]]*yhat1[i1]*yhat1[i1]; } else @@ -129,77 +132,80 @@ void ss2Iteration_pruning(double* y2, double* y1, const double* yhat2, const dou for (int v = particle__, i = 0; v<particle__+n; v++) for (int s = particle___; s<particle___+q; s++, i += m) y2[variable_] += ghxu[variable+i]*epsilon[s]*yhat2[v]; -#ifdef FIRST_ORDER_LOOP - for (int column = 0, column_=0; column<q; column++, column_ += m) - { - int i1 = variable+column_; - int i2 = column+particle__; - int i3 = column+particle___; - y1[variable_] += ghx[i1]*yhat1[i2]; - y1[variable_] += ghu[i1]*epsilon[i3]; - } - for (int column = q, column_=q*m; column<n; column++, column_ += m) - { - y1[variable_] += ghx[variable+column_]*yhat1[column+particle__]; - } -#endif + #ifdef FIRST_ORDER_LOOP + for (int column = 0, column_=0; column<q; column++, column_ += m) + { + int i1 = variable+column_; + int i2 = column+particle__; + int i3 = column+particle___; + y1[variable_] += ghx[i1]*yhat1[i2]; + y1[variable_] += ghu[i1]*epsilon[i3]; + } + for (int column = q, column_=q*m; column<n; column++, column_ += m) + { + y1[variable_] += ghx[variable+column_]*yhat1[column+particle__]; + } + #endif } -#ifndef FIRST_ORDER_LOOP - dgemv(transpose,&m,&n,&one,&ghx[0],&m,&yhat1[particle__],&ONE,&one,&y1[particle_],&ONE); - dgemv(transpose,&m,&q,&one,&ghu[0],&m,&epsilon[particle___],&ONE,&one,&y1[particle_],&ONE); -#endif + #ifndef FIRST_ORDER_LOOP + dgemv(transpose,&m,&n,&one,&ghx[0],&m,&yhat1[particle__],&ONE,&one,&y1[particle_],&ONE); + dgemv(transpose,&m,&q,&one,&ghu[0],&m,&epsilon[particle___],&ONE,&one,&y1[particle_],&ONE); + #endif } } -void ss2Iteration(double* y, const double* yhat, const double *epsilon, - double* ghx, double* ghu, - const double* constant, const double* ghxx, const double* ghuu, const double* ghxu, - const blas_int m, const blas_int n, const blas_int q, const blas_int s) +void ss2Iteration(double* y, const double* yhat, const double *epsilon, + double* ghx, double* ghu, + const double* constant, const double* ghxx, const double* ghuu, const double* ghxu, + const blas_int m, const blas_int n, const blas_int q, const blas_int s, const int number_of_threads) { - const char transpose[2] = "N"; - const double one = 1.0; - const blas_int ONE = 1; - + #ifndef FIRST_ORDER_LOOP + const char transpose[2] = "N"; + const double one = 1.0; + const blas_int ONE = 1; + #endif vector<int> ii1, ii2, ii3;// vector indices for ghxx vector<int> jj1, jj2, jj3;// vector indices for ghuu set_vector_of_indices(n, m, ii1, ii2, ii3); set_vector_of_indices(q, m, jj1, jj2, jj3); - + #ifdef USE_OMP + #pragma omp parallel for num_threads(number_of_threads) + #endif for (int particle = 0; particle<s; particle++) { int particle_ = particle*m; int particle__ = particle*n; int particle___ = particle*q; memcpy(&y[particle_],&constant[0],m*sizeof(double)); -#ifndef FIRST_ORDER_LOOP - dgemv(transpose,&m,&n,&one,&ghx[0],&m,&yhat[particle__],&ONE,&one,&y[particle_],&ONE); - dgemv(transpose,&m,&q,&one,&ghu[0],&m,&epsilon[particle___],&ONE,&one,&y[particle_],&ONE); -#endif + #ifndef FIRST_ORDER_LOOP + dgemv(transpose,&m,&n,&one,&ghx[0],&m,&yhat[particle__],&ONE,&one,&y[particle_],&ONE); + dgemv(transpose,&m,&q,&one,&ghu[0],&m,&epsilon[particle___],&ONE,&one,&y[particle_],&ONE); + #endif for (int variable = 0; variable<m; variable++) { int variable_ = variable + particle_; // +ghx*yhat+ghu*u -#ifdef FIRST_ORDER_LOOP - for (int column = 0, column_=0; column<q; column++, column_ += m) - { - int i1 = variable+column_; - int i2 = column+particle__; - int i3 = column+particle___; - y[variable_] += ghx[i1]*yhat[i2]; - y[variable_] += ghu[i1]*epsilon[i3]; - } - for (int column = q, column_=q*m; column<n; column++, column_ += m) - { - y[variable_] += ghx[variable+column_]*yhat[column+particle__]; - } -#endif + #ifdef FIRST_ORDER_LOOP + for (int column = 0, column_=0; column<q; column++, column_ += m) + { + int i1 = variable+column_; + int i2 = column+particle__; + int i3 = column+particle___; + y[variable_] += ghx[i1]*yhat[i2]; + y[variable_] += ghu[i1]*epsilon[i3]; + } + for (int column = q, column_=q*m; column<n; column++, column_ += m) + { + y[variable_] += ghx[variable+column_]*yhat[column+particle__]; + } + #endif // +ghxx*kron(yhat,yhat) - for(int i=0; i<n*(n+1)/2; i++) + for(int i=0; i<n*(n+1)/2; i++) { int i1 = particle__+ii1[i]; int i2 = particle__+ii2[i]; if(i1==i2) - { + { y[variable_] += .5*ghxx[variable+ii3[i]]*yhat[i1]*yhat[i1]; } else @@ -238,25 +244,25 @@ void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) ** prhs[0] yhat [double] n*s array, time t particles. ** prhs[1] epsilon [double] q*s array, time t innovations. ** prhs[2] ghx [double] m*n array, first order reduced form. - ** prhs[3] ghu [double] m*q array, first order reduced form. + ** prhs[3] ghu [double] m*q array, first order reduced form. ** prhs[4] constant [double] m*1 array, deterministic steady state + second order correction for the union of the states and observed variables. ** prhs[5] ghxx [double] m*n^2 array, second order reduced form. ** prhs[6] ghuu [double] m*q^2 array, second order reduced form. ** prhs[7] ghxu [double] m*nq array, second order reduced form. - ** prhs[8] yhat_ [double] [OPTIONAL] n*s array, time t particles (pruning additional latent variables). + ** prhs[8] yhat_ [double] [OPTIONAL] n*s array, time t particles (pruning additional latent variables). ** prhs[9] ss [double] [OPTIONAL] m*1 array, steady state for the union of the states and the observed variables (needed for the pruning mode). ** ** plhs[0] y [double] n*s array, time t+1 particles. - ** plhs[1] y_ [double] n*s array, time t+1 particles for the pruning latent variables. + ** plhs[1] y_ [double] n*s array, time t+1 particles for the pruning latent variables. ** */ // Check the number of input and output. - if ((nrhs != 8) && (nrhs != 10)) + if ((nrhs != 9) && (nrhs != 11)) { mexErrMsgTxt("Eight or ten input arguments are required."); } - if (nlhs > 2) + if (nlhs > 2) { mexErrMsgTxt("Too many output arguments."); } @@ -265,15 +271,17 @@ void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) mwSize s = mxGetN(prhs[0]);// Number of particles. mwSize q = mxGetM(prhs[1]);// Number of innovations. mwSize m = mxGetM(prhs[2]);// Number of elements in the union of states and observed variables. + //mexPrintf("\n s (the number of column of yhat) is equal to %d.", s); + //mexPrintf("\n The number of column of epsilon is %d.", mxGetN(prhs[1])); // Check the dimensions. if ( - (s != mxGetN(prhs[1])) || // Number of columns for epsilon + (s != mxGetN(prhs[1])) || // Number of columns for epsilon (n != mxGetN(prhs[2])) || // Number of columns for ghx (m != mxGetM(prhs[3])) || // Number of rows for ghu (q != mxGetN(prhs[3])) || // Number of columns for ghu (m != mxGetM(prhs[4])) || // Number of rows for 2nd order constant correction + deterministic steady state (m != mxGetM(prhs[5])) || // Number of rows for ghxx - (n*n != mxGetN(prhs[5])) || // Number of columns for ghxx + (n*n != mxGetN(prhs[5])) || // Number of columns for ghxx (m != mxGetM(prhs[6])) || // Number of rows for ghuu (q*q != mxGetN(prhs[6])) || // Number of columns for ghuu (m != mxGetM(prhs[7])) || // Number of rows for ghxu @@ -282,7 +290,7 @@ void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) { mexErrMsgTxt("Input dimension mismatch!."); } - if (nrhs>8) + if (nrhs>9) { if ( (n != mxGetM(prhs[8])) || // Number of rows for yhat_ @@ -303,25 +311,27 @@ void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) double *ghuu = mxGetPr(prhs[6]); double *ghxu = mxGetPr(prhs[7]); double *yhat_, *ss; - if (nrhs>8) + if (nrhs>9) { yhat_ = mxGetPr(prhs[8]); ss = mxGetPr(prhs[9]); - } - if (nrhs==8) + } + if (nrhs==9) { + int numthreads = (int) mxGetScalar(prhs[8]); double *y; plhs[0] = mxCreateDoubleMatrix(m, s, mxREAL); y = mxGetPr(plhs[0]); - ss2Iteration(y, yhat, epsilon, ghx, ghu, constant, ghxx, ghuu, ghxu, (int) m, (int) n, (int) q, (int) s); + ss2Iteration(y, yhat, epsilon, ghx, ghu, constant, ghxx, ghuu, ghxu, (int) m, (int) n, (int) q, (int) s, numthreads); } else { + int numthreads = (int) mxGetScalar(prhs[10]); double *y, *y_; plhs[0] = mxCreateDoubleMatrix(m, s, mxREAL); plhs[1] = mxCreateDoubleMatrix(m, s, mxREAL); y = mxGetPr(plhs[0]); y_ = mxGetPr(plhs[1]); - ss2Iteration_pruning(y, y_, yhat, yhat_, epsilon, ghx, ghu, constant, ghxx, ghuu, ghxu, ss, (int) m, (int) n, (int) q, (int) s); + ss2Iteration_pruning(y, y_, yhat, yhat_, epsilon, ghx, ghu, constant, ghxx, ghuu, ghxu, ss, (int) m, (int) n, (int) q, (int) s, numthreads); } } diff --git a/tests/particle/dsge_base.mod b/tests/particle/dsge_base.mod index f99c52c97ac91cd935cf7f06c270f215b09737d8..3c173e0137b79f24abf5a3598956235ecb607558 100644 --- a/tests/particle/dsge_base.mod +++ b/tests/particle/dsge_base.mod @@ -63,7 +63,14 @@ end; varobs y_obs l_obs i_obs; -options_.particle_filter.algorithm = 'sequential_importance_particle_filter'; -options_.particle_filter.initialization = 1; +options_.particle.status = 1; +options_.particle.algorithm = 'sequential_importance_particle_filter'; +options_.particle.initialization = 1; +particle.number_of_particles = 500; -estimation(datafile=data_benchmark,order=2,nobs=100,mh_replic=0,mode_compute=8,mode_check); +set_dynare_threads('local_state_space_iteration_2',2); + +//estimation(datafile=data_benchmark,order=2,nobs=100,mh_replic=0,mode_compute=6); +//estimation(datafile=data_benchmark,order=2,nobs=100,mh_replic=0,mode_compute=8,mode_file=dsge_base_mode); +//estimation(datafile=data_benchmark,order=2,nobs=100,mh_replic=0,mode_compute=8,mode_file=dsge_base_mode); +estimation(datafile=data_benchmark,order=2,nobs=100,mh_replic=0,mode_compute=4,mode_file=dsge_base_mode,mode_check); \ No newline at end of file