Commit a2f3a536 authored by Stéphane Adjemian's avatar Stéphane Adjemian
Browse files

Parallelization of local_state_space_iteration_2 (used in non linear filters).

parent 6c091015
......@@ -62,6 +62,7 @@ options_.mode_check_neighbourhood_size = 0.5;
% Default number of threads for parallelized mex files.
options_.threads.kronecker.A_times_B_kronecker_C = 1;
options_.threads.kronecker.sparse_hessian_times_B_kronecker_C = 1;
options_.threads.local_state_space_iteration_2 = 1;
% steady state
options_.jacobian_flag = 1;
......@@ -176,9 +177,9 @@ particle.algorithm = 'sequential_importance_particle_filter';
% Set the Gaussian approximation method
particle.approximation_method = 'unscented';
% Set unscented parameters alpha, beta and kappa for gaussian approximation
particle.unscented.alpha = 1 ;
particle.unscented.beta = 2 ;
particle.unscented.kappa = 1 ;
particle.unscented.alpha = 1;
particle.unscented.beta = 2;
particle.unscented.kappa = 1;
% Configuration of resampling in case of particles
particle.resampling.status = 'systematic' ;
% Choice of the resampling method
......
......@@ -20,7 +20,7 @@ function [LIK,lik] = auxiliary_particle_filter(ReducedForm,Y,start,DynareOptions
% NOTES
% The vector "lik" is used to evaluate the jacobian of the likelihood.
% Copyright (C) 2009-2010 Dynare Team
% Copyright (C) 2011, 2012 Dynare Team
%
% This file is part of Dynare.
%
......@@ -94,7 +94,7 @@ weights = ones(1,number_of_particles);
StateVectors = bsxfun(@plus,StateVectorVarianceSquareRoot*randn(state_variance_rank,number_of_particles),StateVectorMean);
for t=1:sample_size
yhat = bsxfun(@minus,StateVectors,state_variables_steady_state);
tmp = local_state_space_iteration_2(yhat,zeros(number_of_structural_innovations,number_of_particles),ghx,ghu,constant,ghxx,ghuu,ghxu);
tmp = local_state_space_iteration_2(yhat,zeros(number_of_structural_innovations,number_of_particles),ghx,ghu,constant,ghxx,ghuu,ghxu,DynareOptions.threads.local_state_space_iteration_2);
PredictedObservedMean = mean(tmp(mf1,:),2);
PredictionError = bsxfun(@minus,Y(:,t),tmp(mf1,:));
dPredictedObservedMean = bsxfun(@minus,tmp(mf1,:),PredictedObservedMean);
......@@ -104,17 +104,17 @@ for t=1:sample_size
sum_tau_tilde = sum(tau_tilde) ;
lik(t) = log(sum_tau_tilde) ;
tau_tilde = tau_tilde/sum_tau_tilde;
indx_resmpl = resample(tau_tilde) ;
yhat = yhat(:,indx_resmpl) ;
wtilde = wtilde(indx_resmpl) ;
indx_resmpl = resample(tau_tilde);
yhat = yhat(:,indx_resmpl);
wtilde = wtilde(indx_resmpl);
epsilon = Q_lower_triangular_cholesky*randn(number_of_structural_innovations,number_of_particles);
tmp = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu);
StateVectors = tmp(mf0,:) ;
tmp = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,DynareOptions.threads.local_state_space_iteration_2);
StateVectors = tmp(mf0,:);
PredictedObservedMean = mean(tmp(mf1,:),2);
PredictionError = bsxfun(@minus,Y(:,t),tmp(mf1,:));
dPredictedObservedMean = bsxfun(@minus,tmp(mf1,:),PredictedObservedMean);
PredictedObservedVariance = (dPredictedObservedMean*dPredictedObservedMean')/number_of_particles+H;
lnw = exp(-.5*(const_lik+log(det(PredictedObservedVariance))+sum(PredictionError.*(PredictedObservedVariance\PredictionError),1))) ;
lnw = exp(-.5*(const_lik+log(det(PredictedObservedVariance))+sum(PredictionError.*(PredictedObservedVariance\PredictionError),1)));
wtilde = lnw./wtilde;
weights = wtilde/sum(wtilde);
end
......
function [y,y_] = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,yhat_,ss)
function [y,y_] = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,a,b,c)%yhat_,ss)
%@info:
%! @deftypefn {Function File} {@var{y}, @var{y_} =} local_state_equation_2 (@var{yhat},@var{epsilon}, @var{ghx}, @var{ghu}, @var{constant}, @var{ghxx}, @var{ghuu}, @var{ghxu}, @var{yhat_}, @var{ss})
......@@ -78,15 +78,13 @@ function [y,y_] = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,gh
% AUTHOR(S) stephane DOT adjemian AT univ DASH lemans DOT fr
% frederic DOT karame AT univ DASH evry DOT fr
number_of_threads = 1;
if nargin==8
pruning = 0;
if nargin==9
pruning = 0; numthreads = a;
if nargout>1
error('local_state_space_iteration_2:: Numbers of input and output argument are inconsistent!')
end
elseif nargin==10
pruning = 1;
elseif nargin==11
pruning = 1; numthreads = c; yhat_ = a; ss = b;
if nargout~=2
error('local_state_space_iteration_2:: Numbers of input and output argument are inconsistent!')
end
......@@ -94,6 +92,8 @@ else
error('local_state_space_iteration_2:: Wrong number of input arguments!')
end
number_of_threads = numthreads;
switch pruning
case 0
for i =1:size(yhat,2)
......@@ -130,8 +130,8 @@ end
%$
%$ % Call the tested routine.
%$ for i=1:10
%$ y1 = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu);
%$ [y2,y2_] = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,yhat_,ss);
%$ y1 = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,1);
%$ [y2,y2_] = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,yhat_,ss,1);
%$ end
%$
%$ % Check the results.
......@@ -168,12 +168,12 @@ end
%$
%$ % Call the tested routine.
%$ try
%$ y1 = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu);
%$ y1 = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,1);
%$ catch
%$ t(1) = 0;
%$ end
%$ try
%$ [y2,y2_] = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,yhat_,ss);
%$ [y2,y2_] = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,yhat_,ss,1);
%$ catch
%$ t(2) = 0;
%$ end
......@@ -200,8 +200,8 @@ end
%$ ss = ones(n,1);
%$
%$ % Call the tested routine (mex version).
%$ y1a = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu);
%$ [y2a,y2a_] = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,yhat_,ss);
%$ y1a = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,1);
%$ [y2a,y2a_] = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,yhat_,ss,1);
%$
%$ % Call the tested routine (matlab version)
%$ path_to_mex = fileparts(which(['qmc_sequence.' mexext]));
......@@ -210,8 +210,8 @@ end
%$ tar('local_state_space_iteration_2.tar',['local_state_space_iteration_2.' mexext]);
%$ cd(where_am_i_coming_from);
%$ dynare_config([],0);
%$ y1b = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu);
%$ [y2b,y2b_] = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,yhat_,ss);
%$ y1b = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,1);
%$ [y2b,y2b_] = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,yhat_,ss,1);
%$ cd(path_to_mex);
%$ untar('local_state_space_iteration_2.tar');
%$ delete('local_state_space_iteration_2.tar');
......@@ -227,3 +227,49 @@ end
%$ T = all(t);
%$ end
%@eof:3
%@test:4
%$ % TIMING TEST (parallelization with openmp)
%$ old_path = pwd;
%$ cd([fileparts(which('dynare')) '/../tests/']);
%$ dynare('dsge_base2');
%$ load dsge_base2;
%$ cd(old_path);
%$ dr = oo_.dr;
%$ clear('oo_','options_','M_');
%$ delete([fileparts(which('dynare')) '/../tests/dsge_base2.mat']);
%$ istates = dr.nstatic+(1:dr.npred);
%$ n = dr.npred;
%$ q = size(dr.ghu,2);
%$ yhat = zeros(n,10000000);
%$ epsilon = zeros(q,10000000);
%$ ghx = dr.ghx(istates,:);
%$ ghu = dr.ghu(istates,:);
%$ constant = dr.ys(istates,:)+dr.ghs2(istates,:);
%$ ghxx = dr.ghxx(istates,:);
%$ ghuu = dr.ghuu(istates,:);
%$ ghxu = dr.ghxu(istates,:);
%$ yhat_ = zeros(n,10000000);
%$ ss = dr.ys(istates,:);
%$
%$ t = NaN(4,1);
%$ tic, for i=1:10, y1 = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,1); end
%$ t1 = toc;
%$ tic, for i=1:10, y2 = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,2); end
%$ t2 = toc;
%$ t(1) = dyn_assert(y1,y2,1e-15); clear('y1');
%$ tic, for i=1:10, y3 = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,3); end
%$ t3 = toc;
%$ t(2) = dyn_assert(y2,y3,1e-15); clear('y2');
%$ tic, for i=1:10, y4 = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,4); end
%$ t4 = toc;
%$ t(3) = dyn_assert(y4,y3,1e-15); clear('y3','y4');
%$ t(4) = (t1>t2) && (t2>t3) && (t3>t4);
%$ if ~t(4)
%$ disp('Timmings:')
%$ [t1, t2, t3, t4]
%$ end
%$ % Check the results.
%$ T = all(t);
%@eof:4
......@@ -120,7 +120,7 @@ StateVectors = bsxfun(@plus,StateVectorVarianceSquareRoot*randn(state_variance_r
for t=1:sample_size
yhat = bsxfun(@minus,StateVectors,state_variables_steady_state);
epsilon = Q_lower_triangular_cholesky*randn(number_of_structural_innovations,number_of_particles);
tmp = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu);
tmp = local_state_space_iteration_2(yhat,epsilon,ghx,ghu,constant,ghxx,ghuu,ghxu,DynareOptions.threads.local_state_space_iteration_2);
PredictedObservedMean = mean(tmp(mf1,:),2);
PredictionError = bsxfun(@minus,Y(:,t),tmp(mf1,:));
dPredictedObservedMean = bsxfun(@minus,tmp(mf1,:),PredictedObservedMean);
......
function set_dynare_threads(mexname,n)
% This function sets the number of threads used by some MEX files when compiled
% with OpenMP support (i.e with --enable-openmp is given to configure) or any
% with OpenMP support (i.e with --enable-openmp is given to configure) or any
% other parallel library.
%
% INPUTS
% o mexname [string] Name of the mex file.
% INPUTS
% o mexname [string] Name of the mex file.
% o n [integer] scalar specifying the number of threads to be used.
%
% OUTPUTS
% OUTPUTS
% none.
% Copyright (C) 2009-2010 Dynare Team
......@@ -41,6 +41,8 @@ switch mexname
options_.threads.kronecker.A_times_B_kronecker_C = n;
case 'sparse_hessian_times_B_kronecker_C'
options_.threads.kronecker.sparse_hessian_times_B_kronecker_C = n;
case 'local_state_space_iteration_2'
options_.threads.local_state_space_iteration_2 = n;
otherwise
message = [ mexname ' is not a known parallel mex file.' ];
message_id = 'Dynare:Threads:UnknownParallelMex';
......
/*
* Copyright (C) 2010 Dynare Team
* Copyright (C) 2010-2012 Dynare Team
*
* This file is part of Dynare.
*
......@@ -18,7 +18,7 @@
*/
/*
* This mex file computes particles at time t+1 given particles and innovations at time t,
* This mex file computes particles at time t+1 given particles and innovations at time t,
* using a second order approximation of the nonlinear state space model.
*/
......@@ -34,7 +34,7 @@
using namespace std;
//#define FIRST_ORDER_LOOP 1// Comment out this line to use mkl-blas instead of loops when computing ghx*yhat and ghu*epsilon
#define FIRST_ORDER_LOOP 1// Comment out this line to use mkl-blas instead of loops when computing ghx*yhat and ghu*epsilon
void set_vector_of_indices(const int n, const int r, vector<int> &v1, vector<int> &v2, vector<int> &v3)
{
......@@ -54,20 +54,23 @@ void set_vector_of_indices(const int n, const int r, vector<int> &v1, vector<int
}
}
void ss2Iteration_pruning(double* y2, double* y1, const double* yhat2, const double* yhat1, const double *epsilon,
double* ghx, double* ghu,
const double* constant, const double* ghxx, const double* ghuu, const double* ghxu, const double* ss,
const blas_int m, const blas_int n, const blas_int q, const blas_int s)
void ss2Iteration_pruning(double* y2, double* y1, const double* yhat2, const double* yhat1, const double *epsilon,
double* ghx, double* ghu,
const double* constant, const double* ghxx, const double* ghuu, const double* ghxu, const double* ss,
const blas_int m, const blas_int n, const blas_int q, const blas_int s, const int number_of_threads)
{
const char transpose[2] = "N";
const double one = 1.0;
const blas_int ONE = 1;
#ifndef FIRST_ORDER_LOOP
const char transpose[2] = "N";
const double one = 1.0;
const blas_int ONE = 1;
#endif
vector<int> ii1, ii2, ii3;// vector indices for ghxx
vector<int> jj1, jj2, jj3;// vector indices for ghuu
set_vector_of_indices(n, m, ii1, ii2, ii3);
set_vector_of_indices(q, m, jj1, jj2, jj3);
#ifdef USE_OMP
#pragma omp parallel for num_threads(number_of_threads)
#endif
for (int particle = 0; particle<s; particle++)
{
int particle_ = particle*m;
......@@ -75,35 +78,35 @@ void ss2Iteration_pruning(double* y2, double* y1, const double* yhat2, const dou
int particle___ = particle*q;
memcpy(&y2[particle_],&constant[0],m*sizeof(double));
memcpy(&y1[particle_],&ss[0],m*sizeof(double));
#ifndef FIRST_ORDER_LOOP
dgemv(transpose,&m,&n,&one,&ghx[0],&m,&yhat2[particle__],&ONE,&one,&y2[particle_],&ONE);
dgemv(transpose,&m,&q,&one,&ghu[0],&m,&epsilon[particle___],&ONE,&one,&y2[particle_],&ONE);
#endif
#ifndef FIRST_ORDER_LOOP
dgemv(transpose,&m,&n,&one,&ghx[0],&m,&yhat2[particle__],&ONE,&one,&y2[particle_],&ONE);
dgemv(transpose,&m,&q,&one,&ghu[0],&m,&epsilon[particle___],&ONE,&one,&y2[particle_],&ONE);
#endif
for (int variable = 0; variable<m; variable++)
{
int variable_ = variable + particle_;
// +ghx*yhat2+ghu*u
#ifdef FIRST_ORDER_LOOP
for (int column = 0, column_=0; column<q; column++, column_ += m)
{
int i1 = variable+column_;
int i2 = column+particle__;
int i3 = column+particle___;
y2[variable_] += ghx[i1]*yhat2[i2];
y2[variable_] += ghu[i1]*epsilon[i3];
}
for (int column = q, column_=q*m; column<n; column++, column_ += m)
{
y2[variable_] += ghx[variable+column_]*yhat2[column+particle__];
}
#endif
#ifdef FIRST_ORDER_LOOP
for (int column = 0, column_=0; column<q; column++, column_ += m)
{
int i1 = variable+column_;
int i2 = column+particle__;
int i3 = column+particle___;
y2[variable_] += ghx[i1]*yhat2[i2];
y2[variable_] += ghu[i1]*epsilon[i3];
}
for (int column = q, column_=q*m; column<n; column++, column_ += m)
{
y2[variable_] += ghx[variable+column_]*yhat2[column+particle__];
}
#endif
// +ghxx*kron(yhat1,yhat1)
for(int i=0; i<n*(n+1)/2; i++)
{
int i1 = particle__+ii1[i];
int i2 = particle__+ii2[i];
if(i1==i2)
{
{
y2[variable_] += .5*ghxx[variable+ii3[i]]*yhat1[i1]*yhat1[i1];
}
else
......@@ -129,77 +132,80 @@ void ss2Iteration_pruning(double* y2, double* y1, const double* yhat2, const dou
for (int v = particle__, i = 0; v<particle__+n; v++)
for (int s = particle___; s<particle___+q; s++, i += m)
y2[variable_] += ghxu[variable+i]*epsilon[s]*yhat2[v];
#ifdef FIRST_ORDER_LOOP
for (int column = 0, column_=0; column<q; column++, column_ += m)
{
int i1 = variable+column_;
int i2 = column+particle__;
int i3 = column+particle___;
y1[variable_] += ghx[i1]*yhat1[i2];
y1[variable_] += ghu[i1]*epsilon[i3];
}
for (int column = q, column_=q*m; column<n; column++, column_ += m)
{
y1[variable_] += ghx[variable+column_]*yhat1[column+particle__];
}
#endif
#ifdef FIRST_ORDER_LOOP
for (int column = 0, column_=0; column<q; column++, column_ += m)
{
int i1 = variable+column_;
int i2 = column+particle__;
int i3 = column+particle___;
y1[variable_] += ghx[i1]*yhat1[i2];
y1[variable_] += ghu[i1]*epsilon[i3];
}
for (int column = q, column_=q*m; column<n; column++, column_ += m)
{
y1[variable_] += ghx[variable+column_]*yhat1[column+particle__];
}
#endif
}
#ifndef FIRST_ORDER_LOOP
dgemv(transpose,&m,&n,&one,&ghx[0],&m,&yhat1[particle__],&ONE,&one,&y1[particle_],&ONE);
dgemv(transpose,&m,&q,&one,&ghu[0],&m,&epsilon[particle___],&ONE,&one,&y1[particle_],&ONE);
#endif
#ifndef FIRST_ORDER_LOOP
dgemv(transpose,&m,&n,&one,&ghx[0],&m,&yhat1[particle__],&ONE,&one,&y1[particle_],&ONE);
dgemv(transpose,&m,&q,&one,&ghu[0],&m,&epsilon[particle___],&ONE,&one,&y1[particle_],&ONE);
#endif
}
}
void ss2Iteration(double* y, const double* yhat, const double *epsilon,
double* ghx, double* ghu,
const double* constant, const double* ghxx, const double* ghuu, const double* ghxu,
const blas_int m, const blas_int n, const blas_int q, const blas_int s)
void ss2Iteration(double* y, const double* yhat, const double *epsilon,
double* ghx, double* ghu,
const double* constant, const double* ghxx, const double* ghuu, const double* ghxu,
const blas_int m, const blas_int n, const blas_int q, const blas_int s, const int number_of_threads)
{
const char transpose[2] = "N";
const double one = 1.0;
const blas_int ONE = 1;
#ifndef FIRST_ORDER_LOOP
const char transpose[2] = "N";
const double one = 1.0;
const blas_int ONE = 1;
#endif
vector<int> ii1, ii2, ii3;// vector indices for ghxx
vector<int> jj1, jj2, jj3;// vector indices for ghuu
set_vector_of_indices(n, m, ii1, ii2, ii3);
set_vector_of_indices(q, m, jj1, jj2, jj3);
#ifdef USE_OMP
#pragma omp parallel for num_threads(number_of_threads)
#endif
for (int particle = 0; particle<s; particle++)
{
int particle_ = particle*m;
int particle__ = particle*n;
int particle___ = particle*q;
memcpy(&y[particle_],&constant[0],m*sizeof(double));
#ifndef FIRST_ORDER_LOOP
dgemv(transpose,&m,&n,&one,&ghx[0],&m,&yhat[particle__],&ONE,&one,&y[particle_],&ONE);
dgemv(transpose,&m,&q,&one,&ghu[0],&m,&epsilon[particle___],&ONE,&one,&y[particle_],&ONE);
#endif
#ifndef FIRST_ORDER_LOOP
dgemv(transpose,&m,&n,&one,&ghx[0],&m,&yhat[particle__],&ONE,&one,&y[particle_],&ONE);
dgemv(transpose,&m,&q,&one,&ghu[0],&m,&epsilon[particle___],&ONE,&one,&y[particle_],&ONE);
#endif
for (int variable = 0; variable<m; variable++)
{
int variable_ = variable + particle_;
// +ghx*yhat+ghu*u
#ifdef FIRST_ORDER_LOOP
for (int column = 0, column_=0; column<q; column++, column_ += m)
{
int i1 = variable+column_;
int i2 = column+particle__;
int i3 = column+particle___;
y[variable_] += ghx[i1]*yhat[i2];
y[variable_] += ghu[i1]*epsilon[i3];
}
for (int column = q, column_=q*m; column<n; column++, column_ += m)
{
y[variable_] += ghx[variable+column_]*yhat[column+particle__];
}
#endif
#ifdef FIRST_ORDER_LOOP
for (int column = 0, column_=0; column<q; column++, column_ += m)
{
int i1 = variable+column_;
int i2 = column+particle__;
int i3 = column+particle___;
y[variable_] += ghx[i1]*yhat[i2];
y[variable_] += ghu[i1]*epsilon[i3];
}
for (int column = q, column_=q*m; column<n; column++, column_ += m)
{
y[variable_] += ghx[variable+column_]*yhat[column+particle__];
}
#endif
// +ghxx*kron(yhat,yhat)
for(int i=0; i<n*(n+1)/2; i++)
for(int i=0; i<n*(n+1)/2; i++)
{
int i1 = particle__+ii1[i];
int i2 = particle__+ii2[i];
if(i1==i2)
{
{
y[variable_] += .5*ghxx[variable+ii3[i]]*yhat[i1]*yhat[i1];
}
else
......@@ -238,25 +244,25 @@ void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
** prhs[0] yhat [double] n*s array, time t particles.
** prhs[1] epsilon [double] q*s array, time t innovations.
** prhs[2] ghx [double] m*n array, first order reduced form.
** prhs[3] ghu [double] m*q array, first order reduced form.
** prhs[3] ghu [double] m*q array, first order reduced form.
** prhs[4] constant [double] m*1 array, deterministic steady state + second order correction for the union of the states and observed variables.
** prhs[5] ghxx [double] m*n^2 array, second order reduced form.
** prhs[6] ghuu [double] m*q^2 array, second order reduced form.
** prhs[7] ghxu [double] m*nq array, second order reduced form.
** prhs[8] yhat_ [double] [OPTIONAL] n*s array, time t particles (pruning additional latent variables).
** prhs[8] yhat_ [double] [OPTIONAL] n*s array, time t particles (pruning additional latent variables).
** prhs[9] ss [double] [OPTIONAL] m*1 array, steady state for the union of the states and the observed variables (needed for the pruning mode).
**
** plhs[0] y [double] n*s array, time t+1 particles.
** plhs[1] y_ [double] n*s array, time t+1 particles for the pruning latent variables.
** plhs[1] y_ [double] n*s array, time t+1 particles for the pruning latent variables.
**
*/
// Check the number of input and output.
if ((nrhs != 8) && (nrhs != 10))
if ((nrhs != 9) && (nrhs != 11))
{
mexErrMsgTxt("Eight or ten input arguments are required.");
}
if (nlhs > 2)
if (nlhs > 2)
{
mexErrMsgTxt("Too many output arguments.");
}
......@@ -265,15 +271,17 @@ void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
mwSize s = mxGetN(prhs[0]);// Number of particles.
mwSize q = mxGetM(prhs[1]);// Number of innovations.
mwSize m = mxGetM(prhs[2]);// Number of elements in the union of states and observed variables.
//mexPrintf("\n s (the number of column of yhat) is equal to %d.", s);
//mexPrintf("\n The number of column of epsilon is %d.", mxGetN(prhs[1]));
// Check the dimensions.
if (
(s != mxGetN(prhs[1])) || // Number of columns for epsilon
(s != mxGetN(prhs[1])) || // Number of columns for epsilon
(n != mxGetN(prhs[2])) || // Number of columns for ghx
(m != mxGetM(prhs[3])) || // Number of rows for ghu
(q != mxGetN(prhs[3])) || // Number of columns for ghu
(m != mxGetM(prhs[4])) || // Number of rows for 2nd order constant correction + deterministic steady state
(m != mxGetM(prhs[5])) || // Number of rows for ghxx
(n*n != mxGetN(prhs[5])) || // Number of columns for ghxx
(n*n != mxGetN(prhs[5])) || // Number of columns for ghxx
(m != mxGetM(prhs[6])) || // Number of rows for ghuu
(q*q != mxGetN(prhs[6])) || // Number of columns for ghuu
(m != mxGetM(prhs[7])) || // Number of rows for ghxu
......@@ -282,7 +290,7 @@ void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{
mexErrMsgTxt("Input dimension mismatch!.");
}
if (nrhs>8)
if (nrhs>9)
{
if (
(n != mxGetM(prhs[8])) || // Number of rows for yhat_
......@@ -303,25 +311,27 @@ void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
double *ghuu = mxGetPr(prhs[6]);
double *ghxu = mxGetPr(prhs[7]);
double *yhat_, *ss;
if (nrhs>8)
if (nrhs>9)
{
yhat_ = mxGetPr(prhs[8]);
ss = mxGetPr(prhs[9]);
}
if (nrhs==8)
}
if (nrhs==9)
{
int numthreads = (int) mxGetScalar(prhs[8]);
double *y;
plhs[0] = mxCreateDoubleMatrix(m, s, mxREAL);
y = mxGetPr(plhs[0]);
ss2Iteration(y, yhat, epsilon, ghx, ghu, constant, ghxx, ghuu, ghxu, (int) m, (int) n, (int) q, (int) s);
ss2Iteration(y, yhat, epsilon, ghx, ghu, constant, ghxx, ghuu, ghxu, (int) m, (int) n, (int) q, (int) s, numthreads);