diff --git a/mex/sources/local_state_space_iterations/local_state_space_iteration_2.cc b/mex/sources/local_state_space_iterations/local_state_space_iteration_2.cc index ae3c142f1343ebc98413ac43ecf5c6ef09fb08ef..b700c39b176fca280ee903dcf10cb4964e69a037 100644 --- a/mex/sources/local_state_space_iterations/local_state_space_iteration_2.cc +++ b/mex/sources/local_state_space_iterations/local_state_space_iteration_2.cc @@ -1,5 +1,5 @@ /* - * Copyright © 2010-2017 Dynare Team + * Copyright © 2010-2019 Dynare Team * * This file is part of Dynare. * @@ -22,9 +22,9 @@ * using a second order approximation of the nonlinear state space model. */ -#include <iostream> -#include <cstring> #include <vector> +#include <algorithm> + #include <dynmex.h> #include <dynblas.h> @@ -32,14 +32,12 @@ # include <omp.h> #endif -using namespace std; - #define FIRST_ORDER_LOOP 1// Comment out this line to use mkl-blas instead of loops when computing ghx*yhat and ghu*epsilon void -set_vector_of_indices(const int n, const int r, vector<int> &v1, vector<int> &v2, vector<int> &v3) +set_vector_of_indices(int n, int r, std::vector<int> &v1, std::vector<int> &v2, std::vector<int> &v3) { - const int m = n*(n+1)/2; + int m = n*(n+1)/2; v1.resize(m, 0); v2.resize(m, 0); v3.resize(m, 0); @@ -57,17 +55,16 @@ set_vector_of_indices(const int n, const int r, vector<int> &v1, vector<int> &v2 void ss2Iteration_pruning(double *y2, double *y1, const double *yhat2, const double *yhat1, const double *epsilon, - double *ghx, double *ghu, + const double *ghx, const double *ghu, const double *constant, const double *ghxx, const double *ghuu, const double *ghxu, const double *ss, - const blas_int m, const blas_int n, const blas_int q, const blas_int s, const int number_of_threads) + blas_int m, blas_int n, blas_int q, blas_int s, int number_of_threads) { #ifndef FIRST_ORDER_LOOP - const char transpose[2] = "N"; const double one = 1.0; const blas_int ONE = 1; #endif - vector<int> ii1, ii2, ii3;// vector indices for ghxx - vector<int> jj1, jj2, jj3;// vector indices for ghuu + std::vector<int> ii1, ii2, ii3;// vector indices for ghxx + std::vector<int> jj1, jj2, jj3;// vector indices for ghuu set_vector_of_indices(n, m, ii1, ii2, ii3); set_vector_of_indices(q, m, jj1, jj2, jj3); #ifdef USE_OMP @@ -78,16 +75,16 @@ ss2Iteration_pruning(double *y2, double *y1, const double *yhat2, const double * int particle_ = particle*m; int particle__ = particle*n; int particle___ = particle*q; - memcpy(&y2[particle_], &constant[0], m*sizeof(double)); - memcpy(&y1[particle_], &ss[0], m*sizeof(double)); + std::copy_n(constant, m, &y2[particle_]); + std::copy_n(ss, m, &y1[particle_]); #ifndef FIRST_ORDER_LOOP - dgemv(transpose, &m, &n, &one, &ghx[0], &m, &yhat2[particle__], &ONE, &one, &y2[particle_], &ONE); - dgemv(transpose, &m, &q, &one, &ghu[0], &m, &epsilon[particle___], &ONE, &one, &y2[particle_], &ONE); + dgemv("N", &m, &n, &one, ghx, &m, &yhat2[particle__], &ONE, &one, &y2[particle_], &ONE); + dgemv("N", &m, &q, &one, ghu, &m, &epsilon[particle___], &ONE, &one, &y2[particle_], &ONE); #endif for (int variable = 0; variable < m; variable++) { int variable_ = variable + particle_; - // +ghx*yhat2+ghu*u + // +ghx·yhat2+ghu·u #ifdef FIRST_ORDER_LOOP for (int column = 0, column_ = 0; column < q; column++, column_ += m) { @@ -98,39 +95,29 @@ ss2Iteration_pruning(double *y2, double *y1, const double *yhat2, const double * y2[variable_] += ghu[i1]*epsilon[i3]; } for (int column = q, column_ = q*m; column < n; column++, column_ += m) - { - y2[variable_] += ghx[variable+column_]*yhat2[column+particle__]; - } + y2[variable_] += ghx[variable+column_]*yhat2[column+particle__]; #endif - // +ghxx*kron(yhat1,yhat1) + // +ghxx·yhat1⊗yhat1 for (int i = 0; i < n*(n+1)/2; i++) { int i1 = particle__+ii1[i]; int i2 = particle__+ii2[i]; if (i1 == i2) - { - y2[variable_] += .5*ghxx[variable+ii3[i]]*yhat1[i1]*yhat1[i1]; - } + y2[variable_] += .5*ghxx[variable+ii3[i]]*yhat1[i1]*yhat1[i1]; else - { - y2[variable_] += ghxx[variable+ii3[i]]*yhat1[i1]*yhat1[i2]; - } + y2[variable_] += ghxx[variable+ii3[i]]*yhat1[i1]*yhat1[i2]; } - // +ghuu*kron(u,u) + // +ghuu·u⊗u for (int j = 0; j < q*(q+1)/2; j++) { int j1 = particle___+jj1[j]; int j2 = particle___+jj2[j]; if (j1 == j2) - { - y2[variable_] += .5*ghuu[variable+jj3[j]]*epsilon[j1]*epsilon[j1]; - } + y2[variable_] += .5*ghuu[variable+jj3[j]]*epsilon[j1]*epsilon[j1]; else - { - y2[variable_] += ghuu[variable+jj3[j]]*epsilon[j1]*epsilon[j2]; - } + y2[variable_] += ghuu[variable+jj3[j]]*epsilon[j1]*epsilon[j2]; } - // +ghxu*kron(yhat1,u) + // +ghxu·yhat1⊗u for (int v = particle__, i = 0; v < particle__+n; v++) for (int s = particle___; s < particle___+q; s++, i += m) y2[variable_] += ghxu[variable+i]*epsilon[s]*yhat2[v]; @@ -144,31 +131,28 @@ ss2Iteration_pruning(double *y2, double *y1, const double *yhat2, const double * y1[variable_] += ghu[i1]*epsilon[i3]; } for (int column = q, column_ = q*m; column < n; column++, column_ += m) - { - y1[variable_] += ghx[variable+column_]*yhat1[column+particle__]; - } + y1[variable_] += ghx[variable+column_]*yhat1[column+particle__]; #endif } #ifndef FIRST_ORDER_LOOP - dgemv(transpose, &m, &n, &one, &ghx[0], &m, &yhat1[particle__], &ONE, &one, &y1[particle_], &ONE); - dgemv(transpose, &m, &q, &one, &ghu[0], &m, &epsilon[particle___], &ONE, &one, &y1[particle_], &ONE); + dgemv("N", &m, &n, &one, &ghx[0], &m, &yhat1[particle__], &ONE, &one, &y1[particle_], &ONE); + dgemv("N", &m, &q, &one, &ghu[0], &m, &epsilon[particle___], &ONE, &one, &y1[particle_], &ONE); #endif } } void ss2Iteration(double *y, const double *yhat, const double *epsilon, - double *ghx, double *ghu, + const double *ghx, const double *ghu, const double *constant, const double *ghxx, const double *ghuu, const double *ghxu, - const blas_int m, const blas_int n, const blas_int q, const blas_int s, const int number_of_threads) + blas_int m, blas_int n, blas_int q, blas_int s, int number_of_threads) { #ifndef FIRST_ORDER_LOOP - const char transpose[2] = "N"; const double one = 1.0; const blas_int ONE = 1; #endif - vector<int> ii1, ii2, ii3;// vector indices for ghxx - vector<int> jj1, jj2, jj3;// vector indices for ghuu + std::vector<int> ii1, ii2, ii3;// vector indices for ghxx + std::vector<int> jj1, jj2, jj3;// vector indices for ghuu set_vector_of_indices(n, m, ii1, ii2, ii3); set_vector_of_indices(q, m, jj1, jj2, jj3); #ifdef USE_OMP @@ -179,15 +163,15 @@ ss2Iteration(double *y, const double *yhat, const double *epsilon, int particle_ = particle*m; int particle__ = particle*n; int particle___ = particle*q; - memcpy(&y[particle_], &constant[0], m*sizeof(double)); + std::copy_n(constant, m, &y[particle_]); #ifndef FIRST_ORDER_LOOP - dgemv(transpose, &m, &n, &one, &ghx[0], &m, &yhat[particle__], &ONE, &one, &y[particle_], &ONE); - dgemv(transpose, &m, &q, &one, &ghu[0], &m, &epsilon[particle___], &ONE, &one, &y[particle_], &ONE); + dgemv("N", &m, &n, &one, ghx, &m, &yhat[particle__], &ONE, &one, &y[particle_], &ONE); + dgemv("N", &m, &q, &one, ghu, &m, &epsilon[particle___], &ONE, &one, &y[particle_], &ONE); #endif for (int variable = 0; variable < m; variable++) { int variable_ = variable + particle_; - // +ghx*yhat+ghu*u + // +ghx·yhat+ghu·u #ifdef FIRST_ORDER_LOOP for (int column = 0, column_ = 0; column < q; column++, column_ += m) { @@ -198,39 +182,29 @@ ss2Iteration(double *y, const double *yhat, const double *epsilon, y[variable_] += ghu[i1]*epsilon[i3]; } for (int column = q, column_ = q*m; column < n; column++, column_ += m) - { - y[variable_] += ghx[variable+column_]*yhat[column+particle__]; - } + y[variable_] += ghx[variable+column_]*yhat[column+particle__]; #endif - // +ghxx*kron(yhat,yhat) + // +ghxx·yhat⊗yhat for (int i = 0; i < n*(n+1)/2; i++) { int i1 = particle__+ii1[i]; int i2 = particle__+ii2[i]; if (i1 == i2) - { - y[variable_] += .5*ghxx[variable+ii3[i]]*yhat[i1]*yhat[i1]; - } + y[variable_] += .5*ghxx[variable+ii3[i]]*yhat[i1]*yhat[i1]; else - { - y[variable_] += ghxx[variable+ii3[i]]*yhat[i1]*yhat[i2]; - } + y[variable_] += ghxx[variable+ii3[i]]*yhat[i1]*yhat[i2]; } - // +ghuu*kron(u,u) + // +ghuu·u⊗u for (int j = 0; j < q*(q+1)/2; j++) { int j1 = particle___+jj1[j]; int j2 = particle___+jj2[j]; if (j1 == j2) - { - y[variable_] += .5*ghuu[variable+jj3[j]]*epsilon[j1]*epsilon[j1]; - } + y[variable_] += .5*ghuu[variable+jj3[j]]*epsilon[j1]*epsilon[j1]; else - { - y[variable_] += ghuu[variable+jj3[j]]*epsilon[j1]*epsilon[j2]; - } + y[variable_] += ghuu[variable+jj3[j]]*epsilon[j1]*epsilon[j2]; } - // +ghxu*kron(yhat,u) + // +ghxu·yhat⊗u for (int v = particle__, i = 0; v < particle__+n; v++) for (int s = particle___; s < particle___+q; s++, i += m) y[variable_] += ghxu[variable+i]*epsilon[s]*yhat[v]; @@ -242,31 +216,28 @@ void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) { /* - ** prhs[0] yhat [double] n*s array, time t particles. - ** prhs[1] epsilon [double] q*s array, time t innovations. - ** prhs[2] ghx [double] m*n array, first order reduced form. - ** prhs[3] ghu [double] m*q array, first order reduced form. - ** prhs[4] constant [double] m*1 array, deterministic steady state + second order correction for the union of the states and observed variables. - ** prhs[5] ghxx [double] m*n^2 array, second order reduced form. - ** prhs[6] ghuu [double] m*q^2 array, second order reduced form. - ** prhs[7] ghxu [double] m*nq array, second order reduced form. - ** prhs[8] yhat_ [double] [OPTIONAL] n*s array, time t particles (pruning additional latent variables). - ** prhs[9] ss [double] [OPTIONAL] m*1 array, steady state for the union of the states and the observed variables (needed for the pruning mode). - ** - ** plhs[0] y [double] n*s array, time t+1 particles. - ** plhs[1] y_ [double] n*s array, time t+1 particles for the pruning latent variables. - ** + prhs[0] yhat [double] n×s array, time t particles. + prhs[1] epsilon [double] q×s array, time t innovations. + prhs[2] ghx [double] m×n array, first order reduced form. + prhs[3] ghu [double] m×q array, first order reduced form. + prhs[4] constant [double] m×1 array, deterministic steady state + second order correction for the union of the states and observed variables. + prhs[5] ghxx [double] m×n² array, second order reduced form. + prhs[6] ghuu [double] m×q² array, second order reduced form. + prhs[7] ghxu [double] m×nq array, second order reduced form. + prhs[8] yhat_ [double] [OPTIONAL] n×s array, time t particles (pruning additional latent variables). + prhs[9] ss [double] [OPTIONAL] m×1 array, steady state for the union of the states and the observed variables (needed for the pruning mode). + + plhs[0] y [double] n×s array, time t+1 particles. + plhs[1] y_ [double] n×s array, time t+1 particles for the pruning latent variables. */ // Check the number of input and output. - if ((nrhs != 9) && (nrhs != 11)) - { - mexErrMsgTxt("Eight or ten input arguments are required."); - } + if (nrhs != 9 && nrhs != 11) + mexErrMsgTxt("Eight or ten input arguments are required."); + if (nlhs > 2) - { - mexErrMsgTxt("Too many output arguments."); - } + mexErrMsgTxt("Too many output arguments."); + // Get dimensions. size_t n = mxGetM(prhs[0]);// Number of states. size_t s = mxGetN(prhs[0]);// Number of particles. @@ -275,43 +246,34 @@ mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) //mexPrintf("\n s (the number of column of yhat) is equal to %d.", s); //mexPrintf("\n The number of column of epsilon is %d.", mxGetN(prhs[1])); // Check the dimensions. - if ( - (s != mxGetN(prhs[1])) // Number of columns for epsilon - || (n != mxGetN(prhs[2])) // Number of columns for ghx - || (m != mxGetM(prhs[3])) // Number of rows for ghu - || (q != mxGetN(prhs[3])) // Number of columns for ghu - || (m != mxGetM(prhs[4])) // Number of rows for 2nd order constant correction + deterministic steady state - || (m != mxGetM(prhs[5])) // Number of rows for ghxx - || (n*n != mxGetN(prhs[5])) // Number of columns for ghxx - || (m != mxGetM(prhs[6])) // Number of rows for ghuu - || (q*q != mxGetN(prhs[6])) // Number of columns for ghuu - || (m != mxGetM(prhs[7])) // Number of rows for ghxu - || (n*q != mxGetN(prhs[7])) // Number of rows for ghxu - ) - { - mexErrMsgTxt("Input dimension mismatch!."); - } + if (s != mxGetN(prhs[1]) // Number of columns for epsilon + || n != mxGetN(prhs[2]) // Number of columns for ghx + || m != mxGetM(prhs[3]) // Number of rows for ghu + || q != mxGetN(prhs[3]) // Number of columns for ghu + || m != mxGetM(prhs[4]) // Number of rows for 2nd order constant correction + deterministic steady state + || m != mxGetM(prhs[5]) // Number of rows for ghxx + || n*n != mxGetN(prhs[5]) // Number of columns for ghxx + || m != mxGetM(prhs[6]) // Number of rows for ghuu + || q*q != mxGetN(prhs[6]) // Number of columns for ghuu + || m != mxGetM(prhs[7]) // Number of rows for ghxu + || n*q != mxGetN(prhs[7])) // Number of rows for ghxu + mexErrMsgTxt("Input dimension mismatch!."); if (nrhs > 9) - { - if ( - (n != mxGetM(prhs[8])) // Number of rows for yhat_ - || (s != mxGetN(prhs[8])) // Number of columns for yhat_ - || (m != mxGetM(prhs[9])) // Number of rows for ss - ) - { - mexErrMsgTxt("Input dimension mismatch!."); - } - } + if (n != mxGetM(prhs[8]) // Number of rows for yhat_ + || s != mxGetN(prhs[8]) // Number of columns for yhat_ + || m != mxGetM(prhs[9])) // Number of rows for ss + mexErrMsgTxt("Input dimension mismatch!."); + // Get Input arrays. - double *yhat = mxGetPr(prhs[0]); - double *epsilon = mxGetPr(prhs[1]); - double *ghx = mxGetPr(prhs[2]); - double *ghu = mxGetPr(prhs[3]); - double *constant = mxGetPr(prhs[4]); - double *ghxx = mxGetPr(prhs[5]); - double *ghuu = mxGetPr(prhs[6]); - double *ghxu = mxGetPr(prhs[7]); - double *yhat_ = NULL, *ss = NULL; + const double *yhat = mxGetPr(prhs[0]); + const double *epsilon = mxGetPr(prhs[1]); + const double *ghx = mxGetPr(prhs[2]); + const double *ghu = mxGetPr(prhs[3]); + const double *constant = mxGetPr(prhs[4]); + const double *ghxx = mxGetPr(prhs[5]); + const double *ghuu = mxGetPr(prhs[6]); + const double *ghxu = mxGetPr(prhs[7]); + const double *yhat_ = nullptr, *ss = nullptr; if (nrhs > 9) { yhat_ = mxGetPr(prhs[8]); @@ -320,19 +282,17 @@ mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) if (nrhs == 9) { int numthreads = static_cast<int>(mxGetScalar(prhs[8])); - double *y; plhs[0] = mxCreateDoubleMatrix(m, s, mxREAL); - y = mxGetPr(plhs[0]); + double *y = mxGetPr(plhs[0]); ss2Iteration(y, yhat, epsilon, ghx, ghu, constant, ghxx, ghuu, ghxu, static_cast<int>(m), static_cast<int>(n), static_cast<int>(q), static_cast<int>(s), numthreads); } else { int numthreads = static_cast<int>(mxGetScalar(prhs[10])); - double *y, *y_; plhs[0] = mxCreateDoubleMatrix(m, s, mxREAL); plhs[1] = mxCreateDoubleMatrix(m, s, mxREAL); - y = mxGetPr(plhs[0]); - y_ = mxGetPr(plhs[1]); + double *y = mxGetPr(plhs[0]); + double *y_ = mxGetPr(plhs[1]); ss2Iteration_pruning(y, y_, yhat, yhat_, epsilon, ghx, ghu, constant, ghxx, ghuu, ghxu, ss, static_cast<int>(m), static_cast<int>(n), static_cast<int>(q), static_cast<int>(s), numthreads); } }