diff --git a/matlab/default_option_values.m b/matlab/default_option_values.m index fa2b89f52006c170ba1ab4abd444627d7d1faa61..85ec9e50928ddfbc021e600677a523a1fd524c23 100644 --- a/matlab/default_option_values.m +++ b/matlab/default_option_values.m @@ -79,6 +79,7 @@ options_.threads.local_state_space_iteration_2 = num_procs; options_.threads.local_state_space_iteration_3 = num_procs; options_.threads.local_state_space_iteration_k = 1; options_.threads.perfect_foresight_problem = num_procs; +options_.threads.perfect_foresight_block_problem = num_procs; options_.threads.k_order_perturbation = max(1, num_procs/2); % steady state diff --git a/matlab/perfect-foresight-models/solve_block_decomposed_problem.m b/matlab/perfect-foresight-models/solve_block_decomposed_problem.m index cbfd37b82a3fef5395f3a021cb2e416270108c80..e2ced0893e9ba4c59f12a6d239beb650e7b6e25d 100644 --- a/matlab/perfect-foresight-models/solve_block_decomposed_problem.m +++ b/matlab/perfect-foresight-models/solve_block_decomposed_problem.m @@ -103,7 +103,7 @@ for blk = 1:nblocks if ismember(options_.stack_solve_algo, [1 6]) [y, T, success, maxblkerror, iter] = solve_two_boundaries_lbj(fh_dynamic, y, exo_simul, steady_state, T, blk, options_, M_); else - [y, T, success, maxblkerror, iter] = solve_two_boundaries_stacked(fh_dynamic, y, exo_simul, steady_state, T, blk, cutoff, options_, M_); + [y, T, success, maxblkerror, iter] = solve_two_boundaries_stacked(y, exo_simul, steady_state, T, blk, cutoff, options_, M_); end end diff --git a/matlab/perfect-foresight-models/solve_two_boundaries_stacked.m b/matlab/perfect-foresight-models/solve_two_boundaries_stacked.m index 2c09bb1e4644b9726a2bbb7ce5cc616ec3b6cae6..50e6beb7b0a7185ae967a02ef34e50097bcb96af 100644 --- a/matlab/perfect-foresight-models/solve_two_boundaries_stacked.m +++ b/matlab/perfect-foresight-models/solve_two_boundaries_stacked.m @@ -1,10 +1,9 @@ -function [y, T, success, max_res, iter] = solve_two_boundaries_stacked(fh, y, x, steady_state, T, Block_Num, cutoff, options_, M_) +function [y, T, success, max_res, iter] = solve_two_boundaries_stacked(y, x, steady_state, T, Block_Num, cutoff, options_, M_) % Computes the deterministic simulation of a block of equations containing % both lead and lag variables, using a Newton method over the stacked Jacobian % (in particular, this excludes LBJ). % % INPUTS -% fh [handle] function handle to the dynamic file for the block % y [matrix] All the endogenous variables of the model % x [matrix] All the exogenous variables of the model % steady_state [vector] steady state of the model @@ -42,6 +41,8 @@ function [y, T, success, max_res, iter] = solve_two_boundaries_stacked(fh, y, x, % You should have received a copy of the GNU General Public License % along with Dynare. If not, see <https://www.gnu.org/licenses/>. +assert(~options_.bytecode); + Blck_size = M_.block_structure.block(Block_Num).mfs; y_index = M_.block_structure.block(Block_Num).variable(end-Blck_size+1:end); periods = get_simulation_periods(options_); @@ -54,32 +55,27 @@ end verbose = options_.verbosity; +ny = size(y, 1); +if M_.maximum_lag > 0 + y0 = y(:,M_.maximum_lag); +else + y0 = NaN(ny, 1); +end +if M_.maximum_lead > 0 + yT = y(:,M_.maximum_lag+periods+1); +else + yT = NaN(ny, 1); +end +yy = y(:,M_.maximum_lag+(1:periods)); + cvg=false; iter=0; correcting_factor=0.01; max_resa=1e100; lambda = 1; % Length of Newton step (unused for stack_solve_algo=4) while ~(cvg || iter > options_.simul.maxit) - r = NaN(Blck_size, periods); - g1a = spalloc(Blck_size*periods, Blck_size*periods, M_.block_structure.block(Block_Num).NNZDerivatives*periods); - for it_ = y_kmin+(1:periods) - [yy, T(:, it_), r(:, it_-y_kmin), g1]=fh(dynendo(y, it_, M_), x(it_, :), M_.params, steady_state, ... - M_.block_structure.block(Block_Num).g1_sparse_rowval, ... - M_.block_structure.block(Block_Num).g1_sparse_colval, ... - M_.block_structure.block(Block_Num).g1_sparse_colptr, T(:, it_)); - y(:, it_) = yy(M_.endo_nbr+(1:M_.endo_nbr)); - if periods == 1 - g1a = g1(:, Blck_size+(1:Blck_size)); - elseif it_ == y_kmin+1 - g1a(1:Blck_size, 1:Blck_size*2) = g1(:, Blck_size+1:end); - elseif it_ == y_kmin+periods - g1a((periods-1)*Blck_size+1:end, (periods-2)*Blck_size+1:end) = g1(:, 1:2*Blck_size); - else - g1a((it_-y_kmin-1)*Blck_size+(1:Blck_size), (it_-y_kmin-2)*Blck_size+(1:3*Blck_size)) = g1; - end - end - ya = reshape(y(y_index, y_kmin+(1:periods)), 1, periods*Blck_size)'; - ra = reshape(r, periods*Blck_size, 1); + [yy, T, ra, g1a] = perfect_foresight_block_problem(Block_Num, yy, y0, yT, x, M_.params, steady_state, T, periods, M_, options_); + ya = reshape(yy(y_index,1:periods), 1, periods*Blck_size)'; b=-ra+g1a*ya; [max_res, max_indx]=max(max(abs(r'))); if ~isreal(r) @@ -114,10 +110,11 @@ while ~(cvg || iter > options_.simul.maxit) disp([' correcting_factor=' num2str(correcting_factor,'%f') ' max(Jacobian)=' num2str(full(max_factor),'%f')]); end dx = (g1aa+correcting_factor*speye(periods*Blck_size))\ba- ya_save; - y(y_index, y_kmin+(1:periods))=reshape((ya_save+lambda*dx)',length(y_index),periods); + yy(y_index,1:periods)=reshape((ya_save+lambda*dx)', length(y_index), periods); continue else disp('The singularity of the jacobian matrix could not be corrected'); + y(:,y_kmin+(1:periods)) = yy; success = false; return end @@ -127,7 +124,7 @@ while ~(cvg || iter > options_.simul.maxit) if verbose disp(['reducing the path length: lambda=' num2str(lambda,'%f')]); end - y(y_index, y_kmin+(1:periods))=reshape((ya_save+lambda*dx)',length(y_index),periods); + yy(y_index,1:periods)=reshape((ya_save+lambda*dx)', length(y_index), periods); continue else if verbose @@ -137,6 +134,7 @@ while ~(cvg || iter > options_.simul.maxit) fprintf('Convergence not achieved in block %d, after %d iterations.\n Increase "maxit" or set "cutoff=0" in model options.\n',Block_Num, iter); end end + y(:,y_kmin+(1:periods)) = yy; success = false; return end @@ -154,7 +152,7 @@ while ~(cvg || iter > options_.simul.maxit) && options_.simul.iterstack_nperiods == 0 && options_.simul.iterstack_nlu == 0 && size(g1a, 1) < options_.simul.iterstack_maxlu) % Fallback to LU if block too small for iterstack dx = g1a\b- ya; ya = ya + lambda*dx; - y(y_index, y_kmin+(1:periods))=reshape(ya',length(y_index),periods); + yy(y_index,1:periods)=reshape(ya', length(y_index), periods); elseif ismember(stack_solve_algo, [2, 3]) if strcmp(options_.simul.preconditioner, 'umfiter') && iter == 0 [L, U, P, Q] = lu(g1a); @@ -177,7 +175,7 @@ while ~(cvg || iter > options_.simul.maxit) end dx = Q*zb - ya; ya = ya + lambda*dx; - y(y_index, y_kmin+(1:periods))=reshape(ya',length(y_index),periods); + yy(y_index,1:periods)=reshape(ya', length(y_index), periods); elseif stack_solve_algo==4 stpmx = 100 ; stpmax = stpmx*max([sqrt(ya'*ya);size(y_index,2)]); @@ -185,9 +183,9 @@ while ~(cvg || iter > options_.simul.maxit) g = (ra'*g1a)'; f = 0.5*ra'*ra; p = -g1a\ra; - yn = lnsrch1(ya,f,g,p,stpmax,@lnsrch1_wrapper_two_boundaries,nn,nn, options_.solve_tolx, fh, Block_Num, y, y_index,x, M_.params, steady_state, T, periods, Blck_size, M_); + yn = lnsrch1(ya,f,g,p,stpmax,@lnsrch1_wrapper_two_boundaries,nn,nn, options_.solve_tolx, y_index, Block_Num, yy, y0, yT, x, M_.params, steady_state, T, periods, M_, options_); dx = ya - yn; - y(y_index, y_kmin+(1:periods))=reshape(yn',length(y_index),periods); + yy(y_index,1:periods)=reshape(yn', length(y_index), periods); end end iter=iter+1; @@ -196,6 +194,8 @@ while ~(cvg || iter > options_.simul.maxit) end end +y(:,y_kmin+(1:periods)) = yy; + if iter > options_.simul.maxit if verbose printline(41) @@ -211,11 +211,7 @@ success = true; function y3n = dynendo(y, it_, M_) y3n = reshape(y(:, it_+(-1:1)), 3*M_.endo_nbr, 1); -function ra = lnsrch1_wrapper_two_boundaries(ya, fh, Block_Num, y, y_index, x, ... - params, steady_state, T, periods, ... - y_size, M_) - y(y_index, M_.maximum_lag+(1:periods)) = reshape(ya',length(y_index),periods); - ra = NaN(periods*y_size, 1); - for it_ = M_.maximum_lag+(1:periods) - [~, ~, ra((it_-M_.maximum_lag-1)*y_size+(1:y_size)), g1] = fh(dynendo(y, it_, M_), x(it_, :), params, steady_state, M_.block_structure.block(Block_Num).g1_sparse_rowval, M_.block_structure.block(Block_Num).g1_sparse_colval, M_.block_structure.block(Block_Num).g1_sparse_colptr, T(:, it_)); - end +function ra = lnsrch1_wrapper_two_boundaries(ya, y_index, Block_Num, yy, y0, yT, x, ... + params, steady_state, T, periods, M_, options_) + yy(y_index,1:periods) = reshape(ya', length(y_index), periods); + [~, ~, ra] = perfect_foresight_block_problem(Block_Num, yy, y0, yT, x, params, steady_state, T, periods, M_, options_); diff --git a/meson.build b/meson.build index 29cdbe58f22407d4370f5c0c71af35039f039ac0..6440af1647713f450b04536d1b237f361ddf14d2 100644 --- a/meson.build +++ b/meson.build @@ -364,6 +364,10 @@ perfect_foresight_problem_src = [ 'mex/sources/perfect_foresight_problem/perfect 'mex/sources/perfect_foresight_problem/DynamicModelCaller.cc' ] shared_module('perfect_foresight_problem', perfect_foresight_problem_src, kwargs : mex_kwargs, dependencies : [ openmp_dep, dl_dep ]) +perfect_foresight_block_problem_src = [ 'mex/sources/perfect_foresight_problem/perfect_foresight_block_problem.cc', + 'mex/sources/perfect_foresight_problem/DynamicModelCaller.cc' ] +shared_module('perfect_foresight_block_problem', perfect_foresight_block_problem_src, kwargs : mex_kwargs, dependencies : [ openmp_dep, dl_dep ]) + block_trust_region_src = [ 'mex/sources/block_trust_region/dulmage_mendelsohn.f08', 'mex/sources/block_trust_region/matlab_fcn_closure.F08', 'mex/sources/block_trust_region/trust_region.f08', diff --git a/mex/sources/perfect_foresight_problem/DynamicModelCaller.cc b/mex/sources/perfect_foresight_problem/DynamicModelCaller.cc index a3013731c7aeb2c711d71a8447b51790ebb04178..5c4bf8920644789838a36099af2e4f8fbe6a63ed 100644 --- a/mex/sources/perfect_foresight_problem/DynamicModelCaller.cc +++ b/mex/sources/perfect_foresight_problem/DynamicModelCaller.cc @@ -317,3 +317,228 @@ DynamicModelMatlabCaller::eval(double* resid) mxDestroyArray(T_order_mx); mxDestroyArray(T_mx); } + +#if !defined(_WIN32) && !defined(__CYGWIN32__) +void* DynamicModelBlockDllCaller::mex {nullptr}; +#else +HINSTANCE DynamicModelBlockDllCaller::mex {nullptr}; +#endif +DynamicModelBlockDllCaller::dynamic_resid_fct DynamicModelBlockDllCaller::residual_fct {nullptr}; +DynamicModelBlockDllCaller::dynamic_g1_fct DynamicModelBlockDllCaller::g1_fct {nullptr}; + +void +DynamicModelBlockDllCaller::load_dll(const std::string& basename, int block_num) +{ + // Load symbols from dynamic MEX + const std::filesystem::path sparse_dir {"+" + basename + "/+sparse/+block"}; + const std::filesystem::path mex_name {sparse_dir + / ("dynamic_"s + std::to_string(block_num) + MEXEXT)}; +#if !defined(__CYGWIN32__) && !defined(_WIN32) + mex = dlopen(mex_name.c_str(), RTLD_NOW); +#else + mex = LoadLibraryW(mex_name.c_str()); +#endif + if (!mex) + mexErrMsgTxt(("Can't load dynamic_"s + std::to_string(block_num) + " MEX file").c_str()); + +#if !defined(__CYGWIN32__) && !defined(_WIN32) + residual_fct = reinterpret_cast<dynamic_resid_fct>( + dlsym(mex, ("dynamic_"s + std::to_string(block_num) + "_resid").c_str())); + g1_fct = reinterpret_cast<dynamic_g1_fct>( + dlsym(mex, ("dynamic_"s + std::to_string(block_num) + "_g1").c_str())); +#else +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wcast-function-type" + residual_fct = reinterpret_cast<dynamic_resid_fct>( + GetProcAddress(mex, ("dynamic_"s + std::to_string(block_num) + "_resid").c_str())); + g1_fct = reinterpret_cast<dynamic_g1_fct>( + GetProcAddress(mex, ("dynamic_"s + std::to_string(block_num) + "_g1").c_str())); +# pragma GCC diagnostic pop +#endif + if (!residual_fct || !g1_fct) + mexErrMsgTxt("Can't load functions in dynamic MEX file"); +} + +void +DynamicModelBlockDllCaller::unload_dll() +{ +#if !defined(__CYGWIN32__) && !defined(_WIN32) + dlclose(mex); +#else + FreeLibrary(mex); +#endif +} + +DynamicModelBlockDllCaller::DynamicModelBlockDllCaller(size_t ntt, size_t mfs, mwIndex ny, + mwIndex nx, const double* params_arg, + const double* steady_state_arg, + const int32_T* g1_sparse_colptr_arg, + bool linear_arg, bool compute_jacobian_arg) : + DynamicModelBlockCaller {linear_arg, compute_jacobian_arg}, + params {params_arg}, + steady_state {steady_state_arg}, + g1_sparse_colptr {g1_sparse_colptr_arg} +{ + tt.resize(ntt); + y_p.resize(3 * ny); + x_p.resize(nx); + if (compute_jacobian) + jacobian_p.resize(g1_sparse_colptr[3 * mfs] - 1); +} + +void +DynamicModelBlockDllCaller::copy_jacobian_column(mwIndex col, double* dest) const +{ + std::ranges::copy_n(jacobian_p.data() + g1_sparse_colptr[col] - 1, + g1_sparse_colptr[col + 1] - g1_sparse_colptr[col], dest); +} + +void +DynamicModelBlockDllCaller::eval(double* resid) +{ + residual_fct(y_p.data(), x_p.data(), params, steady_state, tt.data(), resid); + if (compute_jacobian) + { + g1_fct(y_p.data(), x_p.data(), params, steady_state, tt.data(), jacobian_p.data()); + + if (linear) + compute_jacobian = false; // If model is linear, no need to recompute Jacobian later + } +} + +DynamicModelBlockMatlabCaller::DynamicModelBlockMatlabCaller( + std::string basename_arg, int block_num_arg, mwIndex ntt, mwIndex ny, mwIndex nx, + const mxArray* params_mx_arg, const mxArray* steady_state_mx_arg, + const mxArray* g1_sparse_rowval_mx_arg, const mxArray* g1_sparse_colval_mx_arg, + const mxArray* g1_sparse_colptr_mx_arg, bool linear_arg, bool compute_jacobian_arg) : + DynamicModelBlockCaller {linear_arg, compute_jacobian_arg}, + basename {std::move(basename_arg)}, + block_num {block_num_arg}, + y_mx {mxCreateDoubleMatrix(3 * ny, 1, mxREAL)}, + x_mx {mxCreateDoubleMatrix(nx, 1, mxREAL)}, + T_mx {mxCreateDoubleMatrix(ntt, 1, mxREAL)}, + jacobian_mx {nullptr}, + params_mx {mxDuplicateArray(params_mx_arg)}, + steady_state_mx {mxDuplicateArray(steady_state_mx_arg)}, + g1_sparse_rowval_mx {mxDuplicateArray(g1_sparse_rowval_mx_arg)}, + g1_sparse_colval_mx {mxDuplicateArray(g1_sparse_colval_mx_arg)}, + g1_sparse_colptr_mx {mxDuplicateArray(g1_sparse_colptr_mx_arg)} +{ +} + +DynamicModelBlockMatlabCaller::~DynamicModelBlockMatlabCaller() +{ + mxDestroyArray(y_mx); + mxDestroyArray(x_mx); + mxDestroyArray(T_mx); + if (jacobian_mx) + mxDestroyArray(jacobian_mx); + mxDestroyArray(params_mx); + mxDestroyArray(steady_state_mx); + mxDestroyArray(g1_sparse_rowval_mx); + mxDestroyArray(g1_sparse_colval_mx); + mxDestroyArray(g1_sparse_colptr_mx); +} + +void +DynamicModelBlockMatlabCaller::copy_jacobian_column(mwIndex col, double* dest) const +{ + if (jacobian_mx) + { +#if MX_HAS_INTERLEAVED_COMPLEX + const int32_T* g1_sparse_rowval {mxGetInt32s(g1_sparse_rowval_mx)}; + const int32_T* g1_sparse_colptr {mxGetInt32s(g1_sparse_colptr_mx)}; +#else + const int32_T* g1_sparse_rowval {static_cast<const int32_T*>(mxGetData(g1_sparse_rowval_mx))}; + const int32_T* g1_sparse_colptr {static_cast<const int32_T*>(mxGetData(g1_sparse_colptr_mx))}; +#endif + + /* We cannot assume that jacobian_mx internally uses + g1_sparse_{rowval,colval,colptr}, because the call to sparse() in + dynamic_g1.m may have further compressed the matrix by removing + elements that are numerically zero, despite being symbolically + non-zero. */ + mwIndex *ir {mxGetIr(jacobian_mx)}, *jc {mxGetJc(jacobian_mx)}; + mwIndex isrc {jc[col]}; // Index in value array of source Jacobian + for (mwIndex idest {0}; // Index in value array of destination Jacobian + idest < static_cast<mwIndex>(g1_sparse_colptr[col + 1] - g1_sparse_colptr[col]); idest++) + { + mwIndex row { + static_cast<mwIndex>(g1_sparse_rowval[idest + g1_sparse_colptr[col] - 1] - 1)}; + while (isrc < jc[col + 1] && ir[isrc] < row) + isrc++; + if (isrc < jc[col + 1] && ir[isrc] == row) + dest[idest] = mxGetPr(jacobian_mx)[isrc]; + else + dest[idest] = 0.0; + } + } +} + +void +DynamicModelBlockMatlabCaller::eval(double* resid) +{ + std::string funcname {basename + ".sparse.block.dynamic_" + std::to_string(block_num)}; + + std::array prhs {y_mx, + x_mx, + params_mx, + steady_state_mx, + g1_sparse_rowval_mx, + g1_sparse_colval_mx, + g1_sparse_colptr_mx, + T_mx}; + std::vector<mxArray*> plhs(3 + static_cast<int>(compute_jacobian)); + mxArray* exception { + mexCallMATLABWithTrap(plhs.size(), plhs.data(), prhs.size(), prhs.data(), funcname.c_str())}; + + if (exception) + { + DynamicModelCaller::setMException(exception); + return; // Avoid manipulating null pointers in plhs, see #1832 + } + + mxDestroyArray(y_mx); + if (mxIsComplex(plhs[0])) + y_mx = DynamicModelMatlabCaller::cmplxToReal<false>(plhs[0]); + else + y_mx = plhs[0]; + + mxDestroyArray(T_mx); + T_mx = plhs[1]; + + if (!mxIsDouble(plhs[2]) || mxIsSparse(plhs[2])) + { + DynamicModelCaller::setErrMsg("Residuals should be a dense array of double floats"); + return; + } + + if (mxIsComplex(plhs[2])) + plhs[2] = DynamicModelMatlabCaller::cmplxToReal<false>(plhs[2]); + + std::ranges::copy_n(mxGetPr(plhs[2]), mxGetNumberOfElements(plhs[2]), resid); + mxDestroyArray(plhs[2]); + + if (compute_jacobian) + { + if (jacobian_mx) + { + mxDestroyArray(jacobian_mx); + jacobian_mx = nullptr; + } + + if (!mxIsDouble(plhs[3]) || !mxIsSparse(plhs[3])) + { + DynamicModelCaller::setErrMsg("Jacobian should be a dense array of double floats"); + return; + } + + if (mxIsComplex(plhs[3])) + jacobian_mx = DynamicModelMatlabCaller::cmplxToReal<true>(plhs[3]); + else + jacobian_mx = plhs[3]; + + if (linear) + compute_jacobian = false; // If model is linear, no need to recompute Jacobian later + } +} diff --git a/mex/sources/perfect_foresight_problem/DynamicModelCaller.hh b/mex/sources/perfect_foresight_problem/DynamicModelCaller.hh index d210c7e767fe4ed34c746a4d9846f6bf4725f338..06009f778c43221ec6a834d9842fa732bc07c280 100644 --- a/mex/sources/perfect_foresight_problem/DynamicModelCaller.hh +++ b/mex/sources/perfect_foresight_problem/DynamicModelCaller.hh @@ -110,6 +110,8 @@ private: std::string basename; mxArray *y_mx, *x_mx, *jacobian_mx, *params_mx, *steady_state_mx, *g1_sparse_rowval_mx, *g1_sparse_colval_mx, *g1_sparse_colptr_mx; + +public: /* Given a complex matrix (of double floats), returns a sparse matrix of same size. Real elements of the original matrix are copied as-is to the new one. Complex elements are replaced by NaNs. @@ -119,7 +121,6 @@ private: template<bool sparse> static mxArray* cmplxToReal(mxArray* cmplx_mx); -public: DynamicModelMatlabCaller(std::string basename_arg, mwIndex ny, mwIndex nx, const mxArray* params_mx_arg, const mxArray* steady_state_mx_arg, const mxArray* g1_sparse_rowval_mx_arg, @@ -186,4 +187,106 @@ DynamicModelMatlabCaller::cmplxToReal(mxArray* cmplx_mx) return real_mx; } +class DynamicModelBlockCaller +{ +public: + const bool linear; + bool compute_jacobian; // Not constant, because will be changed from true to false for linear + // models after first evaluation + + DynamicModelBlockCaller(bool linear_arg, bool compute_jacobian_arg) : + linear {linear_arg}, compute_jacobian {compute_jacobian_arg} + { + } + virtual ~DynamicModelBlockCaller() = default; + + [[nodiscard]] virtual double* y() = 0; + [[nodiscard]] virtual double* x() = 0; + [[nodiscard]] virtual double* T() = 0; + /* Copy a column of the Jacobian to dest. + Only copies non-zero elements, according to g1_sparse_{rowval,colval,colptr}. */ + virtual void copy_jacobian_column(mwIndex col, double* dest) const = 0; + virtual void eval(double* resid) = 0; +}; + +class DynamicModelBlockDllCaller : public DynamicModelBlockCaller +{ +private: +#if defined(_WIN32) || defined(__CYGWIN32__) + static HINSTANCE mex; +#else + static void* mex; +#endif + using dynamic_resid_fct = void (*)(double* y, const double* x, const double* params, + const double* steady_state, double* T, double* residual); + using dynamic_g1_fct = void (*)(const double* y, const double* x, const double* params, + const double* steady_state, double* T, double* g1_v); + static dynamic_resid_fct residual_fct; + static dynamic_g1_fct g1_fct; + const double *params, *steady_state; + std::vector<double> tt, y_p, x_p, jacobian_p; + const int32_T* g1_sparse_colptr; + +public: + DynamicModelBlockDllCaller(size_t ntt, size_t mfs, mwIndex ny, mwIndex nx, + const double* params_arg, const double* steady_state_arg, + const int32_T* g1_sparse_colptr_arg, bool linear_arg, + bool compute_jacobian_arg); + [[nodiscard]] double* + y() override + { + return y_p.data(); + } + [[nodiscard]] double* + x() override + { + return x_p.data(); + } + [[nodiscard]] double* + T() override + { + return tt.data(); + } + void copy_jacobian_column(mwIndex col, double* dest) const override; + void eval(double* resid) override; + static void load_dll(const std::string& basename, int block_num); + static void unload_dll(); +}; + +class DynamicModelBlockMatlabCaller : public DynamicModelBlockCaller +{ +private: + std::string basename; + int block_num; + mxArray *y_mx, *x_mx, *T_mx, *jacobian_mx, *params_mx, *steady_state_mx, *g1_sparse_rowval_mx, + *g1_sparse_colval_mx, *g1_sparse_colptr_mx; + +public: + DynamicModelBlockMatlabCaller(std::string basename_arg, int block_num_arg, mwIndex ntt, + mwIndex ny, mwIndex nx, const mxArray* params_mx_arg, + const mxArray* steady_state_mx_arg, + const mxArray* g1_sparse_rowval_mx_arg, + const mxArray* g1_sparse_colval_mx_arg, + const mxArray* g1_sparse_colptr_mx_arg, bool linear_arg, + bool compute_jacobian_arg); + ~DynamicModelBlockMatlabCaller() override; + [[nodiscard]] double* + y() override + { + return mxGetPr(y_mx); + } + [[nodiscard]] double* + x() override + { + return mxGetPr(x_mx); + } + [[nodiscard]] double* + T() override + { + return mxGetPr(T_mx); + } + void copy_jacobian_column(mwIndex col, double* dest) const override; + void eval(double* resid) override; +}; + #endif diff --git a/mex/sources/perfect_foresight_problem/perfect_foresight_block_problem.cc b/mex/sources/perfect_foresight_problem/perfect_foresight_block_problem.cc new file mode 100644 index 0000000000000000000000000000000000000000..59d6849279082b14369d22af04192fd98d2ddfc8 --- /dev/null +++ b/mex/sources/perfect_foresight_problem/perfect_foresight_block_problem.cc @@ -0,0 +1,355 @@ +/* + * Copyright © 2019-2025 Dynare Team + * + * This file is part of Dynare. + * + * Dynare is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Dynare is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Dynare. If not, see <https://www.gnu.org/licenses/>. + */ + +#include <algorithm> +#include <memory> +#include <string> + +#include <dynmex.h> + +#include "DynamicModelCaller.hh" + +void +mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) +{ + if (nlhs < 3 || nlhs > 4 || nrhs != 11) + mexErrMsgTxt("Must have 11 input arguments and 3 or 4 output arguments"); + bool compute_jacobian = nlhs == 4; + + // Give explicit names to input arguments + const mxArray* block_num_mx = prhs[0]; + const mxArray* y_mx = prhs[1]; + const mxArray* y0_mx = prhs[2]; + const mxArray* yT_mx = prhs[3]; + const mxArray* exo_path_mx = prhs[4]; + const mxArray* params_mx = prhs[5]; + const mxArray* steady_state_mx = prhs[6]; + const mxArray* T_mx = prhs[7]; + const mxArray* periods_mx = prhs[8]; + const mxArray* M_mx = prhs[9]; + const mxArray* options_mx = prhs[10]; + + // Extract various fields from M_ + const mxArray* basename_mx = mxGetField(M_mx, 0, "fname"); + if (!(basename_mx && mxIsChar(basename_mx) && mxGetM(basename_mx) == 1)) + mexErrMsgTxt("M_.fname should be a character string"); + std::string basename {mxArrayToString(basename_mx)}; + + const mxArray* endo_nbr_mx = mxGetField(M_mx, 0, "endo_nbr"); + if (!(endo_nbr_mx && mxIsScalar(endo_nbr_mx) && mxIsNumeric(endo_nbr_mx))) + mexErrMsgTxt("M_.endo_nbr should be a numeric scalar"); + auto ny = static_cast<mwIndex>(mxGetScalar(endo_nbr_mx)); + + const mxArray* maximum_lag_mx = mxGetField(M_mx, 0, "maximum_lag"); + if (!(maximum_lag_mx && mxIsScalar(maximum_lag_mx) && mxIsNumeric(maximum_lag_mx))) + mexErrMsgTxt("M_.maximum_lag should be a numeric scalar"); + auto maximum_lag = static_cast<mwIndex>(mxGetScalar(maximum_lag_mx)); + + // TODO: could be improved to take into account only the equations of the block + const mxArray* has_external_function_mx = mxGetField(M_mx, 0, "has_external_function"); + if (!(has_external_function_mx && mxIsLogicalScalar(has_external_function_mx))) + mexErrMsgTxt("M_.has_external_function should be a logical scalar"); + bool has_external_function = static_cast<bool>(mxGetScalar(has_external_function_mx)); + + // Extract various fields from options_ + const mxArray* use_dll_mx = mxGetField(options_mx, 0, "use_dll"); + if (!(use_dll_mx && mxIsLogicalScalar(use_dll_mx))) + mexErrMsgTxt("options_.use_dll should be a logical scalar"); + bool use_dll = static_cast<bool>(mxGetScalar(use_dll_mx)); + + const mxArray* threads_mx = mxGetField(options_mx, 0, "threads"); + if (!threads_mx) + mexErrMsgTxt("Can't find field options_.threads"); + const mxArray* num_threads_mx = mxGetField(threads_mx, 0, "perfect_foresight_block_problem"); + if (!(num_threads_mx && mxIsScalar(num_threads_mx) && mxIsNumeric(num_threads_mx))) + mexErrMsgTxt("options_.threads.perfect_foresight_block_problem should be a numeric scalar"); + // False positive: num_threads is used in OpemMP pragma + // NOLINTNEXTLINE(clang-analyzer-deadcode.DeadStores) + int num_threads = static_cast<int>(mxGetScalar(num_threads_mx)); + + // Check other input and map it to local variables + if (!(mxIsScalar(block_num_mx) && mxIsNumeric(block_num_mx))) + mexErrMsgTxt("block_num should be a numeric scalar"); + auto block_num = static_cast<int>(mxGetScalar(block_num_mx)); + + if (!(mxIsScalar(periods_mx) && mxIsNumeric(periods_mx))) + mexErrMsgTxt("periods should be a numeric scalar"); + auto periods = static_cast<mwIndex>(mxGetScalar(periods_mx)); + + if (!(mxIsDouble(y_mx) && mxGetM(y_mx) == static_cast<size_t>(ny) + && mxGetN(y_mx) == static_cast<size_t>(periods))) + mexErrMsgTxt("y should be a double precision matrix of size M_.endo_nbr*periods elements"); + + if (!(mxIsDouble(y0_mx) && mxGetM(y0_mx) == static_cast<size_t>(ny) && mxGetN(y0_mx) == 1)) + mexErrMsgTxt("y0 should be a double precision column-vector of M_.endo_nbr elements"); + const double* y0 = mxGetPr(y0_mx); + + if (!(mxIsDouble(yT_mx) && mxGetM(yT_mx) == static_cast<size_t>(ny) && mxGetN(yT_mx) == 1)) + mexErrMsgTxt("yT should be a double precision column-vector of M_.endo_nbr elements"); + const double* yT = mxGetPr(yT_mx); + + if (!(mxIsDouble(exo_path_mx) + && mxGetM(exo_path_mx) >= static_cast<size_t>(periods + maximum_lag))) + mexErrMsgTxt( + "exo_path should be a double precision matrix with at least periods+M_.maximum_lag rows"); + auto nx = static_cast<mwIndex>(mxGetN(exo_path_mx)); + size_t nb_row_x = mxGetM(exo_path_mx); + const double* exo_path = mxGetPr(exo_path_mx); + + const mxArray* block_structure_mx {mxGetField(M_mx, 0, "block_structure")}; + if (!(block_structure_mx && mxIsStruct(block_structure_mx))) + mexErrMsgTxt("M_.block_structure should be a structure"); + const mxArray* block_mx {mxGetField(block_structure_mx, 0, "block")}; + if (!(block_mx && mxIsStruct(block_mx))) + mexErrMsgTxt("M_.block_structure.block should be a structure"); + + const mxArray* linear_mx = mxGetField(block_mx, block_num - 1, "is_linear"); + if (!(linear_mx && mxIsLogicalScalar(linear_mx))) + mexErrMsgTxt("M_.block_structure.block(block_num).is_linear should be a logical scalar"); + bool linear = static_cast<bool>(mxGetScalar(linear_mx)); + + const mxArray* mfs_mx {mxGetField(block_mx, block_num - 1, "mfs")}; + if (!(mxIsScalar(mfs_mx) && mxIsNumeric(mfs_mx))) + mexErrMsgTxt("M_.block_structure.block(block_num).mfs should be a numeric scalar"); + auto mfs = static_cast<mwIndex>(mxGetScalar(mfs_mx)); + + const mxArray* g1_sparse_rowval_mx {mxGetField(block_mx, block_num - 1, "g1_sparse_rowval")}; + if (!(mxIsInt32(g1_sparse_rowval_mx))) + mexErrMsgTxt("M_.block_structure.block(block_num).g1_sparse_rowval should be an int32 vector"); +#if MX_HAS_INTERLEAVED_COMPLEX + const int32_T* g1_sparse_rowval {mxGetInt32s(g1_sparse_rowval_mx)}; +#else + const int32_T* g1_sparse_rowval {static_cast<const int32_T*>(mxGetData(g1_sparse_rowval_mx))}; +#endif + + const mxArray* g1_sparse_colval_mx {mxGetField(block_mx, block_num - 1, "g1_sparse_colval")}; + if (!(mxIsInt32(g1_sparse_colval_mx))) + mexErrMsgTxt("M_.block_structure.block(block_num).g1_sparse_colval should be an int32 vector"); + if (mxGetNumberOfElements(g1_sparse_colval_mx) != mxGetNumberOfElements(g1_sparse_rowval_mx)) + mexErrMsgTxt("M_.block_structure.block(block_num).g1_sparse_colval should have the same length " + "as M_.block_structure.block(block_num).g1_sparse_rowval"); + + const mxArray* g1_sparse_colptr_mx {mxGetField(block_mx, block_num - 1, "g1_sparse_colptr")}; + if (!(mxIsInt32(g1_sparse_colptr_mx) + && mxGetNumberOfElements(g1_sparse_colptr_mx) == static_cast<size_t>(3 * mfs + 1))) + mexErrMsgTxt( + ("M_.block_structure.block(block_num).g1_sparse_colptr should be an int32 vector with " + + std::to_string(3 * mfs + 1) + " elements") + .c_str()); +#if MX_HAS_INTERLEAVED_COMPLEX + const int32_T* g1_sparse_colptr {mxGetInt32s(g1_sparse_colptr_mx)}; +#else + const int32_T* g1_sparse_colptr {static_cast<const int32_T*>(mxGetData(g1_sparse_colptr_mx))}; +#endif + if (static_cast<size_t>(g1_sparse_colptr[3 * mfs]) - 1 + != mxGetNumberOfElements(g1_sparse_rowval_mx)) + mexErrMsgTxt( + "The size of M_.block_structure.block(block_num).g1_sparse_rowval is not consistent with " + "the last element of M_.block_structure.block(block_num).g1_sparse_colptr"); + + const mxArray* dyn_tmp_nbr_mx = mxGetField(block_structure_mx, 0, "dyn_tmp_nbr"); + if (!(dyn_tmp_nbr_mx && mxIsDouble(dyn_tmp_nbr_mx)) || mxIsComplex(dyn_tmp_nbr_mx) + || mxIsSparse(dyn_tmp_nbr_mx)) + mexErrMsgTxt("M_.block_structure.dyn_tmp_nbr should be a real scalar"); + auto ntt = static_cast<size_t>(mxGetScalar(dyn_tmp_nbr_mx)); + + if (!(mxIsDouble(T_mx) && mxGetM(T_mx) == static_cast<size_t>(ntt) + && mxGetN(T_mx) >= static_cast<size_t>(maximum_lag + periods))) + mexErrMsgTxt("T should be a double precision matrix with as many lines as temporary terms and " + "at least M_.maximum_lag+periods columns"); + + if (!(mxIsDouble(params_mx) && mxGetN(params_mx) == 1)) + mexErrMsgTxt("params should be a double precision column-vector"); + const double* params = mxGetPr(params_mx); + + if (!(mxIsDouble(steady_state_mx) && mxGetN(steady_state_mx) == 1)) + mexErrMsgTxt("steady_state should be a double precision column-vector"); + const double* steady_state = mxGetPr(steady_state_mx); + + // Allocate output matrices + plhs[0] = mxDuplicateArray(y_mx); + plhs[1] = mxDuplicateArray(T_mx); + plhs[2] = mxCreateDoubleMatrix(periods * mfs, 1, mxREAL); + double* TT = mxGetPr(plhs[0]); // Named TT to avoid name-clash with mwIndex T (period index) + double* y = mxGetPr(plhs[1]); + double* stacked_residual = mxGetPr(plhs[2]); + + /* Number of non-zero values in the stacked Jacobian. + Contemporaneous derivatives appear at all periods, while lag or lead + derivatives appear at all periods except one. */ + mwSize nzmax {static_cast<mwSize>((g1_sparse_colptr[3 * mfs] - 1) * (periods - 1) + + (g1_sparse_colptr[2 * mfs] - g1_sparse_colptr[mfs]))}; + + double* stacked_jacobian = nullptr; + mwIndex *ir = nullptr, *jc = nullptr; + if (compute_jacobian) + { + plhs[3] = mxCreateSparse(periods * mfs, periods * mfs, nzmax, mxREAL); + stacked_jacobian = mxGetPr(plhs[3]); + ir = mxGetIr(plhs[3]); + jc = mxGetJc(plhs[3]); + + /* Create the index vectors (IR, JC) of the sparse stacked jacobian. This + makes parallelization across periods possible when evaluating the model, + since all indices are known ex ante. */ + mwIndex k = 0; + jc[0] = 0; + for (mwIndex T = 0; T < periods; T++) + for (mwIndex j {0}; j < mfs; j++) // Column within the period (i.e. variable) + { + if (T > 0) + for (int32_T idx {g1_sparse_colptr[2 * mfs + j] - 1}; + idx < g1_sparse_colptr[2 * mfs + j + 1] - 1; idx++) + ir[k++] = (T - 1) * mfs + g1_sparse_rowval[idx] + - 1; // Derivatives w.r.t. y_{t+1} in T-1 + for (int32_T idx {g1_sparse_colptr[mfs + j] - 1}; + idx < g1_sparse_colptr[mfs + j + 1] - 1; idx++) + ir[k++] = T * mfs + g1_sparse_rowval[idx] - 1; // Derivatives w.r.t. y_t in T + if (T < periods - 1) + for (int32_T idx {g1_sparse_colptr[j] - 1}; idx < g1_sparse_colptr[j + 1] - 1; idx++) + ir[k++] = (T + 1) * mfs + g1_sparse_rowval[idx] + - 1; // Derivatives w.r.t. y_{t-1} in T+1 + jc[T * mfs + j + 1] = k; + } + } + + if (use_dll) + DynamicModelBlockDllCaller::load_dll(basename, block_num); + + DynamicModelCaller::error_msg.clear(); + DynamicModelCaller::error_id.clear(); + + /* Parallelize the main loop, if use_dll and no external function (to avoid + parallel calls to MATLAB) */ +#pragma omp parallel num_threads(num_threads) if (use_dll && !has_external_function) + { + // Allocate (thread-private) model evaluator (which allocates space for temporaries) + std::unique_ptr<DynamicModelBlockCaller> m; + if (use_dll) + m = std::make_unique<DynamicModelBlockDllCaller>(ntt, mfs, ny, nx, params, steady_state, + g1_sparse_colptr, linear, compute_jacobian); + else + m = std::make_unique<DynamicModelBlockMatlabCaller>( + basename, block_num, ntt, ny, nx, params_mx, steady_state_mx, g1_sparse_rowval_mx, + g1_sparse_colval_mx, g1_sparse_colptr_mx, linear, compute_jacobian); + + // Main computing loop +#pragma omp for + for (mwIndex T = 0; T < periods; T++) + { + // Fill vector of dynamic variables + if (T > 0 && T < periods - 1) + std::ranges::copy_n(y + (T - 1) * ny, 3 * ny, m->y()); + else if (T > 0) // Last simulation period + { + std::ranges::copy_n(y + (T - 1) * ny, 2 * ny, m->y()); + std::ranges::copy_n(yT, ny, m->y() + 2 * ny); + } + else if (T < periods - 1) // First simulation period + { + std::ranges::copy_n(y0, ny, m->y()); + std::ranges::copy_n(y + T * ny, 2 * ny, m->y() + ny); + } + else // Special case: periods=1 (and so T=0) + { + std::ranges::copy_n(y0, ny, m->y()); + std::ranges::copy_n(y, ny, m->y() + ny); + std::ranges::copy_n(yT, ny, m->y() + 2 * ny); + } + + // Fill exogenous + for (mwIndex j {0}; j < nx; j++) + m->x()[j] = exo_path[T + maximum_lag + nb_row_x * j]; + + // Fill temporary terms + std::ranges::copy_n(TT + (T + maximum_lag) * ntt, ntt, m->T()); + + // Compute the residual and Jacobian, and fill the stacked residual + m->eval(stacked_residual + T * mfs); + + // Copy back endogenous in plhs[0] + std::ranges::copy_n(m->y() + ny, ny, y + T * ny); + + // Copy back temporary terms in plhs[1] + std::ranges::copy_n(m->T(), ntt, TT + (T + maximum_lag) * ntt); + + if (compute_jacobian) + { + // Fill the stacked jacobian + for (mwIndex col {T > 1 ? (T - 1) * mfs : 0}; // We can't use std::max() here, because + // mwIndex is unsigned under MATLAB + col < std::min(periods * mfs, (T + 2) * mfs); col++) + { + mwIndex k = jc[col]; + while (k < jc[col + 1]) + { + if (ir[k] >= (T + 1) * mfs) + break; // Nothing to copy for this column + if (ir[k] >= T * mfs) + { + /* Within the current column, this is the first line of + the stacked Jacobian that contains elements from the + (small) Jacobian just computed, so copy the whole + column of the latter to the former. */ + m->copy_jacobian_column(col - (T - 1) * mfs, stacked_jacobian + k); + break; + } + k++; + } + } + } + } + } + + /* Mimic a try/catch using a global string, since exceptions are not allowed + to cross OpenMP boundary */ + if (!DynamicModelCaller::error_msg.empty()) + mexErrMsgIdAndTxt(DynamicModelCaller::error_id.c_str(), DynamicModelCaller::error_msg.c_str()); + + if (compute_jacobian) + { + /* The stacked jacobian so far constructed can still reference some zero + elements, because some expressions that are symbolically different from + zero may still evaluate to zero for some endogenous/parameter values. The + following code further compresses the Jacobian by removing the references + to those extra zeros. This makes a significant speed difference when + inversing the Jacobian for some large models. */ + mwIndex k_orig = 0, k_new = 0; + for (mwIndex col = 0; col < periods * mfs; col++) + { + while (k_orig < jc[col + 1]) + { + if (stacked_jacobian[k_orig] != 0.0) + { + if (k_new != k_orig) + { + stacked_jacobian[k_new] = stacked_jacobian[k_orig]; + ir[k_new] = ir[k_orig]; + } + k_new++; + } + k_orig++; + } + jc[col + 1] = k_new; + } + } + + if (use_dll) + DynamicModelBlockDllCaller::unload_dll(); +} diff --git a/tests/deterministic_simulations/ramst.mod b/tests/deterministic_simulations/ramst.mod index cd05cc619abd04bdbbc3ddd5701486e9b0d1ec2e..acd5d9f0731abfb324930d5bd6a45dc539ccb9ea 100644 --- a/tests/deterministic_simulations/ramst.mod +++ b/tests/deterministic_simulations/ramst.mod @@ -9,7 +9,7 @@ bet=0.05; aa=0.5; -model; +model(block); c + k - aa*x*k(-1)^alph - (1-delt)*k(-1); c^(-gam) - (1+bet)^(-1)*(aa*alph*x(+1)*k^(alph-1) + 1 - delt)*c(+1)^(-gam); end;