Commit 5c709e47 authored by Stéphane Adjemian (Charybdis)'s avatar Stéphane Adjemian (Charybdis)
Browse files

Changed set_dynare_threads.m. The number of threads in parallelized mex files...

Changed set_dynare_threads.m. The number of threads in parallelized mex files to be used if dynare is built with the openmp flag
(--with-openmp) is not passed by an environment variable anymore. The function set_dynare_threads changes the default value of the
number of threads (default is 1) in the options_.threads structure. Changed calls to sparse_hessian_times_B_kronecker_C and
A_times_B_kronecker_C dlls accordingly.
parent 37f14e9b
......@@ -541,7 +541,7 @@ zx=[zx; zeros(M_.exo_nbr,np);zeros(M_.exo_det_nbr,np)];
zu=[zu; eye(M_.exo_nbr);zeros(M_.exo_det_nbr,M_.exo_nbr)];
[nrzx,nczx] = size(zx);
[err, rhs] = sparse_hessian_times_B_kronecker_C(hessian,zx);
[err, rhs] = sparse_hessian_times_B_kronecker_C(hessian,zx,options_.threads.kronecker.sparse_hessian_times_B_kronecker_C);
mexErrCheck('sparse_hessian_times_B_kronecker_C', err);
rhs = -rhs;
......@@ -601,7 +601,7 @@ hu = dr.ghu(nstatic+1:nstatic+npred,:);
%kk = kk(1:npred,1:npred);
%rhs = -hessian*kron(zx,zu)-f1*dr.ghxx(end-nyf+1:end,kk(:))*kron(hx(1:npred,:),hu(1:npred,:));
[err, rhs] = sparse_hessian_times_B_kronecker_C(hessian,zx,zu);
[err, rhs] = sparse_hessian_times_B_kronecker_C(hessian,zx,zu,options_.threads.kronecker.sparse_hessian_times_B_kronecker_C);
mexErrCheck('sparse_hessian_times_B_kronecker_C', err);
nyf1 = sum(kstate(:,2) == M_.maximum_endo_lag+2);
......@@ -610,7 +610,7 @@ hu1 = [hu;zeros(np-npred,M_.exo_nbr)];
[nrhx,nchx] = size(hx);
[nrhu1,nchu1] = size(hu1);
[err, abcOut] = A_times_B_kronecker_C(dr.ghxx,hx,hu1);
[err, abcOut] = A_times_B_kronecker_C(dr.ghxx,hx,hu1,options_.threads.kronecker.A_times_B_kronecker_C);
mexErrCheck('A_times_B_kronecker_C', err);
B1 = B*abcOut;
rhs = -[rhs; zeros(n-M_.endo_nbr,size(rhs,2))]-B1;
......@@ -624,10 +624,10 @@ dr.ghxu = A\rhs;
kk = reshape([1:np*np],np,np);
kk = kk(1:npred,1:npred);
[err, rhs] = sparse_hessian_times_B_kronecker_C(hessian,zu);
[err, rhs] = sparse_hessian_times_B_kronecker_C(hessian,zu,options_.threads.kronecker.sparse_hessian_times_B_kronecker_C);
mexErrCheck('sparse_hessian_times_B_kronecker_C', err);
[err, B1] = A_times_B_kronecker_C(B*dr.ghxx,hu1);
[err, B1] = A_times_B_kronecker_C(B*dr.ghxx,hu1,options_.threads.kronecker.A_times_B_kronecker_C);
mexErrCheck('A_times_B_kronecker_C', err);
rhs = -[rhs; zeros(n-M_.endo_nbr,size(rhs,2))]-B1;
......@@ -672,7 +672,7 @@ for i=1:M_.maximum_endo_lead
[junk,k3a,k3] = ...
find(M_.lead_lag_incidenceordered(M_.maximum_endo_lag+j+1,:));
nk3a = length(k3a);
[err, B1] = sparse_hessian_times_B_kronecker_C(hessian(:,kh(k3,k3)),gu(k3a,:));
[err, B1] = sparse_hessian_times_B_kronecker_C(hessian(:,kh(k3,k3)),gu(k3a,:),options_.threads.kronecker.sparse_hessian_times_B_kronecker_C);
mexErrCheck('sparse_hessian_times_B_kronecker_C', err);
RHS = RHS + jacobia_(:,k2)*guu(k2a,:)+B1;
end
......@@ -687,9 +687,9 @@ for i=1:M_.maximum_endo_lead
kk = find(kstate(:,2) == M_.maximum_endo_lag+i+1);
gu = dr.ghx*Gu;
[nrGu,ncGu] = size(Gu);
[err, G1] = A_times_B_kronecker_C(dr.ghxx,Gu);
[err, G1] = A_times_B_kronecker_C(dr.ghxx,Gu,options_.threads.kronecker.A_times_B_kronecker_C);
mexErrCheck('A_times_B_kronecker_C', err);
[err, G2] = A_times_B_kronecker_C(hxx,Gu);
[err, G2] = A_times_B_kronecker_C(hxx,Gu,options_.threads.kronecker.A_times_B_kronecker_C);
mexErrCheck('A_times_B_kronecker_C', err);
guu = dr.ghx*Guu+G1;
Gu = hx*Gu;
......
......@@ -282,7 +282,7 @@ zx=[zx; zeros(M_.exo_nbr,np);zeros(M_.exo_det_nbr,np)];
zu=[zu; eye(M_.exo_nbr);zeros(M_.exo_det_nbr,M_.exo_nbr)];
[nrzx,nczx] = size(zx);
[err, rhs] = sparse_hessian_times_B_kronecker_C(hessian,zx);
[err, rhs] = sparse_hessian_times_B_kronecker_C(hessian,zx,options_.threads.kronecker.sparse_hessian_times_B_kronecker_C);
mexErrCheck('sparse_hessian_times_B_kronecker_C', err);
rhs = -rhs;
......@@ -342,7 +342,7 @@ hu = dr.ghu(nstatic+1:nstatic+npred,:);
%kk = kk(1:npred,1:npred);
%rhs = -hessian*kron(zx,zu)-f1*dr.ghxx(end-nyf+1:end,kk(:))*kron(hx(1:npred,:),hu(1:npred,:));
[err, rhs] = sparse_hessian_times_B_kronecker_C(hessian,zx,zu);
[err, rhs] = sparse_hessian_times_B_kronecker_C(hessian,zx,zu,options_.threads.kronecker.sparse_hessian_times_B_kronecker_C);
mexErrCheck('sparse_hessian_times_B_kronecker_C', err);
nyf1 = sum(kstate(:,2) == M_.maximum_endo_lag+2);
......@@ -351,7 +351,7 @@ hu1 = [hu;zeros(np-npred,M_.exo_nbr)];
[nrhx,nchx] = size(hx);
[nrhu1,nchu1] = size(hu1);
[err, abcOut] = A_times_B_kronecker_C(dr.ghxx,hx,hu1);
[err, abcOut] = A_times_B_kronecker_C(dr.ghxx,hx,hu1,options_.threads.kronecker.A_times_B_kronecker_C);
mexErrCheck('A_times_B_kronecker_C', err);
B1 = B*abcOut;
rhs = -[rhs; zeros(n-M_.endo_nbr,size(rhs,2))]-B1;
......@@ -365,10 +365,10 @@ dr.ghxu = A\rhs;
kk = reshape([1:np*np],np,np);
kk = kk(1:npred,1:npred);
[err, rhs] = sparse_hessian_times_B_kronecker_C(hessian,zu);
[err, rhs] = sparse_hessian_times_B_kronecker_C(hessian,zu,options_.threads.kronecker.sparse_hessian_times_B_kronecker_C);
mexErrCheck('sparse_hessian_times_B_kronecker_C', err);
[err, B1] = A_times_B_kronecker_C(B*dr.ghxx,hu1);
[err, B1] = A_times_B_kronecker_C(B*dr.ghxx,hu1,options_.threads.kronecker.A_times_B_kronecker_C);
mexErrCheck('A_times_B_kronecker_C', err);
rhs = -[rhs; zeros(n-M_.endo_nbr,size(rhs,2))]-B1;
......@@ -413,7 +413,7 @@ for i=1:M_.maximum_endo_lead
[junk,k3a,k3] = ...
find(M_.lead_lag_incidenceordered(M_.maximum_endo_lag+j+1,:));
nk3a = length(k3a);
[err, B1] = sparse_hessian_times_B_kronecker_C(hessian(:,kh(k3,k3)),gu(k3a,:));
[err, B1] = sparse_hessian_times_B_kronecker_C(hessian(:,kh(k3,k3)),gu(k3a,:),options_.threads.kronecker.sparse_hessian_times_B_kronecker_C);
mexErrCheck('sparse_hessian_times_B_kronecker_C', err);
RHS = RHS + jacobia_(:,k2)*guu(k2a,:)+B1;
end
......@@ -428,9 +428,9 @@ for i=1:M_.maximum_endo_lead
kk = find(kstate(:,2) == M_.maximum_endo_lag+i+1);
gu = dr.ghx*Gu;
[nrGu,ncGu] = size(Gu);
[err, G1] = A_times_B_kronecker_C(dr.ghxx,Gu);
[err, G1] = A_times_B_kronecker_C(dr.ghxx,Gu,options_.threads.kronecker.A_times_B_kronecker_C);
mexErrCheck('A_times_B_kronecker_C', err);
[err, G2] = A_times_B_kronecker_C(hxx,Gu);
[err, G2] = A_times_B_kronecker_C(hxx,Gu,options_.threads.kronecker.A_times_B_kronecker_C);
mexErrCheck('A_times_B_kronecker_C', err);
guu = dr.ghx*Guu+G1;
Gu = hx*Gu;
......
......@@ -152,8 +152,6 @@ for i=1:number_of_mex_files
matlab_path = path;
end
end
%% Initialize number of threads
set_dynare_threads(1);
%% Test if valid mex files are available, if a mex file is not available
%% a matlab version of the routine is included in the path.
disp(' ')
......
......@@ -53,6 +53,10 @@ options_.solve_tolx = eps^(2/3);
options_.solve_maxit = 500;
options_.deterministic_simulation_initialization = 0;
% Default number of threads for parallelized mex files.
options_.threads.kronecker.A_times_B_kronecker_C = 1;
options_.threads.kronecker.sparse_hessian_times_B_kronecker_C = 1;
% steady state file
if exist([M_.fname '_steadystate.m'],'file')
options_.steadystate_flag = 1;
......
function [err, D] = A_times_B_kronecker_C(A,B,C)
function [err, D] = A_times_B_kronecker_C(A,B,C,fake)
%function [err, D] = A_times_B_kronecker_C(A,B,C)
% Computes A * kron(B,C).
%
......@@ -36,12 +36,12 @@ function [err, D] = A_times_B_kronecker_C(A,B,C)
% Chek number of inputs and outputs.
if nargin>3 || nargin<2 || nargout~=2
error('A_times_B_kronecker_C takes 2 or 3 input arguments and provides exactly 2 output arguments.')
error('A_times_B_kronecker_C takes 3 or 4 input arguments and provides exactly 2 output arguments.')
end
% Get & check dimensions. Initialization of the output matrix.
[mA,nA] = size(A);
[mB,nB] = size(B);
if nargin == 3
if nargin == 4
[mC,nC] = size(C);
if mB*mC ~= nA
error('Input dimension error!')
......@@ -57,7 +57,7 @@ else
end
% Computational part.
if loop
if nargin == 3
if nargin == 4
k1 = 1;
for i1=1:nB
for i2=1:nC
......@@ -75,7 +75,7 @@ if loop
end
end
else
if nargin == 3
if nargin == 4
D = A * kron(B,C);
else
D = A * kron(B,B);
......
function [err, D] = sparse_hessian_times_B_kronecker_C(A,B,C)
function [err, D] = sparse_hessian_times_B_kronecker_C(A,B,C,fake)
%function [err, D] = sparse_hessian_times_B_kronecker_C(A,B,C)
% Computes A * kron(B,C) where A is a sparse matrix.
%
......@@ -39,10 +39,10 @@ if nargout~=2
end
switch nargin
case 4
D = A_times_B_kronecker_C(A,B,C,fake);
case 3
D = A_times_B_kronecker_C(A,B,C);
case 2
D = A_times_B_kronecker_C(A,B,B);
D = A_times_B_kronecker_C(A,B,B,fake);
otherwise
error('Two or Three input arguments required!')
end
......
function set_dynare_threads(n)
function set_dynare_threads(mexname,n)
% This function sets the number of threads used by some MEX files when compiled
% with OpenMP support, i.e with --enable-openmp is given to configure.
% As of 2010-09-27, only A_times_B_kronecker_C and
% sparse_hessian_times_B_kronecker_C support this.
% with OpenMP support (i.e with --enable-openmp is given to configure) or any
% other parallel library.
%
% INPUTS
% o n [integer] scalar specifying the number of threads to be used.
% o mexname [string] Name of the mex file.
% o n [integer] scalar specifying the number of threads to be used.
%
% OUTPUTS
% none.
% Copyright (C) 2009-2010 Dynare Team
%
......@@ -23,5 +26,23 @@ function set_dynare_threads(n)
%
% You should have received a copy of the GNU General Public License
% along with Dynare. If not, see <http://www.gnu.org/licenses/>.
global options_
if ~ischar(mexname)
error('set_dynare_threads:: First argument has to be a string!')
end
if ~isint(n)
error('set_dynare_threads:: Second argument has to be an integer!')
end
setenv('DYNARE_NUM_THREADS',int2str(n));
switch mexname
case 'A_times_B_kronecker_C'
options_.threads.kronecker.A_times_B_kronecker_C = n;
case 'sparse_hessian_times_B_kronecker_C'
options_.threads.kronecker.sparse_hessian_times_B_kronecker_C = n;
otherwise
message = [ mexname ' is not a known parallel mex file.' ];
message_id = 'Dynare:Threads:UnknownParallelMex';
warning(message_id,message);
end
\ No newline at end of file
......@@ -86,11 +86,11 @@ else
yhat2 = y_(dr.order_var(k2),i-1)-dr.ys(dr.order_var(k2));
epsilon = ex_(i-1,:)';
[err, abcOut1] = A_times_B_kronecker_C(.5*dr.ghxx,yhat1);
[err, abcOut1] = A_times_B_kronecker_C(.5*dr.ghxx,yhat1,options_.threads.kronecker.A_times_B_kronecker_C);
mexErrCheck('A_times_B_kronecker_C', err);
[err, abcOut2] = A_times_B_kronecker_C(.5*dr.ghuu,epsilon);
[err, abcOut2] = A_times_B_kronecker_C(.5*dr.ghuu,epsilon,options_.threads.kronecker.A_times_B_kronecker_C);
mexErrCheck('A_times_B_kronecker_C', err);
[err, abcOut3] = A_times_B_kronecker_C(dr.ghxu,yhat1,epsilon);
[err, abcOut3] = A_times_B_kronecker_C(dr.ghxu,yhat1,epsilon,options_.threads.kronecker.A_times_B_kronecker_C);
mexErrCheck('A_times_B_kronecker_C', err);
y_(dr.order_var,i) = constant + dr.ghx*yhat2 + dr.ghu*epsilon ...
......@@ -102,11 +102,11 @@ else
yhat = y_(dr.order_var(k2),i-1)-dr.ys(dr.order_var(k2));
epsilon = ex_(i-1,:)';
[err, abcOut1] = A_times_B_kronecker_C(.5*dr.ghxx,yhat);
[err, abcOut1] = A_times_B_kronecker_C(.5*dr.ghxx,yhat,options_.threads.kronecker.A_times_B_kronecker_C);
mexErrCheck('A_times_B_kronecker_C', err);
[err, abcOut2] = A_times_B_kronecker_C(.5*dr.ghuu,epsilon);
[err, abcOut2] = A_times_B_kronecker_C(.5*dr.ghuu,epsilon,options_.threads.kronecker.A_times_B_kronecker_C);
mexErrCheck('A_times_B_kronecker_C', err);
[err, abcOut3] = A_times_B_kronecker_C(dr.ghxu,yhat,epsilon);
[err, abcOut3] = A_times_B_kronecker_C(dr.ghxu,yhat,epsilon,options_.threads.kronecker.A_times_B_kronecker_C);
mexErrCheck('A_times_B_kronecker_C', err);
y_(dr.order_var,i) = constant + dr.ghx*yhat + dr.ghu*epsilon ...
......
......@@ -31,12 +31,14 @@
# include <omp.h>
#endif
#define DEBUG_OMP 0
void
full_A_times_kronecker_B_C(double *A, double *B, double *C, double *D,
blas_int mA, blas_int nA, blas_int mB, blas_int nB, blas_int mC, blas_int nC)
blas_int mA, blas_int nA, blas_int mB, blas_int nB, blas_int mC, blas_int nC, int number_of_threads)
{
#if USE_OMP
# pragma omp parallel for num_threads(atoi(getenv("DYNARE_NUM_THREADS")))
# pragma omp parallel for num_threads(number_of_threads)
for (long int colD = 0; colD < nB*nC; colD++)
{
# if DEBUG_OMP
......@@ -77,10 +79,10 @@ full_A_times_kronecker_B_C(double *A, double *B, double *C, double *D,
}
void
full_A_times_kronecker_B_B(double *A, double *B, double *D, blas_int mA, blas_int nA, blas_int mB, blas_int nB)
full_A_times_kronecker_B_B(double *A, double *B, double *D, blas_int mA, blas_int nA, blas_int mB, blas_int nB, int number_of_threads)
{
#if USE_OMP
# pragma omp parallel for num_threads(atoi(getenv("DYNARE_NUM_THREADS")))
# pragma omp parallel for num_threads(number_of_threads)
for (long int colD = 0; colD < nB*nB; colD++)
{
# if DEBUG_OMP
......@@ -124,8 +126,8 @@ void
mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{
// Check input and output:
if (nrhs > 3 || nrhs < 2 || nlhs != 2)
DYN_MEX_FUNC_ERR_MSG_TXT("A_times_B_kronecker_C takes 2 or 3 input arguments and provides exactly 2 output arguments.");
if (nrhs > 4 || nrhs < 3 || nlhs != 2)
DYN_MEX_FUNC_ERR_MSG_TXT("A_times_B_kronecker_C takes 3 or 4 input arguments and provides exactly 2 output arguments.");
// Get & Check dimensions (columns and rows):
mwSize mA, nA, mB, nB, mC, nC;
......@@ -133,7 +135,7 @@ mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
nA = mxGetN(prhs[0]);
mB = mxGetM(prhs[1]);
nB = mxGetN(prhs[1]);
if (nrhs == 3) // A*kron(B,C) is to be computed.
if (nrhs == 4) // A*kron(B,C) is to be computed.
{
mC = mxGetM(prhs[2]);
nC = mxGetN(prhs[2]);
......@@ -147,15 +149,18 @@ mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
}
// Get input matrices:
double *B, *C, *A;
int numthreads;
A = mxGetPr(prhs[0]);
B = mxGetPr(prhs[1]);
if (nrhs == 3)
numthreads = (int) mxGetScalar(prhs[2]);
if (nrhs == 4)
{
C = mxGetPr(prhs[2]);
numthreads = (int) mxGetScalar(prhs[3]);
}
// Initialization of the ouput:
double *D;
if (nrhs == 3)
if (nrhs == 4)
{
plhs[1] = mxCreateDoubleMatrix(mA, nB*nC, mxREAL);
}
......@@ -165,13 +170,13 @@ mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
}
D = mxGetPr(plhs[1]);
// Computational part:
if (nrhs == 2)
if (nrhs == 3)
{
full_A_times_kronecker_B_B(A, B, &D[0], (int) mA, (int) nA, (int) mB, (int) nB);
full_A_times_kronecker_B_B(A, B, &D[0], (int) mA, (int) nA, (int) mB, (int) nB, numthreads);
}
else
{
full_A_times_kronecker_B_C(A, B, C, &D[0], (int) mA, (int) nA, (int) mB, (int) nB, (int) mC, (int) nC);
full_A_times_kronecker_B_C(A, B, C, &D[0], (int) mA, (int) nA, (int) mB, (int) nB, (int) mC, (int) nC, numthreads);
}
plhs[0] = mxCreateDoubleScalar(0);
}
......@@ -31,9 +31,11 @@
# include <omp.h>
#endif
#define DEBUG_OMP 0
void
sparse_hessian_times_B_kronecker_B(mwIndex *isparseA, mwIndex *jsparseA, double *vsparseA,
double *B, double *D, mwSize mA, mwSize nA, mwSize mB, mwSize nB)
double *B, double *D, mwSize mA, mwSize nA, mwSize mB, mwSize nB, int number_of_threads)
{
/*
** Loop over the columns of kron(B,B) (or of the result matrix D).
......@@ -41,7 +43,7 @@ sparse_hessian_times_B_kronecker_B(mwIndex *isparseA, mwIndex *jsparseA, double
** symmetric pattern of the hessian matrix.
*/
#if USE_OMP
# pragma omp parallel for num_threads(atoi(getenv("DYNARE_NUM_THREADS")))
# pragma omp parallel for num_threads(number_of_threads)
#endif
for (int j1B = 0; j1B < nB; j1B++)
{
......@@ -90,13 +92,13 @@ sparse_hessian_times_B_kronecker_B(mwIndex *isparseA, mwIndex *jsparseA, double
void
sparse_hessian_times_B_kronecker_C(mwIndex *isparseA, mwIndex *jsparseA, double *vsparseA,
double *B, double *C, double *D,
mwSize mA, mwSize nA, mwSize mB, mwSize nB, mwSize mC, mwSize nC)
mwSize mA, mwSize nA, mwSize mB, mwSize nB, mwSize mC, mwSize nC, int number_of_threads)
{
/*
** Loop over the columns of kron(B,B) (or of the result matrix D).
*/
#if USE_OMP
# pragma omp parallel for num_threads(atoi(getenv("DYNARE_NUM_THREADS")))
# pragma omp parallel for num_threads(number_of_threads)
#endif
for (long int jj = 0; jj < nB*nC; jj++) // column of kron(B,C) index.
{
......@@ -141,8 +143,8 @@ void
mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{
// Check input and output:
if ((nrhs > 3) || (nrhs < 2) || nlhs!=2)
DYN_MEX_FUNC_ERR_MSG_TXT("sparse_hessian_times_B_kronecker_C takes 2 or 3 input arguments and provides exactly 2 output arguments.");
if ((nrhs > 4) || (nrhs < 3) || nlhs!=2)
DYN_MEX_FUNC_ERR_MSG_TXT("sparse_hessian_times_B_kronecker_C takes 3 or 4 input arguments and provides exactly 2 output argument.");
if (!mxIsSparse(prhs[0]))
DYN_MEX_FUNC_ERR_MSG_TXT("sparse_hessian_times_B_kronecker_C: First input must be a sparse (dynare) hessian matrix.");
......@@ -153,7 +155,7 @@ mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
nA = mxGetN(prhs[0]);
mB = mxGetM(prhs[1]);
nB = mxGetN(prhs[1]);
if (nrhs == 3) // A*kron(B,C) is to be computed.
if (nrhs == 4) // A*kron(B,C) is to be computed.
{
mC = mxGetM(prhs[2]);
nC = mxGetN(prhs[2]);
......@@ -167,10 +169,13 @@ mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
}
// Get input matrices:
double *B, *C;
int numthreads;
B = mxGetPr(prhs[1]);
if (nrhs == 3)
numthreads = (int) mxGetScalar(prhs[2]);
if (nrhs == 4)
{
C = mxGetPr(prhs[2]);
numthreads = (int) mxGetScalar(prhs[3]);
}
// Sparse (dynare) hessian matrix.
mwIndex *isparseA = (mwIndex *) mxGetIr(prhs[0]);
......@@ -178,7 +183,7 @@ mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
double *vsparseA = mxGetPr(prhs[0]);
// Initialization of the ouput:
double *D;
if (nrhs == 3)
if (nrhs == 4)
{
plhs[1] = mxCreateDoubleMatrix(mA, nB*nC, mxREAL);
}
......@@ -188,13 +193,13 @@ mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
}
D = mxGetPr(plhs[1]);
// Computational part:
if (nrhs == 2)
if (nrhs == 3)
{
sparse_hessian_times_B_kronecker_B(isparseA, jsparseA, vsparseA, B, D, mA, nA, mB, nB);
sparse_hessian_times_B_kronecker_B(isparseA, jsparseA, vsparseA, B, D, mA, nA, mB, nB,numthreads);
}
else
{
sparse_hessian_times_B_kronecker_C(isparseA, jsparseA, vsparseA, B, C, D, mA, nA, mB, nB, mC, nC);
sparse_hessian_times_B_kronecker_C(isparseA, jsparseA, vsparseA, B, C, D, mA, nA, mB, nB, mC, nC, numthreads);
}
plhs[0] = mxCreateDoubleScalar(0);
}
function test_kron(test)
% Copyright (C) 2007-2009 Dynare Team
function info = test_kron(test,number_of_threads)
% Copyright (C) 2007-2010 Dynare Team
%
% This file is part of Dynare.
%
......@@ -18,12 +17,17 @@ function test_kron(test)
% along with Dynare. If not, see <http://www.gnu.org/licenses/>.
if ~nargin
test = 3;
test = 2;
end
info = 1;
if test == 1
test1_1 = 1;
test1_2 = 1;
disp('')
disp('I''m building the test problem...')
tic
percentage_of_non_zero_elements = 10e-4;
NumberOfVariables = 549;%100;
NumberOfEquations = 256;%50
......@@ -43,24 +47,25 @@ if test == 1
end
A = sparse(A);
B = randn(NumberOfVariables,NumberOfColsInB);
disp('Done!')
toc
disp('')
disp('Computation of A*kron(B,B) with the mex file (v1):')
tic
D1 = sparse_hessian_times_B_kronecker_C(A,B);
[err, D1] = sparse_hessian_times_B_kronecker_C(A,B,number_of_threads);
mexErrCheck('sparse_hessian_times_B_kronecker_C', err);
toc
disp('')
disp('Computation of A*kron(B,B) with the mex file (v2):')
tic
D2 = sparse_hessian_times_B_kronecker_C(A,B,B);
tic
[err, D2] = sparse_hessian_times_B_kronecker_C(A,B,B,number_of_threads);
mexErrCheck('sparse_hessian_times_B_kronecker_C', err);
toc
size(D1)
disp('');
disp(['Difference between D1 and D2 = ' num2str(max(max(abs(D1-D2))))]);
return
if max(max(abs(D1-D2)))>1e-10
test1_1=0;
end
disp(' ')
disp('Computation of A*kron(B,B) with two nested loops:')
tic
......@@ -75,91 +80,111 @@ if test == 1
toc
disp('');
disp(['Difference between D1 and D3 = ' num2str(max(max(abs(D1-D3))))]);
disp(' ')
disp('Direct computation of A*kron(B,B):')
tic
try
D4 = A*kron(B,B);
notest = 0;
catch
notest = 1;
disp('Out of memory')
if max(max(abs(D1-D3)))>1e-10
test1_2=0;
end
toc
if ~notest
disp('');
disp(['Difference between D1 and D4 = ' num2str(max(max(abs(D1-D4))))]);
% $$$ FOR THE DIMENSIONS CONSIDERED HERE THIS PART WILL RESULT IN A OUT OF MEMORY ERROR.
% $$$ disp(' ')
% $$$ disp('Direct computation of A*kron(B,B):')
% $$$ tic
% $$$ try
% $$$ D4 = A*kron(B,B);
% $$$ notest = 0;
% $$$ catch
% $$$ notest = 1;
% $$$ disp('Out of memory')
% $$$ end
% $$$ toc
% $$$ if ~notest
% $$$ disp('');
% $$$ disp(['Difference between D1 and D4 = ' num2str(max(max(abs(D1-D4))))]);
% $$$ end
if ~(test1_1 && test1_2)
info = 0;
end
end
if test > 1
if test==2
test2_1 = 1;
test2_2 = 1;
disp('I''m loading a real life problem...')
hessian = 0;
load nash_matrices;
disp('Done!')
r2 = size(zx,1);
c2 = size(zx,2);
r1 = size(hessian,1);
c1 = size(hessian,2);
disp(' ')
disp(['Percentage of non zero elements in the hessian matrix = ' num2str(100*nnz(hessian)/(c1*c2)) '%'])
disp('')
disp(' ')
disp('Computation of A*kron(B,B) with the mex file (v1):')
tic
D1 = sparse_hessian_times_B_kronecker_C(hessian,zx);
tic