diff --git a/doc/manual/source/the-model-file.rst b/doc/manual/source/the-model-file.rst index f2519885572a7c4755ec95db12095b55ad328c77..583a9b5b8e8df658019ce01093ee9c0348d42be0 100644 --- a/doc/manual/source/the-model-file.rst +++ b/doc/manual/source/the-model-file.rst @@ -7121,6 +7121,18 @@ observed variables. Stopping criteria. Default: ``1e-5`` for numerical derivatives, ``1e-7`` for analytic derivatives. + ``'robust'`` + + Trigger more robust but computationally more expensive line search. Default: ``false``. + + ``'TolGstep'`` + + Tolerance parameter used for tuning gradient step. Default: same value as ``TolFun``. + + ``'TolGstepRel'`` + + Parameter used for tuning gradient step, governing the tolerance relative to the functions value. Default: not triggered. + ``'verbosity'`` Controls verbosity of display during diff --git a/matlab/default_option_values.m b/matlab/default_option_values.m index b3a500c26e1880424f36c56c67725f160c33fc59..b143da47ff07110b43c584ed743286c338b7c78a 100644 --- a/matlab/default_option_values.m +++ b/matlab/default_option_values.m @@ -561,6 +561,9 @@ options_.csminwel=csminwel; %newrat optimization routine newrat.hess=1; % dynare numerical hessian +newrat.robust=false; +newrat.tolerance.gstep = NaN; +newrat.tolerance.gstep_rel = NaN; newrat.tolerance.f=1e-5; newrat.tolerance.f_analytic=1e-7; newrat.maxiter=1000; diff --git a/matlab/optimization/dynare_minimize_objective.m b/matlab/optimization/dynare_minimize_objective.m index 93737a85cf7e1422f0b3846c7640b94582452e0b..18115ded7dca894dd39d88762c67b047eac66a54 100644 --- a/matlab/optimization/dynare_minimize_objective.m +++ b/matlab/optimization/dynare_minimize_objective.m @@ -288,6 +288,9 @@ switch minimizer_algorithm newratflag = options_.newrat.hess; %default end nit=options_.newrat.maxiter; + robust = options_.newrat.robust; + gstep_crit = options_.newrat.tolerance.gstep; + gstep_crit_rel = options_.newrat.tolerance.gstep_rel; Verbose = options_.newrat.verbosity; Save_files = options_.newrat.Save_files; if ~isempty(options_.optim_opt) @@ -303,8 +306,14 @@ switch minimizer_algorithm else newratflag=flag; end + case 'robust' + robust = options_list{i,2}; case 'TolFun' crit = options_list{i,2}; + case 'TolGstep' + gstep_crit = options_list{i,2}; + case 'TolGstepRel' + gstep_crit_rel = options_list{i,2}; case 'verbosity' Verbose = options_list{i,2}; case 'SaveFiles' @@ -318,9 +327,14 @@ switch minimizer_algorithm Save_files = 0; Verbose = 0; end + if isnan(gstep_crit) + gstep_crit = crit; + end hess_info.gstep=options_.gstep; - hess_info.htol = 1.e-4; + hess_info.htol = gstep_crit; + hess_info.htol_rel = gstep_crit_rel; hess_info.h1=options_.gradient_epsilon*ones(n_params,1); + hess_info.robust=robust; % here we force 7th input argument (flagg) to be 0, since outer product % gradient Hessian is handled in dynare_estimation_1 [opt_par_values,hessian_mat,gg,fval,invhess,new_rat_hess_info] = newrat(objective_function,start_par_value,bounds,analytic_grad,crit,nit,0,Verbose,Save_files,hess_info,prior_information.p2,options_.gradient_epsilon,parameter_names,varargin{:}); %hessian_mat is the plain outer product gradient Hessian diff --git a/matlab/optimization/mr_gstep.m b/matlab/optimization/mr_gstep.m index 6a403ea7ade5de125f820b585b247e90cd5e2c56..fd83ea50bfaf85ca268af9ff109df9cd8251166d 100644 --- a/matlab/optimization/mr_gstep.m +++ b/matlab/optimization/mr_gstep.m @@ -1,5 +1,5 @@ -function [f0, x, ig] = mr_gstep(h1,x,bounds,func0,penalty,htol0,Verbose,Save_files,gradient_epsilon,parameter_names,varargin) -% [f0, x, ig] = mr_gstep(h1,x,bounds,func0,penalty,htol0,Verbose,Save_files,gradient_epsilon,parameter_names,varargin) +function [f0, x, ig] = mr_gstep(h1,x,bounds,func0,penalty,htol0,Verbose,Save_files,gradient_epsilon,parameter_names,robust,varargin) +% [f0, x, ig] = mr_gstep(h1,x,bounds,func0,penalty,htol0,Verbose,Save_files,gradient_epsilon,parameter_names,robust,varargin) % % Gibbs type step in optimisation % @@ -12,7 +12,7 @@ function [f0, x, ig] = mr_gstep(h1,x,bounds,func0,penalty,htol0,Verbose,Save_fil % varargin{7} --> BoundsInfo % varargin{8} --> oo_ -% Copyright © 2006-2020 Dynare Team +% Copyright © 2006-2023 Dynare Team % % This file is part of Dynare. % @@ -73,15 +73,86 @@ while i<n gg(i)=(f1(i)'-f_1(i)')./(2.*h1(i)); hh(i) = 1/max(1.e-9,abs( (f1(i)+f_1(i)-2*f0)./(h1(i)*h1(i)) )); if gg(i)*(hh(i)*gg(i))/2 > htol(i) - [f0, x, ~, ~] = csminit1(func0,x,penalty,f0,gg,0,diag(hh),Verbose,varargin{:}); + [ff, xx,~,retcode] = csminit1(func0,x,penalty,f0,gg,0,diag(hh),Verbose,varargin{:}); + if retcode && robust + if abs(x(i))<1.e-6 + xa=transpose(linspace(x(i)/2, sign(x(i))*1.e-6*3/2, 7)); + else + xa=transpose(linspace(x(i)/2, x(i)*3/2, 7)); + end + fa=NaN(7,1); + for k=1:7 + xh1(i)=xa(k); + fa(k,1) = penalty_objective_function(xh1,func0,penalty,varargin{:}); + end + b=[ones(7,1) xa xa.*xa./2]\fa; + gg(i)=x(i)*b(3)+b(2); + hh(i)=1/b(3); + [ff2, xx2, ~, ~] = csminit1(func0,x,penalty,f0,gg,0,diag(hh),Verbose,varargin{:}); + if ff2<ff + ff=ff2; + xx=xx2; + end + if min(fa)<ff + [ff, im]=min(fa); + xx(i)=xa(im); + end + end ig(i)=1; + if robust + if not(isequal(xx , check_bounds(xx,bounds))) + xx = check_bounds(xx,bounds); + if xx(i)<x(i) + % lower bound + xx(i) = min(xx(i)+h1(i), 0.5*(xx(i)+x(i))); + else + % upper bound + xx(i) = max(xx(i)-h1(i), 0.5*(xx(i)+x(i))); + end + [ff,exit_flag]=penalty_objective_function(xx,func0,penalty,varargin{:}); + if exit_flag~=1 + disp_verbose('last step exited with bad status!',Verbose) + elseif ff<f0 + f0=ff; + x=xx; + end + else + % check improvement wrt predicted one + if abs(f0-ff) < abs(gg(i)*(hh(i)*gg(i))/2/100) || abs(x(i)-xx(i))<1.e-10 + [ff1, xx1] = csminit1(func0,x,penalty,f0,-gg,0,diag(hh),Verbose,varargin{:}); + if not(isequal(xx1 , check_bounds(xx1,bounds))) + xx1 = check_bounds(xx1,bounds); + if xx1(i)<x(i) + % lower bound + xx1(i) = min(xx1(i)+h1(i), 0.5*(xx1(i)+x(i))); + else + % upper bound + xx1(i) = max(xx1(i)-h1(i), 0.5*(xx1(i)+x(i))); + end + [ff1,exit_flag]=penalty_objective_function(xx1,func0,penalty,varargin{:}); + if exit_flag~=1 + disp_verbose('last step exited with bad status!',Verbose) + end + end + if ff1<ff + ff=ff1; + xx=xx1; + end + end + f0=ff; + x=xx; + end + else + f0=ff; + x=xx; + x = check_bounds(x,bounds); + end if Verbose - fprintf(['Done for param %s = %8.4f\n'],parameter_names{i},x(i)) + fprintf(['Done for param %s = %8.4f; f = %8.4f\n'],parameter_names{i},x(i),f0) end end xh1=x; end - x = check_bounds(x,bounds); if Save_files save('gstep.mat','x','h1','f0') end @@ -97,10 +168,10 @@ function x = check_bounds(x,bounds) inx = find(x>=bounds(:,2)); if ~isempty(inx) - x(inx) = bounds(inx,2)-eps; + x(inx) = bounds(inx,2)-1.e-10; end inx = find(x<=bounds(:,1)); if ~isempty(inx) - x(inx) = bounds(inx,1)+eps; + x(inx) = bounds(inx,1)+1.e-10; end diff --git a/matlab/optimization/mr_hessian.m b/matlab/optimization/mr_hessian.m index f13bdd63f122eaa9533f4303b34707e71e24d18f..9c1ee6f7a16f11b031f6ca588d4e94071bf03e80 100644 --- a/matlab/optimization/mr_hessian.m +++ b/matlab/optimization/mr_hessian.m @@ -24,7 +24,7 @@ function [hessian_mat, gg, htol1, ihh, hh_mat0, hh1, hess_info] = mr_hessian(x,f % derivatives % - hess_info structure storing the step sizes for % computation of Hessian -% - bounds prior bounds of parameters +% - bounds prior bounds of parameters % - prior_std prior standard devation of parameters (can be NaN) % - Save_files indicator whether files should be saved % - varargin other inputs @@ -78,11 +78,16 @@ else end -hess_info.h1 = min(hess_info.h1,0.5.*hmax); +hess_info.h1 = min(hess_info.h1,0.9.*hmax); if htol0<hess_info.htol hess_info.htol=htol0; end +if not(isnan(hess_info.htol_rel)) + htol1=hess_info.htol; + hess_info.htol=abs(hess_info.htol_rel*f0); +end + xh1=x; f1=zeros(size(f0,1),n); f_1=f1; @@ -106,22 +111,39 @@ while i<n ic=0; icount = 0; h0=hess_info.h1(i); - while (abs(dx(it))<0.5*hess_info.htol || abs(dx(it))>(3*hess_info.htol)) && icount<10 && ic==0 + istoobig=false; + istoosmall=false; + give_it_up=false; + while (abs(dx(it))<0.5*hess_info.htol || abs(dx(it))>(3*hess_info.htol)) && icount<10 && ic==0 && hmax(i)>2*1.e-10 && not(give_it_up) icount=icount+1; + istoobig(it)=false; + istoosmall(it)=false; if abs(dx(it))<0.5*hess_info.htol + istoosmall(it)=true; + if hess_info.h1(i)==0.9*hmax(i)% || hess_info.h1(i)==0.3*abs(x(i)) + give_it_up=true; + end if abs(dx(it)) ~= 0 - hess_info.h1(i)=min(max(1.e-10,0.3*abs(x(i))), 0.9*hess_info.htol/abs(dx(it))*hess_info.h1(i)); + % htmp=min(max(1.e-10,0.3*abs(x(i))), 0.9*hess_info.htol/abs(dx(it))*hess_info.h1(i)); + htmp=0.9*hess_info.htol/abs(dx(it))*hess_info.h1(i); else - hess_info.h1(i)=2.1*hess_info.h1(i); + htmp=2.1*hess_info.h1(i); + end + htmp = min(htmp,0.9*hmax(i)); + if any(h0(istoobig(1:it))) %&& htmp>=min(h0(istoobig(1:it))) + htmp = 0.5*(min(h0(istoobig))+max(h0(istoosmall))); end - hess_info.h1(i) = min(hess_info.h1(i),0.5*hmax(i)); - hess_info.h1(i) = max(hess_info.h1(i),1.e-10); + hess_info.h1(i) = max(htmp,1.e-10); xh1(i)=x(i)+hess_info.h1(i); [fx,~,ffx]=penalty_objective_function(xh1,func,penalty,varargin{:}); end if abs(dx(it))>(3*hess_info.htol) - hess_info.h1(i)= hess_info.htol/abs(dx(it))*hess_info.h1(i); - hess_info.h1(i) = max(hess_info.h1(i),1e-10); + istoobig(it)=true; + htmp= hess_info.htol/abs(dx(it))*hess_info.h1(i); + if any(h0(istoosmall(1:it))) %&& htmp<=max(h0(istoosmall(1:it))) + htmp = 0.5*(min(h0(istoobig))+max(h0(istoosmall))); + end + hess_info.h1(i) = max(htmp,1e-10); xh1(i)=x(i)+hess_info.h1(i); [fx,~,ffx]=penalty_objective_function(xh1,func,penalty,varargin{:}); iter=0; @@ -136,11 +158,27 @@ while i<n it=it+1; dx(it)=(fx-f0); h0(it)=hess_info.h1(i); - if (hess_info.h1(i)<1.e-12*min(1,h2(i)) && hess_info.h1(i)<0.5*hmax(i)) + if (hess_info.h1(i)<1.e-12*min(1,h2(i)) && hess_info.h1(i)<0.9*hmax(i)) ic=1; hcheck=1; end end + if icount == 10 || hess_info.h1(i)==1.e-10 + istoobig(it)=false; + istoosmall(it)=false; + if abs(dx(it))<0.5*hess_info.htol + istoosmall(it)=true; + end + if abs(dx(it))>(3*hess_info.htol) + istoobig(it)=true; + end + if any(istoobig) && (istoosmall(it) || istoobig(it)) + % always better to be wrong from above, to avoid numerical noise + [ddx, ij]=min(dx(istoobig)); + fx=f0+ddx; + hess_info.h1(i)=h0(ij); + end + end f1(:,i)=fx; if outer_product_gradient if any(isnan(ffx)) || isempty(ffx) @@ -207,8 +245,8 @@ if outer_product_gradient dum = (f1(:,i)+f_1(:,i)-2*f0)./(hess_info.h1(i)*h_1(i)); hessian_mat(:,(i-1)*n+i)=dum; if any(dum<=eps) - hessian_mat(dum<=eps,(i-1)*n+i)=max(eps, gg(i)^2); - end + hessian_mat(dum<=eps,(i-1)*n+i)=max(eps, gg(i)^2); + end end end @@ -216,7 +254,7 @@ if outer_product_gradient hh_mat=gga'*gga; % rescaled outer product hessian hh_mat0=ggh'*ggh; % outer product hessian A=diag(2.*hess_info.h1); % rescaling matrix - % igg=inv(hh_mat); % inverted rescaled outer product hessian + % igg=inv(hh_mat); % inverted rescaled outer product hessian ihh=A'*(hh_mat\A); % inverted outer product hessian (based on rescaling) if hflag>0 && min(eig(reshape(hessian_mat,n,n)))>0 hh0 = A*reshape(hessian_mat,n,n)*A'; %rescaled second order derivatives @@ -224,7 +262,7 @@ if outer_product_gradient sd0=sqrt(diag(hh0)); %rescaled 'standard errors' using second order derivatives sd=sqrt(diag(hh_mat)); %rescaled 'standard errors' using outer product hh_mat=hh_mat./(sd*sd').*(sd0*sd0'); %rescaled inverse outer product with 'true' std's - ihh=A'*(hh_mat\A); % update inverted outer product hessian with 'true' std's + ihh=A'*(hh_mat\A); % update inverted outer product hessian with 'true' std's sd=sqrt(diag(ihh)); %standard errors sdh=sqrt(1./diag(hh)); %diagonal standard errors for j=1:length(sd) @@ -238,12 +276,12 @@ if outer_product_gradient igg=inv_A'*ihh*inv_A; % inverted rescaled outer product hessian with modified std's % hh_mat=inv(igg); % outer product rescaled hessian with modified std's hh_mat0=inv_A'/igg*inv_A; % outer product hessian with modified std's - % sd0=sqrt(1./diag(hh0)); %rescaled 'standard errors' using second order derivatives - % sd=sqrt(diag(igg)); %rescaled 'standard errors' using outer product - % igg=igg./(sd*sd').*(sd0*sd0'); %rescaled inverse outer product with 'true' std's - % hh_mat=inv(igg); % rescaled outer product hessian with 'true' std's - % ihh=A'*igg*A; % inverted outer product hessian - % hh_mat0=inv(A)'*hh_mat*inv(A); % outer product hessian with 'true' std's + % sd0=sqrt(1./diag(hh0)); %rescaled 'standard errors' using second order derivatives + % sd=sqrt(diag(igg)); %rescaled 'standard errors' using outer product + % igg=igg./(sd*sd').*(sd0*sd0'); %rescaled inverse outer product with 'true' std's + % hh_mat=inv(igg); % rescaled outer product hessian with 'true' std's + % ihh=A'*igg*A; % inverted outer product hessian + % hh_mat0=inv(A)'*hh_mat*inv(A); % outer product hessian with 'true' std's end if hflag<2 hessian_mat=hh_mat0(:); @@ -265,4 +303,6 @@ else hh1 = []; end -htol1=hhtol; +if isnan(hess_info.htol_rel) + htol1=hhtol; +end diff --git a/matlab/optimization/newrat.m b/matlab/optimization/newrat.m index d1d75f3b379b0857d3cf53640bd95d1ca283850d..d146af37d9fe45a3cf890ac0a2883db8996bc2f6 100644 --- a/matlab/optimization/newrat.m +++ b/matlab/optimization/newrat.m @@ -69,7 +69,7 @@ icount=0; nx=length(x); xparam1=x; %ftol0=1.e-6; -htol_base = max(1.e-7, ftol0); +htol_base = max(1.e-7, hess_info.htol); flagit=0; % mode of computation of hessian in each iteration; hard-coded outer-product of gradients as it performed best in tests ftol=ftol0; gtol=1.e-3; @@ -93,6 +93,7 @@ end outer_product_gradient=1; if isempty(hh) + penalty=fval0; [dum, gg, htol0, igg, hhg, h1, hess_info]=mr_hessian(x,func0,penalty,flagit,htol,hess_info,bounds,prior_std,Save_files,varargin{:}); if isempty(dum) outer_product_gradient=0; @@ -174,9 +175,25 @@ while norm(gg)>gtol && check==0 && jit<nit iggx(ig_pos,ig_pos) = inv( hhx(ig_pos,ig_pos) ); [~,x0] = csminit1(func0,x0,penalty,fval,ggx,0,iggx,Verbose,varargin{:}); end - x0 = check_bounds(x0,bounds); - [fvala, x0, ig] = mr_gstep(h1,x0,bounds,func0,penalty,htol0,Verbose,Save_files,gradient_epsilon, parameter_names,varargin{:}); - x0 = check_bounds(x0,bounds); + if not(isequal(x0 , check_bounds(x0,bounds))) + x0 = check_bounds(x0,bounds); + [fvala,exit_flag]=penalty_objective_function(x0,func0,penalty,varargin{:}); + if exit_flag==1 + penalty=fvala; + else + disp_verbose('last step exited with bad status!',Verbose) + end + end + [fvala, x0, ig] = mr_gstep(h1,x0,bounds,func0,penalty,htol0,Verbose,Save_files,gradient_epsilon, parameter_names, hess_info.robust, varargin{:}); + if not(isequal(x0 , check_bounds(x0,bounds))) + x0 = check_bounds(x0,bounds); + [fvala,exit_flag]=penalty_objective_function(x0,func0,penalty,varargin{:}); + if exit_flag==1 + penalty=fvala; + else + disp_verbose('last step exited with bad status!',Verbose) + end + end if Save_files nig=[nig ig]; end @@ -216,7 +233,7 @@ while norm(gg)>gtol && check==0 && jit<nit if flagit==2 hh=hh0; elseif flagg>0 - [dum, gg, htol0, igg, hhg, h1, hess_info]=mr_hessian(xparam1,func0,penalty,flagg,ftol0,hess_info,bounds,prior_std,Save_files,varargin{:}); + [dum, gg, htol0, igg, hhg, h1, hess_info]=mr_hessian(xparam1,func0,penalty,flagg,htol_base,hess_info,bounds,prior_std,Save_files,varargin{:}); if flagg==2 hh = reshape(dum,nx,nx); ee=eig(hh); diff --git a/tests/optimizers/fs2000_5.mod b/tests/optimizers/fs2000_5.mod index f7f9d22daabca4be8a8b3ffcd51d154474571680..5025db5296f319cb9f092e8f469b038003ccf5a6 100644 --- a/tests/optimizers/fs2000_5.mod +++ b/tests/optimizers/fs2000_5.mod @@ -2,3 +2,4 @@ estimation(mode_compute=5,silent_optimizer,order=1, datafile='../fs2000/fsdat_simul', nobs=192, mh_replic=0); estimation(mode_compute=5,silent_optimizer,order=1, datafile='../fs2000/fsdat_simul', nobs=192, mh_replic=0, optim=('Hessian',2)); +estimation(mode_compute=5,silent_optimizer,order=1, datafile='../fs2000/fsdat_simul', nobs=192, mh_replic=0, optim=('Hessian',1,'robust',true));