From f553c8bbc2c09bc68af33d500753de7b310bbff3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=A9bastien=20Villemot?= <sebastien@dynare.org>
Date: Tue, 8 Oct 2024 15:16:54 -0400
Subject: [PATCH] WIP: Better preconditionner for stack_solve_algo={2,3} (GMRES
 + BiCGStab solvers)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implement preconditionners as in UMFITER and ITERSTACK:
– For UMFITER, on first iteration, do a full LU decomposition, and use it as a preconditionner
  in further iterations (similar to UMFITER in TROLL).
– For ITERSTACK, build a preconditionner based on small LU decomposition
  repeated several times across the stack

TODO:
– better understand how the tolerance of GMRES and BiCGStab influence the
  result (hardcoded to 1e-12, needed for GIMF in the block-decomposed version);
  also put that value in options_ and maybe give it an interface?
– implement ITERSTACK for the block-decomposed version
– do the same in bytecode
– leave ILU as an alternative preconditionner?
– handle the error flag of gmres and bicgstab?
– remove the persistent variables in sim1.m
– turn various settings as user-accessible options

[skip ci]
---
 matlab/optimization/solve_one_boundary.m      |  74 +++---------
 matlab/perfect-foresight-models/sim1.m        |  61 ++++++++--
 .../solve_two_boundaries_stacked.m            | 113 ++++--------------
 3 files changed, 91 insertions(+), 157 deletions(-)

diff --git a/matlab/optimization/solve_one_boundary.m b/matlab/optimization/solve_one_boundary.m
index e9177db7d..17158a2f5 100644
--- a/matlab/optimization/solve_one_boundary.m
+++ b/matlab/optimization/solve_one_boundary.m
@@ -40,7 +40,7 @@ function [y, T, success, max_res, iter] = solve_one_boundary(fh, y, x, params, s
 % ALGORITHM
 %   Newton with LU or GMRES or BicGstab for dynamic block
 
-% Copyright © 1996-2023 Dynare Team
+% Copyright © 1996-2024 Dynare Team
 %
 % This file is part of Dynare.
 %
@@ -59,8 +59,7 @@ function [y, T, success, max_res, iter] = solve_one_boundary(fh, y, x, params, s
 
 Blck_size=size(y_index_eq,2);
 correcting_factor=0.01;
-ilu_setup.type='crout';
-ilu_setup.droptol=1e-10;
+itertol = 1e-12;
 max_resa=1e100;
 lambda = 1; % Length of Newton step
 reduced = 0;
@@ -203,63 +202,28 @@ for it_=start:incr:finish
                 else
                     y(y_index_eq) = ya;
                 end
-            elseif (stack_solve_algo==2 && is_dynamic) || (options_.solve_algo==7 && ~is_dynamic)
-                flag1=1;
+            elseif (is_dynamic && ismember(stack_solve_algo, [2 3])) || (~is_dynamic && ismember(options_.solve_algo, [7 8]))
                 if verbose && ~is_dynamic
-                    disp('steady: GMRES ')
-                end
-                while flag1>0
-                    [L1, U1]=ilu(g1,ilu_setup);
-                    [dx,flag1] = gmres(g1,-r,Blck_size,1e-6,Blck_size,L1,U1);
-                    if  flag1>0 || reduced
-                        if verbose
-                            if flag1==1
-                                disp(['Error in simul: No convergence inside GMRES after ' num2str(iter,'%6d') ' iterations, in block' num2str(Block_Num,'%3d')])
-                            elseif(flag1==2)
-                                disp(['Error in simul: Preconditioner is ill-conditioned, in block' num2str(Block_Num,'%3d')])
-                            elseif(flag1==3)
-                                disp(['Error in simul: GMRES stagnated (Two consecutive iterates were the same.), in block' num2str(Block_Num,'%3d')])
-                            end
-                        end
-                        ilu_setup.droptol = ilu_setup.droptol/10;
-                        reduced = 0;
+                    if options_.solve_algo == 7
+                        disp('steady: GMRES ')
                     else
-                        ya = ya + lambda*dx;
-                        if is_dynamic
-                            y(y_index_eq, it_) = ya;
-                        else
-                            y(y_index_eq) = ya';
-                        end
+                        disp('steady: BiCGStab')
                     end
                 end
-            elseif (stack_solve_algo==3 && is_dynamic) || (options_.solve_algo==8 && ~is_dynamic)
-                flag1=1;
-                if verbose && ~is_dynamic
-                    disp('steady: BiCGStab')
+                if iter == 0
+                    [L,U,P,Q] = lu(g1);
+                    zc = L\(P*r);
+                    zb = U\zc;
+                elseif (stack_solve_algo==2 && is_dynamic) || (options_.solve_algo==7 && ~is_dynamic)
+                    zb = gmres(P*g1a*Q, P*r, [], itertol, [], L, U);
+                else
+                    zb = bicgstab(P*g1a*Q, P*r, [], itertol, [], L, U);
                 end
-                while flag1>0
-                    [L1, U1]=ilu(g1,ilu_setup);
-                    [dx,flag1] = bicgstab(g1,-r,1e-6,Blck_size,L1,U1);
-                    if flag1>0 || reduced
-                        if verbose
-                            if(flag1==1)
-                                disp(['Error in simul: No convergence inside BiCGStab after ' num2str(iter,'%6d') ' iterations, in block' num2str(Block_Num,'%3d')])
-                            elseif(flag1==2)
-                                disp(['Error in simul: Preconditioner is ill-conditioned, in block' num2str(Block_Num,'%3d')])
-                            elseif(flag1==3)
-                                disp(['Error in simul: BiCGStab stagnated (Two consecutive iterates were the same.), in block' num2str(Block_Num,'%3d')])
-                            end
-                        end
-                        ilu_setup.droptol = ilu_setup.droptol/10;
-                        reduced = 0;
-                    else
-                        ya = ya + lambda*dx;
-                        if is_dynamic
-                            y(y_index_eq, it_) = ya;
-                        else
-                            y(y_index_eq) = ya';
-                        end
-                    end
+                ya = ya - lambda*Q*zb;
+                if is_dynamic
+                    y(y_index_eq, it_) = ya;
+                else
+                    y(y_index_eq) = ya';
                 end
             else
                 if is_dynamic
diff --git a/matlab/perfect-foresight-models/sim1.m b/matlab/perfect-foresight-models/sim1.m
index 28c4f49e3..087af2783 100644
--- a/matlab/perfect-foresight-models/sim1.m
+++ b/matlab/perfect-foresight-models/sim1.m
@@ -124,7 +124,7 @@ while ~(converged || iter > options_.simul.maxit)
         if options_.simul.robust_lin_solve
             dy = -lin_solve_robust(A, res, verbose, options_);
         else
-            dy = -lin_solve(A, res, verbose, options_);
+            dy = -lin_solve(A, res, verbose, options_, iter == 1);
         end
         if any(isnan(dy)) || any(isinf(dy))
             if verbose
@@ -181,7 +181,10 @@ if verbose
     skipline();
 end
 
-function x = lin_solve(A, b, verbose, options_)
+function x = lin_solve(A, b, verbose, options_, first_iter)
+
+persistent L U P Q
+
 if norm(b) < sqrt(eps) % then x = 0 is a solution
     x = 0;
     return
@@ -190,16 +193,52 @@ end
 if options_.stack_solve_algo == 0
     x = A\b;
 else
-    ilu_setup.type = 'ilutp';
-    ilu_setup.droptol = 1e-10;
-    [L1, U1] = ilu(A, ilu_setup);
-    if options_.stack_solve_algo == 2
-        x = gmres(A, b, [], [], [], L1, U1);
-    elseif options_.stack_solve_algo == 3
-        x = bicgstab(A, b, [], [], L1, U1);
-    else
-        error('sim1: invalid value for options_.stack_solve_algo')
+    preconditionner = 'umfiter'; % TODO: should be made an option
+
+    if strcmp(preconditionner, 'umfiter') && first_iter
+        [L,U,P,Q] = lu(A);
+        z = L\(P*b);
+        y = U\z;
+    elseif strcmp(preconditionner, 'iterstack')
+        %% The following two settings are similar to TROLL’s eponymous parameter for ITERSTACK
+        %% TODO: should be made options
+        maxlu = 20000;
+        relu = 0.5;
+        if size(A, 1) < maxlu
+            error('Too small problem') % TODO: better recovery
+        end
+        ny = size(A, 1) / options_.periods; % TODO: Should be passed as argument
+        if ny > maxlu
+            error('Too large model. Either increase maxlu or switch to another algrithm.')
+        end
+        lusize = floor(maxlu / ny) * ny;
+        luidx = floor((floor(size(A, 1) / lusize)-1) * relu) * lusize + (1:lusize);
+        [L1,U1,P1,Q1] = lu(A(luidx,luidx));
+        eyek = speye(floor(size(A, 1) / lusize));
+        L = kron(eyek, L1);
+        U = kron(eyek, U1);
+        P = kron(eyek, P1);
+        Q = kron(eyek, Q1);
+        r = rem(floor(size(A, 1)), lusize);
+        if r > 0 % Compute additional smaller LU for remainder, if any
+            [L2,U2,P2,Q2] = lu(A(end-r+1:end,end-r+1:end));
+            L = blkdiag(L, L2);
+            U = blkdiag(U, U2);
+            P = blkdiag(P, P2);
+            Q = blkdiag(Q, Q2);
+        end
+    end
+    if ~(strcmp(preconditionner, 'umfiter') && first_iter)
+        itertol = 1e-12;
+        if options_.stack_solve_algo == 2
+            y = gmres(P*A*Q, P*b, [], itertol, [], L, U);
+        elseif options_.stack_solve_algo == 3
+            y = bicgstab(P*A*Q, P*b, itertol, [], L, U);
+        else
+            error('sim1: invalid value for options_.stack_solve_algo')
+        end
     end
+    x = Q*y;
 end
 x(~isfinite(x)) = 0;
 relres = norm(b - A*x) / norm(b);
diff --git a/matlab/perfect-foresight-models/solve_two_boundaries_stacked.m b/matlab/perfect-foresight-models/solve_two_boundaries_stacked.m
index 3fdee24fa..eeb781861 100644
--- a/matlab/perfect-foresight-models/solve_two_boundaries_stacked.m
+++ b/matlab/perfect-foresight-models/solve_two_boundaries_stacked.m
@@ -57,8 +57,7 @@ verbose = options_.verbosity;
 cvg=false;
 iter=0;
 correcting_factor=0.01;
-ilu_setup.droptol=1e-10;
-ilu_setup.type = 'ilutp';
+itertol = 1e-12;
 max_resa=1e100;
 lambda = 1; % Length of Newton step (unused for stack_solve_algo=4)
 reduced = 0;
@@ -159,98 +158,30 @@ while ~(cvg || iter > options_.simul.maxit)
             dx = g1a\b- ya;
             ya = ya + lambda*dx;
             y(y_index, y_kmin+(1:periods))=reshape(ya',length(y_index),periods);
-        elseif stack_solve_algo==2
-            flag1=1;
-            while flag1>0
-                if preconditioner==2
-                    [L1, U1]=ilu(g1a,ilu_setup);
-                elseif preconditioner==3
-                    Size = Blck_size;
-                    gss1 =  g1a(Size + 1: 2*Size,Size + 1: 2*Size) + g1a(Size + 1: 2*Size,2*Size+1: 3*Size);
-                    [L1, U1]=lu(gss1);
-                    L(1:Size,1:Size) = L1;
-                    U(1:Size,1:Size) = U1;
-                    gss2 = g1a(Size + 1: 2*Size,1: Size) + g1a(Size + 1: 2*Size,Size+1: 2*Size) + g1a(Size + 1: 2*Size,2*Size+1: 3*Size);
-                    [L2, U2]=lu(gss2);
-                    L(Size+1:(periods-1)*Size,Size+1:(periods-1)*Size) = kron(eye(periods-2), L2);
-                    U(Size+1:(periods-1)*Size,Size+1:(periods-1)*Size) = kron(eye(periods-2), U2);
-                    gss2 = g1a(Size + 1: 2*Size,1: Size) + g1a(Size + 1: 2*Size,Size+1: 2*Size);
-                    [L3, U3]=lu(gss2);
-                    L((periods-1)*Size+1:periods*Size,(periods-1)*Size+1:periods*Size) = L3;
-                    U((periods-1)*Size+1:periods*Size,(periods-1)*Size+1:periods*Size) = U3;
-                    L1 = L;
-                    U1 = U;
-                elseif preconditioner==4
-                    Size = Blck_size;
-                    gss1 =  g1a(1: 3*Size, 1: 3*Size);
-                    [L, U] = lu(gss1);
-                    L1 = kron(eye(ceil(periods/3)),L);
-                    U1 = kron(eye(ceil(periods/3)),U);
-                    L1 = L1(1:periods * Size, 1:periods * Size);
-                    U1 = U1(1:periods * Size, 1:periods * Size);
-                end
-                [za,flag1] = gmres(g1a,b,Blck_size,1e-6,Blck_size*periods,L1,U1);
-                if (flag1>0 || reduced)
-                    if verbose
-                        if flag1==1
-                            disp(['Error in simul: No convergence inside GMRES after ' num2str(periods*10,'%6d') ' iterations, in block ' num2str(Blck_size,'%3d')]);
-                        elseif flag1==2
-                            disp(['Error in simul: Preconditioner is ill-conditioned, in block ' num2str(Blck_size,'%3d')]);
-                        elseif flag1==3
-                            disp(['Error in simul: GMRES stagnated (Two consecutive iterates were the same.), in block ' num2str(Blck_size,'%3d')]);
-                        end
-                    end
-                    ilu_setup.droptol = ilu_setup.droptol/10;
-                    reduced = 0;
-                else
-                    dx = za - ya;
-                    ya = ya + lambda*dx;
-                    y(y_index, y_kmin+(1:periods))=reshape(ya',length(y_index),periods);
-                end
+        elseif ismember(stack_solve_algo, [2, 3])
+            if iter == 0
+                [L,U,P,Q] = lu(g1a);
+                zc = L\(P*b);
+                zb = U\zc;
+            elseif stack_solve_algo == 2
+                zb = gmres(P*g1a*Q, P*b, [], itertol, [], L, U);
+            elseif stack_solve_algo == 3
+                zb = bicgstab(P*g1a*Q, P*b, itertol, [], L, U);
             end
+            dx = Q*zb - ya;
+            ya = ya + lambda*dx;
+            y(y_index, y_kmin+(1:periods))=reshape(ya',length(y_index),periods);
         elseif stack_solve_algo==3
-            flag1=1;
-            while flag1>0
-                if preconditioner==2
-                    [L1, U1]=ilu(g1a,ilu_setup);
-                    [za,flag1] = bicgstab(g1a,b,1e-7,Blck_size*periods,L1,U1);
-                elseif preconditioner==3
-                    Size = Blck_size;
-                    gss0 = g1a(Size + 1: 2*Size,1: Size) + g1a(Size + 1: 2*Size,Size+1: 2*Size) + g1a(Size + 1: 2*Size,2*Size+1: 3*Size);
-                    [L1, U1]=lu(gss0);
-                    P1 = eye(size(gss0));
-                    Q1 = eye(size(gss0));
-                    L = kron(eye(periods),L1);
-                    U = kron(eye(periods),U1);
-                    P = kron(eye(periods),P1);
-                    Q = kron(eye(periods),Q1);
-                    [za,flag1] = bicgstab1(g1a,b,1e-7,Blck_size*periods,L,U, P, Q);
-                else
-                    Size = Blck_size;
-                    gss0 = g1a(Size + 1: 2*Size,1: Size) + g1a(Size + 1: 2*Size,Size+1: 2*Size) + g1a(Size + 1: 2*Size,2*Size+1: 3*Size);
-                    [L1, U1]=lu(gss0);
-                    L1 = kron(eye(periods),L1);
-                    U1 = kron(eye(periods),U1);
-                    [za,flag1] = bicgstab(g1a,b,1e-7,Blck_size*periods,L1,U1);
-                end
-                if flag1>0 || reduced
-                    if verbose
-                        if flag1==1
-                            disp(['Error in simul: No convergence inside BICGSTAB after ' num2str(periods*10,'%6d') ' iterations, in block ' num2str(Blck_size,'%3d')]);
-                        elseif flag1==2
-                            disp(['Error in simul: Preconditioner is ill-conditioned, in block ' num2str(Blck_size,'%3d')]);
-                        elseif flag1==3
-                            disp(['Error in simul: GMRES stagnated (Two consecutive iterates were the same.), in block ' num2str(Blck_size,'%3d')]);
-                        end
-                    end
-                    ilu_setup.droptol = ilu_setup.droptol/10;
-                    reduced = 0;
-                else
-                    dx = za - ya;
-                    ya = ya + lambda*dx;
-                    y(y_index, y_kmin+(1:periods))=reshape(ya',length(y_index),periods);
-                end
+            if iter == 0
+                [L,U,P,Q] = lu(g1a);
+                zc = L\(P*b);
+                zb = U\zc;
+            else
             end
+            za = Q*zb;
+            dx = za - ya;
+            ya = ya + lambda*dx;
+            y(y_index, y_kmin+(1:periods))=reshape(ya',length(y_index),periods);
         elseif stack_solve_algo==4
             stpmx = 100 ;
             stpmax = stpmx*max([sqrt(ya'*ya);size(y_index,2)]);
-- 
GitLab