diff --git a/dynare++/kord/approximation.hweb b/dynare++/kord/approximation.hweb
index 137511c219d4d40e4dc9043a1d76bdaaa40d68e2..82e2236e256bb188020e495fc5f00239bf25fbdc 100644
--- a/dynare++/kord/approximation.hweb
+++ b/dynare++/kord/approximation.hweb
@@ -145,6 +145,10 @@ public:@;
 
 	void walkStochSteady();
 	TwoDMatrix* calcYCov() const;
+	const FGSContainer* get_rule_ders() const
+	      	{@+ return rule_ders;@+}	   
+	const FGSContainer* get_rule_ders_ss() const
+	      	{@+ return rule_ders;@+}	   
 protected:@;
 	void approxAtSteady();
 	void calcStochShift(Vector& out, double at_sigma) const;
diff --git a/dynare++/kord/decision_rule.hweb b/dynare++/kord/decision_rule.hweb
index bc96bb130138c40e3237c4b91ef593ed71156538..bad14cc1fdedb7dff06f87c7681cc61501bde65f 100644
--- a/dynare++/kord/decision_rule.hweb
+++ b/dynare++/kord/decision_rule.hweb
@@ -300,7 +300,7 @@ TwoDMatrix* simulate(emethod em, int np, const Vector& ystart,
 	@<initialize vectors and subvectors for simulation@>;
 	@<perform the first step of simulation@>;
 	@<perform all other steps of simulations@>;
-	@<add the steady state to all columns of |res|@>;
+	@<add the steady state to columns of |res|@>;
 	return res;
 }
 
@@ -327,10 +327,11 @@ evaluate the polynomial.
 	eval(em, out, dyu);
 
 @ Also clear. If the result at some period is not finite, we pad the
-rest of the matrix with zeros and return immediatelly.
+rest of the matrix with zeros.
 
 @<perform all other steps of simulations@>=
-	for (int i = 1; i < np; i++) {
+        int i=1;
+	while (i < np) {
 		ConstVector ym(*res, i-1);
 		ConstVector dym(ym, ypart.nstat, ypart.nys());
 		dy = dym;
@@ -342,14 +343,16 @@ rest of the matrix with zeros and return immediatelly.
 				TwoDMatrix rest(*res, i+1, np-i-1);
 				rest.zeros();
 			}
-			return res;
+			break;
 		}
+		i++;
 	}
 
-@ Even clearer.
-@<add the steady state to all columns of |res|@>=
-	for (int i = 0; i < res->ncols(); i++) {
-		Vector col(*res, i);
+@ Even clearer. We add the steady state to the numbers computed above and leave the padded columns to zero.
+
+@<add the steady state to columns of |res|@>=
+	for (int j = 0; j < i; j++) {
+		Vector col(*res, j);
 		col.add(1.0, ysteady);
 	}
 
diff --git a/matlab/cycle_reduction.m b/matlab/cycle_reduction.m
index 572c52e613585115e67371b314a83d8708eefd7a..04fca27a746eac1ae6a9a45eda18c618782b4703 100644
--- a/matlab/cycle_reduction.m
+++ b/matlab/cycle_reduction.m
@@ -63,38 +63,49 @@ function [X, info] = cycle_reduction(A0, A1, A2, cvg_tol, ch)
 max_it = 300;
 it = 0;
 info = 0;
-crit = 1+cvg_tol;
-A_0 = A1;
+X = [];
+crit = Inf;
 A0_0 = A0;
-A1_0 = A1;
-A2_0 = A2;
+Ahat1 = A1;
+if (nargin == 5 && ~isempty(ch) )
+    A1_0 = A1;
+    A2_0 = A2;
+end
 n = length(A0);
-id = 1:n;
+id0 = 1:n;
+id2 = id0+n;
 
-while crit > cvg_tol && it < max_it;
-    tmp = [A2_0; A0_0]/A1_0;
-    TMP = tmp*A0_0;
-    A2_1 = - tmp(id,:)* A2_0;
-    A_1 = A_0 - TMP(id,:);
-    A1_1 = A1_0 - tmp(n+id,:) * A2_0 - TMP(id,:);
-    crit = sum(sum(abs(A0_0)));
-    A_0 = A_1;
-    A0_0 = -TMP(n+id,:);
-    A1_0 = A1_1;
-    A2_0 = A2_1;
+cont = 1;
+while cont 
+    tmp = ([A0; A2]/A1)*[A0 A2];
+    A1 = A1 - tmp(id0,id2) - tmp(id2,id0);
+    A0 = -tmp(id0,id0);
+    A2 = -tmp(id2,id2);
+    Ahat1 = Ahat1 -tmp(id2,id0);
+    crit = norm(A0,1);
+    if crit < cvg_tol
+        % keep iterating until condition on A2 is met
+        if norm(A2,1) < cvg_tol
+            cont = 0;
+        end
+    elseif isnan(crit) || it == max_it
+        if crit < cvg_tol
+            info(1) = 4;
+            info(2) = log(norm(A2,1));
+        else
+            info(1) = 3;
+            info(2) = log(norm(A1,1));
+        end
+        return
+    end        
     it = it + 1;
 end
 
-if it==max_it
-    disp(['convergence not achieved after ' int2str(it) ' iterations']);
-    info = 1;
-end
-
-X = -A_0\A0;
+X = -Ahat1\A0_0;
 
 if (nargin == 5 && ~isempty(ch) )
     %check the solution
-    res = A0 + A1 * X + A2 * X * X;
+    res = A0_0 + A1_0 * X + A2_0 * X * X;
     if (sum(sum(abs(res))) > cvg_tol)
         disp(['the norm residual of the residu ' num2str(res) ' compare to the tolerance criterion ' num2str(cvg_tol)]);
     end
diff --git a/matlab/dyn_first_order_solver.m b/matlab/dyn_first_order_solver.m
index 004adaa76fc39761ffd5d2a207cf55b2292950e7..8ad529c9e473a7b4f4e2190793af3d9576d6eb15 100644
--- a/matlab/dyn_first_order_solver.m
+++ b/matlab/dyn_first_order_solver.m
@@ -175,31 +175,37 @@ B(:,cols_b) = aa(:,index_c);  % Jacobian matrix for contemporaneous endogeneous
 C = aa(:,index_p);  % Jacobain matrix for led endogeneous variables
 
 info = 0;
-info1 = 1;
 if task ~= 1 && (DynareOptions.dr_cycle_reduction || DynareOptions.dr_logarithmic_reduction)
     if n_current < DynareModel.endo_nbr
         if DynareOptions.dr_cycle_reduction
             error(['The cycle reduction algorithme can''t be used when the ' ...
-               'coefficient matrix for current variables is singular'])
+               'coefficient matrix for current variables isn''t invertible'])
         elseif DynareOptions.dr_logarithmic_reduction
             error(['The logarithmic reduction algorithme can''t be used when the ' ...
-                   'coefficient matrix for current variables is singular'])
+                   'coefficient matrix for current variables isn''t invertible'])
         end
-    end        
+    end  
+    if DynareOptions.gpu
+        gpuArray(A1);
+        gpuArray(B1);
+        gpuArray(C1);
+    end
     A1 = [aa(row_indx,index_m ) zeros(ndynamic,nfwrd)];
     B1 = [aa(row_indx,index_0m) aa(row_indx,index_0p) ];
     C1 = [zeros(ndynamic,npred) aa(row_indx,index_p)];
     if DynareOptions.dr_cycle_reduction == 1
-        [ghx, info1] = cycle_reduction(A1, B1, C1, DynareOptions.dr_cycle_reduction_tol);
+        [ghx, info] = cycle_reduction(A1, B1, C1, DynareOptions.dr_cycle_reduction_tol);
     else
-        [ghx, info1] = logarithmic_reduction(C1, B1, A1, DynareOptions.dr_logarithmic_reduction_tol, DynareOptions.dr_logarithmic_reduction_maxiter);
+        [ghx, info] = logarithmic_reduction(C1, B1, A1, DynareOptions.dr_logarithmic_reduction_tol, DynareOptions.dr_logarithmic_reduction_maxiter);
+    end
+    if info
+        % cycle_reduction or logarithmic redution failed and set info
+        return
     end
     ghx = ghx(:,index_m);
     hx = ghx(1:npred+nboth,:);
     gx = ghx(1+npred:end,:);
-end
-
-if info1 == 1
+else
     D = zeros(ndynamic+nboth);
     E = zeros(ndynamic+nboth);
     D(row_indx_de_1,index_d1) = aa(row_indx,index_d);
@@ -209,11 +215,11 @@ if info1 == 1
     
     E = [-aa(row_indx,[index_m index_0p])  ; [zeros(nboth,nboth+npred) eye(nboth,nboth+nfwrd) ] ];
 
-    [err, ss, tt, w, sdim, dr.eigval, info2] = mjdgges(E,D,DynareOptions.qz_criterium);
+    [err, ss, tt, w, sdim, dr.eigval, info1] = mjdgges(E,D,DynareOptions.qz_criterium);
     mexErrCheck('mjdgges', err);
 
-    if info2
-        if info2 == -30
+    if info1
+        if info1 == -30
             % one eigenvalue is close to 0/0
             info(1) = 7;
         else
@@ -255,20 +261,24 @@ if info1 == 1
                                                  % derivatives with respect to dynamic state variables
                                                  % forward variables
     Z = w';
-    Z11t = Z(indx_stable_root,    indx_stable_root)';
+    Z11 = Z(indx_stable_root,    indx_stable_root);
     Z21  = Z(indx_explosive_root, indx_stable_root);
     Z22  = Z(indx_explosive_root, indx_explosive_root);
-    if ~isscalar(Z22) && (condest(Z22) > 1e9)
+    [minus_gx,rc] = linsolve(Z22,Z21);
+    if rc < 1e-9
+        % Z22 is near singular
         info(1) = 5;
-        info(2) = condest(Z22);
+        info(2) = -log(rc);
         return;
-    else
-        gx = - Z22 \ Z21;
     end
+    gx  = -minus_gx;
     % predetermined variables
-    hx =  Z11t * inv(tt(indx_stable_root, indx_stable_root)) * ss(indx_stable_root, indx_stable_root) * inv(Z11t);
+    opts.UT = true;
+    opts.TRANSA = true;
+    hx1 = linsolve(tt(indx_stable_root, indx_stable_root),Z11,opts)';
+    hx2 = linsolve(Z11,ss(indx_stable_root, indx_stable_root)')';
+    hx =  hx1*hx2;
     ghx = [hx(k1,:); gx(k2(nboth+1:end),:)];
-
 end
 
 dr.gx = gx;
diff --git a/matlab/dynare_config.m b/matlab/dynare_config.m
index 98ed0c0bbec9565735c547e9e791d20b4657b9e5..0644f8e7f7afefb437902c9eb232c1b8066bbe3e 100644
--- a/matlab/dynare_config.m
+++ b/matlab/dynare_config.m
@@ -93,6 +93,11 @@ if (exist('OCTAVE_VERSION') && ~user_has_octave_forge_package('statistics')) ...
     addpath([dynareroot '/missing/nanmean'])
 end
 
+% linsolve is missing in Octave
+if (exist('OCTAVE_VERSION'))
+    addpath([dynareroot '/missing/linsolve'])
+end
+
 % Add path to MEX files
 if exist('OCTAVE_VERSION')
     addpath([dynareroot '../mex/octave/']);
diff --git a/matlab/dyntable.m b/matlab/dyntable.m
index ded727537a2f67957d9d02bc6e8710b63d047a55..6df882c81779095e9bd470c0fd54b8f172bbf630 100644
--- a/matlab/dyntable.m
+++ b/matlab/dyntable.m
@@ -30,7 +30,7 @@ label_width = max(size(deblank(char(headers(1,:),labels)),2))+2;
 
 label_fmt = sprintf('%%-%ds',label_width);
 
-values_length = max(ceil(max(max(log10(abs(values))))),1)+val_precis+1;
+values_length = max(ceil(max(max(log10(abs(values(isfinite(values))))))),1)+val_precis+1;
 if any(values) < 0
     values_length = values_length+1;
 end
diff --git a/matlab/global_initialization.m b/matlab/global_initialization.m
index 8ef82d5b6350188198c59fc714946e906c4e3db2..fdb7af901301ed7ef7b935bc8eeabc958884aaec 100644
--- a/matlab/global_initialization.m
+++ b/matlab/global_initialization.m
@@ -521,8 +521,12 @@ options_.graph_save_formats.eps = 1;
 options_.graph_save_formats.pdf = 0;
 options_.graph_save_formats.fig = 0;
 
+% risky steady state
 options_.risky_steadystate = 0;
 
+% use GPU
+options_.gpu = 0;
+
 % initialize persistent variables in priordens()
 priordens([],[],[],[],[],[],1);
 % initialize persistent variables in dyn_first_order_solver()
diff --git a/matlab/missing/linsolve/linsolve.m b/matlab/missing/linsolve/linsolve.m
new file mode 100644
index 0000000000000000000000000000000000000000..2ae167bd4455b97020a3ae46ee3d113f5ea0e238
--- /dev/null
+++ b/matlab/missing/linsolve/linsolve.m
@@ -0,0 +1,33 @@
+function [x,c] = linsolve(A,B,opts)
+% (very imperfect) Clone of Matlab's linsolve.
+
+% Copyright (C) 2010-2011 Dynare Team
+%
+% This file is part of Dynare.
+%
+% Dynare is free software: you can redistribute it and/or modify
+% it under the terms of the GNU General Public License as published by
+% the Free Software Foundation, either version 3 of the License, or
+% (at your option) any later version.
+%
+% Dynare is distributed in the hope that it will be useful,
+% but WITHOUT ANY WARRANTY; without even the implied warranty of
+% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+% GNU General Public License for more details.
+%
+% You should have received a copy of the GNU General Public License
+% along with Dynare.  If not, see <http://www.gnu.org/licenses/>.
+
+    c = [];
+    x = [];
+    if nargin == 3
+        if isfield(opts,'TRANSA')
+            A = A';
+        end
+    end
+    if nargout == 2
+        c = rcond(A);
+    end
+    
+    x = A\B;
+    
diff --git a/matlab/simult.m b/matlab/simult.m
index 315c89d403a12eb2b7683707c5d3c60c1b6116fe..31c594cf53171da1edc106cb5957ddc61e5b980f 100644
--- a/matlab/simult.m
+++ b/matlab/simult.m
@@ -78,7 +78,9 @@ chol_S = chol(DynareModel.Sigma_e(i_exo_var,i_exo_var));
 
 for i=1:replic
     if ~isempty(DynareModel.Sigma_e)
-        DynareResults.exo_simul(:,i_exo_var) = randn(DynareOptions.periods,nxs)*chol_S;
+        % we fill the shocks row wise to have the same values
+        % independently of the length of the simulation
+        DynareResults.exo_simul(:,i_exo_var) = randn(nxs,DynareOptions.periods)'*chol_S;
     end
     y_ = simult_(y0,dr,DynareResults.exo_simul,order);
     % elimninating initial value
diff --git a/preprocessor/ComputingTasks.cc b/preprocessor/ComputingTasks.cc
index d4ff1708c7c93ee2655d655523f4d030101d7f9c..a3b260fe28c1e63267eaad3d0d55ecfe9892cb53 100644
--- a/preprocessor/ComputingTasks.cc
+++ b/preprocessor/ComputingTasks.cc
@@ -95,6 +95,9 @@ void
 SimulStatement::checkPass(ModFileStructure &mod_file_struct, WarningConsolidation &warnings)
 {
   mod_file_struct.simul_present = true;
+
+  // The following is necessary to allow shocks+endval+simul in a loop
+  mod_file_struct.shocks_present_but_simul_not_yet = false;
 }
 
 void
diff --git a/preprocessor/NumericalInitialization.cc b/preprocessor/NumericalInitialization.cc
index 63d1d73ae0670c290335d4614528feabc98142b1..98327d66383d53cf03f3f821dbeebad8a2d69633 100644
--- a/preprocessor/NumericalInitialization.cc
+++ b/preprocessor/NumericalInitialization.cc
@@ -150,9 +150,9 @@ EndValStatement::EndValStatement(const init_values_t &init_values_arg,
 void
 EndValStatement::checkPass(ModFileStructure &mod_file_struct, WarningConsolidation &warnings)
 {
-  if (mod_file_struct.shocks_present)
+  if (mod_file_struct.shocks_present_but_simul_not_yet)
     {
-      cerr << "ERROR: Putting a \"shocks\" block before an \"endval\" block is not permitted. Please swap the two blocks. This limitation will be removed in the next major release of Dynare." << endl;
+      cerr << "ERROR: Putting a \"shocks\" block before an \"endval\" block is not permitted. Please swap the two blocks. This limitation will be removed in a future release of Dynare." << endl;
       exit(EXIT_FAILURE);
     }
 }
diff --git a/preprocessor/Shocks.cc b/preprocessor/Shocks.cc
index 949ff5f198966b6f633f71d4bad6aadc5d168268..3536053e186c502145b214b1ef687858ddf099e4 100644
--- a/preprocessor/Shocks.cc
+++ b/preprocessor/Shocks.cc
@@ -196,7 +196,7 @@ void
 ShocksStatement::checkPass(ModFileStructure &mod_file_struct, WarningConsolidation &warnings)
 {
   // Workaround for trac ticket #35
-  mod_file_struct.shocks_present = true;
+  mod_file_struct.shocks_present_but_simul_not_yet = true;
 
   // Determine if there is a calibrated measurement error
   for (var_and_std_shocks_t::const_iterator it = var_shocks.begin();
@@ -245,7 +245,7 @@ void
 MShocksStatement::checkPass(ModFileStructure &mod_file_struct, WarningConsolidation &warnings)
 {
   // Workaround for trac ticket #35
-  mod_file_struct.shocks_present = true;
+  mod_file_struct.shocks_present_but_simul_not_yet = true;
 }
 
 ConditionalForecastPathsStatement::ConditionalForecastPathsStatement(const AbstractShocksStatement::det_shocks_t &paths_arg, const SymbolTable &symbol_table_arg) :
diff --git a/preprocessor/Statement.cc b/preprocessor/Statement.cc
index 4c890baffc3b3381b1467b0786a663c085da7634..48abb3a984101f77f422605659606e3dcbf29e1a 100644
--- a/preprocessor/Statement.cc
+++ b/preprocessor/Statement.cc
@@ -37,7 +37,7 @@ ModFileStructure::ModFileStructure() :
   identification_present(false),
   estimation_analytic_derivation(false),
   partial_information(false),
-  shocks_present(false),
+  shocks_present_but_simul_not_yet(false),
   histval_present(false),
   k_order_solver(false),
   calibrated_measurement_errors(false),
diff --git a/preprocessor/Statement.hh b/preprocessor/Statement.hh
index 3847d3cccda706b00eeae9fc7c7f6bbe09b35994..1c5c00829994d27867f410876087c4b30a045944 100644
--- a/preprocessor/Statement.hh
+++ b/preprocessor/Statement.hh
@@ -69,9 +69,11 @@ public:
   bool estimation_analytic_derivation;
   //! Whether the option partial_information is given to stoch_simul/estimation/osr/ramsey_policy
   bool partial_information;
-  //! Whether a shocks or mshocks block is present
-  /*! Used for the workaround for trac ticket #35 */
-  bool shocks_present;
+  //! Whether a shocks or mshocks block has been parsed and no simul command yet run
+  /*! Used for the workaround for trac ticket #35. When a simul command is
+      seen, this flag is cleared in order to allow a sequence
+      shocks+endval+simul in a loop */
+  bool shocks_present_but_simul_not_yet;
   //! Whether a histval bloc is present
   /*! Used for the workaround for trac ticket #157 */
   bool histval_present;
diff --git a/tests/first_order/fs2000_cr.mod b/tests/first_order/fs2000_cr.mod
new file mode 100644
index 0000000000000000000000000000000000000000..a84eddadecea21c523ed81c1fb4df16b830a4712
--- /dev/null
+++ b/tests/first_order/fs2000_cr.mod
@@ -0,0 +1,73 @@
+// See fs2000.mod in the examples/ directory for details on the model
+
+@#define countries = 1:100
+
+var 
+@#for c in countries
+ m_@{c} P_@{c} c_@{c} e_@{c} W_@{c} R_@{c} k_@{c} d_@{c} n_@{c} l_@{c} gy_obs_@{c} gp_obs_@{c} y_@{c} dA_@{c}
+@#endfor
+;
+varexo e_a e_m;
+
+parameters alp bet gam mst rho psi del;
+
+alp = 0.33;
+bet = 0.99;
+gam = 0.003;
+mst = 1.011;
+rho = 0.7;
+psi = 0.787;
+del = 0.02;
+
+model;
+@#for c in countries
+dA_@{c} = exp(gam+e_a);
+log(m_@{c}) = (1-rho)*log(mst) + rho*log(m_@{c}(-1))+e_m;
+-P_@{c}/(c_@{c}(+1)*P_@{c}(+1)*m_@{c})+bet*P_@{c}(+1)*(alp*exp(-alp*(gam+log(e_@{c}(+1))))*k_@{c}^(alp-1)*n_@{c}(+1)^(1-alp)+(1-del)*exp(-(gam+log(e_@{c}(+1)))))/(c_@{c}(+2)*P_@{c}(+2)*m_@{c}(+1))=0;
+W_@{c} = l_@{c}/n_@{c};
+-(psi/(1-psi))*(c_@{c}*P_@{c}/(1-n_@{c}))+l_@{c}/n_@{c} = 0;
+R_@{c} = P_@{c}*(1-alp)*exp(-alp*(gam+e_a))*k_@{c}(-1)^alp*n_@{c}^(-alp)/W_@{c};
+1/(c_@{c}*P_@{c})-bet*P_@{c}*(1-alp)*exp(-alp*(gam+e_a))*k_@{c}(-1)^alp*n_@{c}^(1-alp)/(m_@{c}*l_@{c}*c_@{c}(+1)*P_@{c}(+1)) = 0;
+c_@{c}+k_@{c} = exp(-alp*(gam+e_a))*k_@{c}(-1)^alp*n_@{c}^(1-alp)+(1-del)*exp(-(gam+e_a))*k_@{c}(-1);
+P_@{c}*c_@{c} = m_@{c};
+m_@{c}-1+d_@{c} = l_@{c};
+e_@{c} = exp(e_a);
+y_@{c} = k_@{c}(-1)^alp*n_@{c}^(1-alp)*exp(-alp*(gam+e_a));
+gy_obs_@{c} = dA_@{c}*y_@{c}/y_@{c}(-1);
+gp_obs_@{c} = (P_@{c}/P_@{c}(-1))*m_@{c}(-1)/dA_@{c};
+@#endfor
+end;
+
+initval;
+@#for c in countries
+k_@{c} = 6;
+m_@{c} = mst;
+P_@{c} = 2.25;
+c_@{c} = 0.45;
+e_@{c} = 1;
+W_@{c} = 4;
+R_@{c} = 1.02;
+d_@{c} = 0.85;
+n_@{c} = 0.19;
+l_@{c} = 0.86;
+y_@{c} = 0.6;
+gy_obs_@{c} = exp(gam);
+gp_obs_@{c} = exp(-gam);
+dA_@{c} = exp(gam);
+@#endfor
+end;
+
+shocks;
+var e_a; stderr 0.014;
+var e_m; stderr 0.005;
+end;
+
+steady;
+
+tic;
+check;
+disp(toc);
+
+tic;
+stoch_simul(order=1,dr=cycle_reduction,irf=0,nomoments,noprint);
+disp(toc);