diff --git a/matlab/DsgeLikelihood.m b/matlab/DsgeLikelihood.m
index c4ba26e0cedc7ed00ff3c97f2c3e38e0ad4cd31c..c07dede8fda11a02252a7fc91cb46254f5f02a70 100644
--- a/matlab/DsgeLikelihood.m
+++ b/matlab/DsgeLikelihood.m
@@ -464,11 +464,15 @@ singularity_flag = 0;
 
 if ((kalman_algo==1) || (kalman_algo==3))% Multivariate Kalman Filter
     if no_missing_data_flag
-        LIK = kalman_filter(Y,diffuse_periods+1,size(Y,2), ...
-                            a,Pstar, ...
-                            kalman_tol, riccati_tol, ...
-                            DynareOptions.presample, ...
-                            T,Q,R,H,Z,mm,pp,rr,Zflag,diffuse_periods);
+        if options_.block == 1
+            LIK = block_kalman_filter(T,R,Q,H,Pstar,Y,start,mf,kalman_tol,riccati_tol, M_.nz_state_var, M_.n_diag); 
+        else
+            LIK = kalman_filter(Y,diffuse_periods+1,size(Y,2), ...
+                                a,Pstar, ...
+                                kalman_tol, riccati_tol, ...
+                                DynareOptions.presample, ...
+                                T,Q,R,H,Z,mm,pp,rr,Zflag,diffuse_periods);
+        end
         if analytic_derivation
             if no_DLIK==0
                 [DLIK] = score(T,R,Q,H,Pstar,Y,DT,DYss,DOm,DH,DP,start,mf,kalman_tol,riccati_tol);
@@ -540,4 +544,4 @@ fval    = (likelihood-lnprior);
 DynareOptions.kalman_algo = kalman_algo;
 
 % Update the penalty.
-penalty = fval;
\ No newline at end of file
+penalty = fval;
diff --git a/mex/sources/block_kalman_filter/block_kalman_filter.cc b/mex/sources/block_kalman_filter/block_kalman_filter.cc
new file mode 100644
index 0000000000000000000000000000000000000000..e7b9e690a23a1f7bfb253758a59e607bd24c0c2f
--- /dev/null
+++ b/mex/sources/block_kalman_filter/block_kalman_filter.cc
@@ -0,0 +1,884 @@
+/*
+ * Copyright (C) 2007-2011 Dynare Team
+ *
+ * This file is part of Dynare.
+ *
+ * Dynare is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Dynare is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Dynare.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <cstring>
+#include <cmath>
+#include <iostream>
+#include <fstream>
+#include <vector>
+
+using namespace std;
+//#define BLAS
+
+#define DIRECT
+
+#ifndef DEBUG_EX
+# include <dynmex.h>
+#else
+# include "mex_interface.hh"
+#endif
+
+#ifdef ATLAS
+# include <cblas.h>
+#else
+# ifdef MKL
+#  include <mkl_blas.h>
+   typedef ptrdiff_t blas_int;
+# else
+#  include <dynblas.h>
+# endif
+#endif
+
+#define BLOCK
+
+void
+mexDisp(mxArray* P)
+{
+  unsigned int n = mxGetN(P);
+  unsigned int m = mxGetM(P);
+  double *M = mxGetPr(P);
+  mexPrintf("%d x %d\n", m, n);
+  mexEvalString("drawnow;");
+  for (unsigned int i = 0; i < m; i++)
+    {
+      for (unsigned int j = 0; j < n; j++)
+        mexPrintf(" %9.4f",M[i+ j * m]);
+      mexPrintf("\n");
+    }
+}
+
+/*
+(T,R,Q,H,P,Y,start,mf,kalman_tol,riccati_tol, block)
+% Computes the likelihood of a stationnary state space model.
+%
+% INPUTS
+%    T                      [double]    n*n transition matrix of the state equation.
+%    R                      [double]    n*rr matrix, mapping structural innovations to state variables.
+%    Q                      [double]    rr*rr covariance matrix of the structural innovations.
+%    H                      [double]    pp*pp (or 1*1 =0 if no measurement error) covariance matrix of the measurement errors. 
+%    P                      [double]    n*n variance-covariance matrix with stationary variables
+%    Y                      [double]    pp*smpl matrix of (detrended) data, where pp is the maximum number of observed variables.
+%    start                  [integer]   scalar, likelihood evaluation starts at 'start'.
+%    mf                     [integer]   pp*1 vector of indices.
+%    kalman_tol             [double]    scalar, tolerance parameter (rcond).
+%    riccati_tol            [double]    scalar, tolerance parameter (riccati iteration).
+%
+% OUTPUTS
+%    LIK        [double]    scalar, MINUS loglikelihood
+%    lik        [double]    vector, density of observations in each period.
+%
+% REFERENCES
+%   See "Filtering and Smoothing of State Vector for Diffuse State Space
+%   Models", S.J. Koopman and J. Durbin (2003, in Journal of Time Series
+%   Analysis, vol. 24(1), pp. 85-98).
+%
+% NOTES
+%   The vector "lik" is used to evaluate the jacobian of the likelihood.
+
+% Copyright (C) 2004-2011 Dynare Team
+%
+% This file is part of Dynare.
+%
+% Dynare is free software: you can redistribute it and/or modify
+% it under the terms of the GNU General Public License as published by
+% the Free Software Foundation, either version 3 of the License, or
+% (at your option) any later version.
+%
+% Dynare is distributed in the hope that it will be useful,
+% but WITHOUT ANY WARRANTY; without even the implied warranty of
+% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+% GNU General Public License for more details.
+%
+% You should have received a copy of the GNU General Public License
+% along with Dynare.  If not, see <http://www.gnu.org/licenses/>.
+%global options_
+smpl = size(Y,2);                               % Sample size.
+n   = size(T,2);                               % Number of state variables.
+pp   = size(Y,1);                               % Maximum number of observed variables.
+a    = zeros(n,1);                             % State vector.
+dF   = 1;                                       % det(F).
+QQ   = R*Q*transpose(R);                        % Variance of R times the vector of structural innovations.
+t    = 0;                                       % Initialization of the time index.
+lik  = zeros(smpl,1);                           % Initialization of the vector gathering the densities.
+LIK  = Inf;                                     % Default value of the log likelihood.
+oldK = Inf;
+notsteady   = 1;                                % Steady state flag.
+F_singular  = 1;
+if block
+    %nz_state_var = M_.nz_state_var;
+    while notsteady && t<smpl
+        t  = t+1;
+        v  = Y(:,t)-a(mf);
+        F  = P(mf,mf) + H;
+        if rcond(F) < kalman_tol
+            if ~all(abs(F(:))<kalman_tol)
+                return
+            else
+                a = T*a;
+                P = T*P*transpose(T)+QQ;
+            end
+        else
+            F_singular = 0;
+            dF     = det(F);
+            iF     = inv(F);
+            lik(t) = log(dF)+transpose(v)*iF*v;
+            K      = P(:,mf)*iF;
+            a      = T*(a+K*v);
+            P = block_pred_Vcov_KF(mf, P, K, T, QQ);
+            %P      = T*(P-K*P(mf,:))*transpose(T)+QQ;
+            notsteady = max(abs(K(:)-oldK)) > riccati_tol;
+            oldK = K(:);
+        end
+    end;
+else
+    while notsteady && t<smpl
+        t  = t+1;
+        v  = Y(:,t)-a(mf);
+        F  = P(mf,mf) + H;
+        if rcond(F) < kalman_tol
+            if ~all(abs(F(:))<kalman_tol)
+                return
+            else
+                a = T*a;
+                P = T*P*transpose(T)+QQ;
+            end
+        else
+            F_singular = 0;
+            dF     = det(F);
+            iF     = inv(F);
+            lik(t) = log(dF)+transpose(v)*iF*v;
+            K      = P(:,mf)*iF;
+            a      = T*(a+K*v);
+            P      = T*(P-K*P(mf,:))*transpose(T)+QQ;
+            notsteady = max(abs(K(:)-oldK)) > riccati_tol;
+            oldK = K(:);
+        
+        end
+    end
+end
+
+if F_singular
+    error('The variance of the forecast error remains singular until the end of the sample')
+end
+
+if t < smpl
+    t0 = t+1;
+    while t < smpl
+        t = t+1;
+        v = Y(:,t)-a(mf);
+        a = T*(a+K*v);
+        lik(t) = transpose(v)*iF*v;
+    end
+    lik(t0:smpl) = lik(t0:smpl) + log(dF);
+end    
+
+% adding log-likelihhod constants
+lik = (lik + pp*log(2*pi))/2;
+
+LIK = sum(lik(start:end)); % Minus the log-likelihood.*/
+
+
+
+#if defined(MATLAB_MEX_FILE) && defined(_WIN32)
+extern "C" int dgecon(const char *norm, const int *n, double *a, const int *lda, const double *anorm, double *rcond, double *work, int *iwork, int *info); 
+extern "C" int dgetrf(const int *m, const int *n, double *a, const int *lda, int *lpiv, int *info); 
+extern "C" double dlange(const char *norm, const int *m, const int *n, const double *a, const int *lda, double *work);
+extern "C" int dgetri(const int *n, double* a, const int *lda, const int *lpiv, double* work, const int *lwork, int *info);
+extern "C" void dgemm(const char *transa, const char *transb, const int *m, const int *n,
+                      const int *k, const double *alpha, const double *a, const int *lda,
+                      const double *b, const int *ldb, const double *beta,
+                      double *c, const int *ldc);
+extern "C" void dsymm(const char *side, const char *uplo, const int *m, const int *n,
+             const double *alpha, const double *a, const int *lda,
+             const double *b, const int *ldb, const double *beta,
+             double *c, const int *ldc);
+#else
+extern "C" int dgecon_(const char *norm, const int *n, double *a, const int *lda, const double *anorm, double *rcond, double *work, int *iwork, int *info); 
+extern "C" int dgetrf_(const int *m, const int *n, double *a, const int *lda, int *lpiv, int *info); 
+extern "C" double dlange_(const char *norm, const int *m, const int *n, const double *a, const int *lda, double *work);
+extern "C" int dgetri_(const int *n, double* a, const int *lda, const int *lpiv, double* work, const int *lwork, int *info);
+extern "C" void dgemm_(const char *transa, const char *transb, const int *m, const int *n,
+                      const int *k, const double *alpha, const double *a, const int *lda,
+                      const double *b, const int *ldb, const double *beta,
+                      double *c, const int *ldc);
+extern "C" void dsymm_(const char *side, const char *uplo, const int *m, const int *n,
+             const double *alpha, const double *a, const int *lda,
+             const double *b, const int *ldb, const double *beta,
+             double *c, const int *ldc);
+#endif
+bool
+not_all_abs_F_bellow_crit(double* F, int size, double crit)
+{
+   int i = 0;
+   while (i < size && abs(F[i])<crit)
+     {
+       i++;
+     }
+   if (i < size)
+     return false;
+   else
+     return true;
+}
+
+
+double
+det(double* F, int dim)
+{
+  double det = 1.0;
+  for (int i = 0; i < dim; i++)
+    det *= F[i * (1 + dim)];
+  return det;
+}
+
+void
+mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
+{
+  if (nlhs > 2)
+    DYN_MEX_FUNC_ERR_MSG_TXT("kalman_filter provides at most 2 output argument.");
+  if (nrhs != 12)
+    DYN_MEX_FUNC_ERR_MSG_TXT("kalman_filter requires exactly 12 input arguments.");
+//(T,R,Q,H,P,Y,start,mf,kalman_tol,riccati_tol, block)
+  mxArray *pT = mxDuplicateArray(prhs[0]);
+  mxArray *pR = mxDuplicateArray(prhs[1]);
+  mxArray *pQ = mxDuplicateArray(prhs[2]);
+  mxArray *pH = mxDuplicateArray(prhs[3]);
+  mxArray *pP = mxDuplicateArray(prhs[4]);
+  mxArray *pY = mxDuplicateArray(prhs[5]);
+  
+  double *T = mxGetPr(pT);
+  double *R = mxGetPr(pR);
+  double *Q = mxGetPr(pQ);
+  double *H = mxGetPr(pH);
+  double *P = mxGetPr(pP);
+  double *Y = mxGetPr(pY);
+  int start = mxGetScalar(prhs[6]);
+  double* mfd = (double*)mxGetData(prhs[7]);
+  
+    
+  double kalman_tol = mxGetScalar(prhs[8]);
+  double riccati_tol = mxGetScalar(prhs[9]);
+  
+  /* Reading the sparse structure of matrix in (§A) */
+  typedef struct 
+  {
+      int indx_1;
+      int indx_2;
+      int indx_3;
+  } t_Var;
+  /*mxArray *M_;
+  M_ = mexGetVariable("global", "M_");
+  if (M_ == NULL)
+    mexErrMsgTxt(" in main, global variable not found: M_\n");*/
+
+  //mexEvalString("drawnow;");
+  /*Defining the initials values*/
+  int smpl = mxGetN(pY);                                 // Sample size.          ;
+  int n   = mxGetN(pT);                                 // Number of state variables.
+  int pp   = mxGetM(pY);                                 // Maximum number of observed variables.
+  int n_state = n - pp;
+
+
+#ifdef DIRECT  
+  /*mxArray* nze = mxGetFieldByNumber(M_, 0, mxGetFieldNumber(M_, "nz_state_var"));
+  double* nz_state_var = (double*)mxGetData(nze);*/
+  double* nz_state_var = (double*)mxGetData(prhs[10]);
+  int *i_nz_state_var = (int*)mxMalloc(n*sizeof(int));
+  for (int i = 0; i < n; i++)
+    {
+      i_nz_state_var[i] = nz_state_var[i];
+      //mexPrintf("i_nz_state_var[%d]=%d\n",i,i_nz_state_var[i]  );
+    }
+  /*mxArray* nn_diag = mxGetFieldByNumber(M_, 0, mxGetFieldNumber(M_, "n_diag"));
+  int n_diag = mxGetScalar(nn_diag);*/
+  int n_diag = mxGetScalar(prhs[11]);
+#else
+  mxArray *mxa = mxGetFieldByNumber(M_, 0, mxGetFieldNumber(M_, "fname"));
+  int buflen = mxGetM(mxa) * mxGetN(mxa) + 1;
+  char *fname;
+  fname = (char *) mxCalloc(buflen+1, sizeof(char));
+  int status = mxGetString(mxa, fname, buflen);
+  fname[buflen] = ' ';
+  if (status != 0)
+    mexWarnMsgTxt("Not enough space. Filename is truncated.");
+  string file_name = fname;
+  file_name.append(".kfi");
+  /*mexPrintf("file_name=%s\n", file_name.c_str());
+  mexEvalString("drawnow;");*/
+  
+  ifstream KF_index_file;
+  KF_index_file.open(file_name.c_str(), ios::in | ios::binary);
+  int n_diag;
+  KF_index_file.read(reinterpret_cast<char *>(&n_diag), sizeof(n_diag));
+  /*mexPrintf("n_diag=%d\n",n_diag);*/
+  
+  int size_index_KF;
+  KF_index_file.read(reinterpret_cast<char *>(&size_index_KF), sizeof(size_index_KF));
+  //mexPrintf("size_index_KF=%d\n", size_index_KF);
+  t_Var *Var = (t_Var*)mxMalloc(size_index_KF * sizeof(t_Var));
+  KF_index_file.read(reinterpret_cast<char *>(Var), size_index_KF * sizeof(t_Var));
+  
+  int size_index_KF_2;
+  KF_index_file.read(reinterpret_cast<char *>(&size_index_KF_2), sizeof(size_index_KF_2));
+  //mexPrintf("size_index_KF_2=%d\n", size_index_KF_2);
+  t_Var *Var_2 = (t_Var*)mxMalloc(size_index_KF_2 * sizeof(t_Var));
+  KF_index_file.read(reinterpret_cast<char *>(Var_2), size_index_KF_2 * sizeof(t_Var));
+  KF_index_file.close();
+#endif
+
+  mxArray* pa = mxCreateDoubleMatrix(n, 1, mxREAL);         // State vector.
+  double* a = mxGetPr(pa);
+  double* tmp_a = (double*)mxMalloc(n * sizeof(double));
+  double dF = 0.0;                                            // det(F).
+  mxArray* p_tmp = mxCreateDoubleMatrix(n, n_state, mxREAL);
+  double *tmp = mxGetPr(p_tmp);
+  mxArray* p_tmp1 = mxCreateDoubleMatrix(n, pp, mxREAL);
+  double *tmp1 = mxGetPr(p_tmp1);
+  int t    = 0;                                               // Initialization of the time index.
+  mxArray* plik  = mxCreateDoubleMatrix(smpl, 1, mxREAL);
+  double* lik = mxGetPr(plik);
+  double Inf =  mxGetInf();                                   
+  double LIK  = 0.0;                                          // Default value of the log likelihood.
+
+  bool notsteady   = true;                                 // Steady state flag.
+  bool F_singular  = true;
+  double* v_pp = (double*)mxMalloc(pp * sizeof(double));
+  double* v_n = (double*)mxMalloc(n * sizeof(double));
+  int* mf = (int*)mxMalloc(pp * sizeof(int));
+  for (int i = 0; i < pp; i++)
+    mf[i] = mfd[i] - 1;
+  double pi = atan2((double)0.0,(double)-1.0);
+  
+  /*compute QQ = R*Q*transpose(R)*/                        // Variance of R times the vector of structural innovations.;
+  // tmp = R * Q;
+  for (int i = 0; i < n; i++)
+    for (int j = 0; j < pp; j++)
+      {
+        double res = 0.0;
+        for (int k = 0; k < pp; k++)
+          res += R[i + k * n] * Q[j * pp + k];
+        tmp1[i + j * n] = res;
+      }
+
+  // QQ = tmp * transpose(R)
+  mxArray* pQQ = mxCreateDoubleMatrix(n, n, mxREAL);
+  double* QQ = mxGetPr(pQQ);
+  for (int i = 0; i < n; i++)
+    for (int j = i; j < n; j++)
+      {
+        double res = 0.0;
+        for (int k = 0; k < pp; k++)
+          res += tmp1[i + k * n] * R[k * n + j];
+        QQ[i + j * n] = QQ[j + i * n] = res;
+      }
+  mxDestroyArray(p_tmp1);
+
+  mxArray* pv = mxCreateDoubleMatrix(pp, 1, mxREAL);
+  double* v = mxGetPr(pv);
+  mxArray* pF =  mxCreateDoubleMatrix(pp, pp, mxREAL);
+  double* F = mxGetPr(pF);
+  mxArray* piF =  mxCreateDoubleMatrix(pp, pp, mxREAL);
+  double* iF = mxGetPr(piF);
+  int lw = pp * 4;
+  double* w = (double*)mxMalloc(lw * sizeof(double));
+  int* iw = (int*)mxMalloc(pp * sizeof(int));
+  int* ipiv = (int*)mxMalloc(pp * sizeof(int));
+  int info = 0;
+  double anorm, rcond;
+  #ifdef BLAS
+  mxArray* p_P_t_t1 = mxCreateDoubleMatrix(n, n, mxREAL);
+  #else
+  mxArray* p_P_t_t1 = mxCreateDoubleMatrix(n_state, n_state, mxREAL);
+  #endif
+  double* P_t_t1 = mxGetPr(p_P_t_t1);
+  mxArray* pK = mxCreateDoubleMatrix(n, pp, mxREAL);
+  double* K = mxGetPr(pK);
+  #ifdef BLAS
+  mxArray* p_K_P = mxCreateDoubleMatrix(n, n, mxREAL);
+  #else
+  mxArray* p_K_P = mxCreateDoubleMatrix(n_state, n_state, mxREAL);
+  #endif
+  double* K_P = mxGetPr(p_K_P);
+  double* oldK  = (double*)mxMalloc(n * pp * sizeof(double));
+  double* P_mf = (double*)mxMalloc(n * pp * sizeof(double));
+  for (int i = 0; i < pp; i++)
+    oldK[i] = Inf;
+  
+  while (notsteady && t < smpl)
+    {
+      //v = Y(:,t) - a(mf)
+      for (int i = 0; i < pp; i++)
+        v[i]  = Y[i + t * pp] - a[mf[i]];
+      
+      //F  = P(mf,mf) + H;
+      for (int i = 0; i < pp; i++)
+        for (int j = 0; j < pp; j++)
+          iF[i + j * pp] = F[i + j * pp] = P[mf[i] + mf[j] * n] + H[i + j * pp];
+      
+      /* Computes the norm of x */ 
+#if defined(MATLAB_MEX_FILE) && defined(_WIN32)
+      double anorm = dlange("1", &pp, &pp, iF, &pp, w); 
+#else
+      double anorm = dlange_("1", &pp, &pp, iF, &pp, w); 
+#endif
+
+      /* Modifies F in place with a LU decomposition */ 
+#if defined(MATLAB_MEX_FILE) && defined(_WIN32)
+      dgetrf(&pp, &pp, iF, &pp, ipiv, &info); 
+#else
+      dgetrf_(&pp, &pp, iF, &pp, ipiv, &info); 
+#endif
+      if (info != 0) fprintf(stderr, "failure with error %d\n", info); 
+ 
+      /* Computes the reciprocal norm */ 
+#if defined(MATLAB_MEX_FILE) && defined(_WIN32)
+      dgecon("1", &pp, iF, &pp, &anorm, &rcond, w, iw, &info); 
+#else
+      dgecon_("1", &pp, iF, &pp, &anorm, &rcond, w, iw, &info); 
+#endif
+      
+      if (rcond < kalman_tol)
+        if (not_all_abs_F_bellow_crit(F, pp * pp, kalman_tol))   //~all(abs(F(:))<kalman_tol)
+          {
+            mexPrintf("error: F singular\n");
+            double LIK  = Inf;
+            if (nlhs >= 1)
+              {
+                plhs[0] = mxCreateDoubleMatrix(1, 1, mxREAL);
+                double* pind = mxGetPr(plhs[0]);
+                pind[0] = LIK;
+              }
+            if (nlhs == 2)
+              {
+                for (int i = t; i < smpl; i++)
+                  lik[i] = Inf;
+                plhs[1] = plik;
+              }
+            return;
+          }
+        else
+          {
+            mexPrintf("F singular\n");
+            //a = T*a;
+            for (int i = 0; i < n; i++)
+              {
+                double res = 0.0;
+                for (int j = pp; j < n; j++)
+                  res += T[i + j *n] * a[j];
+                tmp_a[i] = res;
+              }
+           memcpy(a, tmp_a, n * sizeof(double));
+
+           //P = T*P*transpose(T)+QQ;
+           memset(tmp, 0, n * n_state * sizeof(double));
+#ifdef DIRECT
+           for (int i = 0; i < n; i++)
+            for (int j = pp; j < n; j++)
+              {
+                int j1 = j - pp;
+                int j1_n_state = j1 * n_state - pp ;
+                //if ((i < pp) || (i >= n_diag + pp) || (j1 >= n_diag))
+                  for (int k = pp; k < i_nz_state_var[i]; k++)
+                    {
+                      tmp[i + j1 * n ] += T[i + k * n] * P[k + j1_n_state];
+                    }
+              }
+          /*for (int i = pp; i < pp + n_diag; i++)
+             for (int j = pp; j < pp + n_diag; j++)
+               tmp[i + (j - pp) * n] = T[i + i * n] * P_t_t1[j - pp + (i - pp) * n_state];*/
+          memset(P, 0, n * n * sizeof(double));
+          int n_n_obs = n * pp;
+          for (int i = 0; i < n; i++)
+            for (int j = i; j < n; j++)
+              {
+                //if ((i < pp) || (i >= n_diag + pp) || (j < pp) || (j >= n_diag + pp))
+                  for (int k = pp; k < i_nz_state_var[j]; k++)
+                    {
+                      int k_n = k * n;
+                      P[i * n + j] += tmp[i + k_n - n_n_obs] * T[j + k_n];
+                    }
+              }
+#else
+           #pragma omp parallel for shared(T, P_t_t1, tmp, Var) num_threads(atoi(getenv("DYNARE_NUM_THREADS")))
+           //for (int i = 0; i < n; i++)
+             for (int j = 0; j < size_index_KF; j++)
+               {
+                 t_Var *s_Var = &Var[j];
+                 tmp[s_Var->indx_1 ] += T[s_Var->indx_2] * P[s_Var->indx_3];
+               }
+           for (int i = pp; i < pp + n_diag; i++)
+             for (int j = pp; j < pp + n_diag; j++)
+               tmp[i + (j - pp) * n] = T[i + i * n] * P[j + i * n];
+           memset(P, 0, n * n * sizeof(double));
+           #pragma omp parallel for shared(T, tmp, P, Var_2) num_threads(atoi(getenv("DYNARE_NUM_THREADS")))
+           //for (int i = 0; i < n; i++)
+             for (int j = 0; j < size_index_KF_2; j++)
+               {
+                 t_Var *s_Var = &Var_2[j];
+                 P[s_Var->indx_1 /*+ i*/] += tmp[s_Var->indx_2 /*+ i*/] * T[s_Var->indx_3];
+               }
+#endif
+          /*for (int i = pp; i < pp + n_diag; i++)
+            for (int j = i; j < pp + n_diag; j++)
+              P[j + i * n] = T[i + i * n] * T[j + j * n] * P[j + i * n];*/
+              //P[j + i * n] = tmp[i +]
+          
+          for ( int i = 0; i < n; i++)
+            {
+              for ( int j = i ; j < n; j++)
+                P[j + i * n] += QQ[j + i * n];
+              for ( int j = i + 1; j < n; j++)
+                P[i + j * n] = P[j + i * n];
+            }
+         }
+      else
+        {
+          F_singular = false;
+          
+          //dF     = det(F);
+          dF     = abs(det(iF, pp));
+
+          //iF     = inv(F);
+          //int lwork = 4/*2*/* pp;
+#if defined(MATLAB_MEX_FILE) && defined(_WIN32)
+          dgetri(&pp, iF, &pp, ipiv, w, &lw, &info);
+#else          
+          dgetri_(&pp, iF, &pp, ipiv, w, &lw, &info);
+#endif
+
+          //lik(t) = log(dF)+transpose(v)*iF*v;
+          for (int i = 0; i < pp; i++)
+            {
+              double res = 0.0;
+              for (int j = 0; j < pp; j++)
+                res += v[j] * iF[j  * pp + i];
+              v_pp[i] = res;
+            }
+          double res = 0.0;
+          for (int i = 0; i < pp; i++)
+            res += v_pp[i] * v[i];
+
+          /*lik[t] = log(dF) + res;*/
+          lik[t] = (log(dF) + res + pp * log(2.0*pi))/2;
+          if (t + 1 >= start)
+            LIK += lik[t];
+
+          //K      = P(:,mf)*iF;
+          for (int i = 0; i < n; i++)
+            for (int j = 0; j < pp; j++)
+              P_mf[i + j * n] = P[i + mf[j] * n];
+          for (int i = 0; i < n; i++)
+            for (int j = 0; j < pp; j++)
+              {
+                double res = 0.0;
+                int j_pp = j * pp;
+                for (int k = 0; k < pp; k++)
+                  res += P_mf[i + k * n] * iF[j_pp + k];
+                K[i + j * n] = res;
+              }
+
+          //a      = T*(a+K*v);
+          for (int i = pp; i < n; i++)
+            {
+              double res = 0.0;
+              for (int j = 0; j < pp; j++)
+                res += K[j  * n + i] * v[j];
+              v_n[i] = res + a[i];
+            }
+          
+          for (int i = 0; i < n; i++)
+            {
+              double res = 0.0;
+              for (int j = pp; j < n; j++)
+                res += T[j  * n + i] * v_n[j];
+              a[i] = res;
+            }
+
+          //P      = T*(P-K*P(mf,:))*transpose(T)+QQ;
+          for (int i = 0; i < pp; i++)
+            for (int j = pp; j < n; j++)
+              P_mf[i + j * pp] = P[mf[i] + j * n];
+#ifdef BLAS
+          #pragma omp parallel for shared(K, P, K_P) num_threads(atoi(getenv("DYNARE_NUM_THREADS")))
+          for (int i = 0; i < n; i++)
+            for (int j = i; j < n; j++)
+              {
+                double res = 0.0;
+                //int j_pp = j * pp;
+                for (int k = 0; k < pp; k++)
+                  res += K[i + k * n] * P_mf[k + j * pp];
+                K_P[i * n + j] = K_P[j * n + i] = res;
+              }
+          //#pragma omp parallel for shared(P, K_P, P_t_t1)
+          for (int i = pp; i < n; i++)
+            for (int j = i; j < n; j++)
+              {
+                unsigned int k = i * n + j;
+                P_t_t1[j * n + i] = P_t_t1[k] = P[k] - K_P[k];
+              }
+          double one  = 1.0;
+          double zero = 0.0;
+          memcpy(P, QQ, n * n *sizeof(double));
+          dsymm("R", "U", &n, &n,
+                &one, P_t_t1, &n,
+                T, &n, &zero,
+                tmp, &n);
+          dgemm("N", "T", &n, &n,
+                &n, &one, tmp, &n,
+                T, &n, &one,
+                P, &n);
+          mexPrintf("P\n");
+          mexDisp(pP);
+#else
+          //#pragma omp parallel for shared(K, P, K_P) num_threads(atoi(getenv("DYNARE_NUM_THREADS")))
+          
+          for (int i = pp; i < n; i++)
+            {
+              unsigned int i1 = i - pp;
+              for (int j = i ; j < n; j++)
+                {
+                  unsigned int j1 = j - pp;
+                  double res = 0.0;
+                  int j_pp = j * pp;
+                  for (int k = 0; k < pp; k++)
+                    res += K[i + k * n] * P_mf[k + j_pp];
+                  K_P[i1 * n_state + j1] = K_P[j1 * n_state + i1] = res;
+                }
+            }
+          /*for (int j = pp; j < n; j++)
+            {
+              unsigned int j1 = j - pp;
+              double res = P[mf[j] + j * n]
+              for (int i = 0; i < n; i++)
+                {
+                  unsigned int i1 = i - pp;
+                  K_P[i1 * n_state + j1] = res * K[i + j * n];
+                }
+              for (int i = pp; i < n; i++)
+                {
+                  unsigned int i1 = i - pp;
+                  double res = 0.0;
+                  for (int k = 0; k < pp; k++)
+                    res += K[i + k * n] * P[mf[k] + j * n];
+                  K_P[i1 * n_state + j1] = K_P[j1 * n_state + i1] = res;
+                }
+            }*/
+            
+          //#pragma omp parallel for shared(P, K_P, P_t_t1)
+          for (int i = pp; i < n; i++)
+            {
+              unsigned int i1 = i - pp;
+              for (int j = i; j < n; j++)
+                {
+                  unsigned int j1 = j - pp;
+                  unsigned int k1 = i1 * n_state + j1;
+                  P_t_t1[j1 * n_state + i1] = P_t_t1[k1] = P[i * n + j] - K_P[k1];
+                }
+            }
+          
+          memset(tmp, 0, n * n_state * sizeof(double));
+#ifdef DIRECT
+          for (int i = 0; i < n; i++)
+            {
+              int max_k = i_nz_state_var[i];
+              for (int j = pp; j < n; j++)
+                {
+                  int j1 = j - pp;
+                  int j1_n_state = j1 * n_state - pp ;
+                  int indx_tmp = i + j1 * n ;
+                  //if ((i < pp) || (i >= n_diag + pp) || (j1 >= n_diag))
+                  for (int k = pp; k < max_k; k++)
+                    tmp[indx_tmp] += T[i + k * n] * P_t_t1[k + j1_n_state];
+                }
+            }
+          /*for (int i = pp; i < pp + n_diag; i++)
+             for (int j = pp; j < pp + n_diag; j++)
+               tmp[i + (j - pp) * n] = T[i + i * n] * P_t_t1[j - pp + (i - pp) * n_state];*/
+          memset(P, 0, n * n * sizeof(double));
+          
+          for (int i = 0; i < n; i++)
+            {
+              int n_n_obs = - n * pp ;
+              for (int j = i; j < n; j++)
+                {
+                  int max_k = i_nz_state_var[j];
+                  int P_indx = i * n + j;
+                  //if ((i < pp) || (i >= n_diag + pp) || (j < pp) || (j >= n_diag + pp))
+                  for (int k = pp; k < max_k; k++)
+                    {
+                      int k_n = k * n;
+                      P[P_indx] += tmp[i + k_n + n_n_obs] * T[j + k_n];
+                    }
+                }
+            }
+#else
+          //mexPrintf("t=%d, OK0\n",t);  
+          //#pragma omp parallel for shared(T, P_t_t1, tmp, Var) num_threads(atoi(getenv("DYNARE_NUM_THREADS")))
+           //for (int i = 0; i < n; i++)
+             for (int j = 0; j < size_index_KF; j++)
+               {
+                 t_Var *s_Var = &Var[j];
+                 tmp[s_Var->indx_1 ] += T[s_Var->indx_2 ] * P_t_t1[s_Var->indx_3];
+               }
+           for (int i = pp; i < pp + n_diag; i++)
+             for (int j = pp; j < pp + n_diag; j++)
+               tmp[i + (j - pp) * n] = T[i + i * n] * P_t_t1[j - pp + (i - pp) * n_state];
+           memset(P, 0, n * n * sizeof(double));
+           //#pragma omp parallel for shared(T, tmp, P, Var_2) num_threads(atoi(getenv("DYNARE_NUM_THREADS")))
+           //for (int i = 0; i < n; i++)
+             for (int j = 0; j < size_index_KF_2; j++)
+               {
+                 t_Var *s_Var = &Var_2[j];
+                 P[s_Var->indx_1 /*+ i*/] += tmp[s_Var->indx_2 /*+ i*/] * T[s_Var->indx_3];
+               }
+#endif
+          /*#pragma omp parallel for shared(T, P_t_t1, tmp, Var) num_threads(atoi(getenv("DYNARE_NUM_THREADS")))
+          for (int i = 0; i < size_index_KF; i++)
+            {
+              t_Var *s_Var = &Var[i];
+              tmp[s_Var->indx_1] += T[s_Var->indx_2] * P_t_t1[s_Var->indx_3];
+            }
+          for (int i = pp; i < pp + n_diag; i++)
+            for (int j = pp; j < pp + n_diag; j++)
+              tmp[i + (j - pp) * n] = T[i + i * n] * P_t_t1[j - pp + (i - pp) * n_state];
+          memset(P, 0, n * n * sizeof(double));
+          #pragma omp parallel for shared(T, tmp, P, Var_2) num_threads(atoi(getenv("DYNARE_NUM_THREADS")))
+          for (int i = 0; i < size_index_KF_2; i++)
+            {
+              t_Var *s_Var = &Var_2[i];
+              P[s_Var->indx_1] += tmp[s_Var->indx_2] * T[s_Var->indx_3];
+            }
+          */
+          /*for (int i = pp; i < pp + n_diag; i++)
+            for (int j = i; j < pp + n_diag; j++)
+              P[j + i * n] = T[i + i * n] * T[j + j * n] * P_t_t1[j - pp + (i - pp) * n_state];*/
+              //P[j + i * n] = tmp[i +]
+          
+          for ( int i = 0; i < n; i++)
+            {
+              for ( int j = i ; j < n; j++)
+                P[j + i * n] += QQ[j + i * n];
+              for ( int j = i + 1; j < n; j++)
+                P[i + j * n] = P[j + i * n];
+            }
+          //mexPrintf("      OK1\n");
+          /*mexPrintf("P\n");
+          mexDisp(pP);*/
+
+#endif
+          //notsteady = max(abs(K(:)-oldK)) > riccati_tol;
+          double max_abs = 0.0;
+          for (int i = 0; i < n * pp; i++)
+            {
+              double res = abs(K[i] - oldK[i]);
+              if (res > max_abs)
+                max_abs = res;
+            }
+          notsteady = max_abs > riccati_tol;
+
+          //oldK = K(:);
+          memcpy(oldK, K, n * pp * sizeof(double));
+        }
+      t++;
+    }
+
+  if (F_singular)
+    mexErrMsgTxt("The variance of the forecast error remains singular until the end of the sample\n");
+  
+
+  if (t+1 < smpl)
+    {
+      while (t < smpl)
+        {
+          //v = Y(:,t)-a(mf);
+          for (int i = 0; i < pp; i++)
+            v[i]  = Y[i + t * pp] - a[mf[i]];
+          
+          //a = T*(a+K*v);
+          for (int i = pp; i < n; i++)
+            {
+              double res = 0.0;
+              for (int j = 0; j < pp; j++)
+                res += K[j  * n + i] * v[j];
+              v_n[i] = res + a[i];
+            }
+          for (int i = 0; i < n; i++)
+            {
+              double res = 0.0;
+              for (int j = pp; j < n; j++)
+                res += T[j  * n + i] * v_n[j];
+              a[i] = res;
+            }
+        
+          //lik(t) = transpose(v)*iF*v;
+          for (int i = 0; i < pp; i++)
+            {
+              double res = 0.0;
+              for (int j = 0; j < pp; j++)
+                res += v[j] * iF[j  * n + i];
+              v_pp[i] = res;
+            }
+          double res = 0.0;
+          for (int i = 0; i < pp; i++)
+            res += v_pp[i] * v[i];
+
+          lik[t] = (log(dF) + res + pp * log(2.0*pi))/2;
+          if (t + 1 > start)
+            LIK += lik[t];
+
+          t++;
+        }
+    }
+    
+  if (nlhs >= 1)
+    {
+      plhs[0] = mxCreateDoubleMatrix(1, 1, mxREAL);
+      double* pind = mxGetPr(plhs[0]);
+      pind[0] = LIK;
+    }
+
+  if (nlhs == 2)
+    plhs[1] = plik;
+  mxFree(w);
+#ifdef DIRECT
+  /*mxDestroyArray(nze);
+  mxDestroyArray(nn_diag);*/
+  mxFree(i_nz_state_var);
+#else
+  mxFree(Var);
+  mxFree(Var_2);
+#endif
+  mxFree(tmp_a);
+  mxFree(v_pp);
+  mxFree(v_n);
+  mxFree(mf);
+  mxFree(w);
+  mxFree(iw);
+  mxFree(ipiv);
+  mxFree(oldK);
+  mxFree(P_mf);
+  mxDestroyArray(pa);
+  mxDestroyArray(p_tmp);
+  mxDestroyArray(pQQ);
+  mxDestroyArray(pv);
+  mxDestroyArray(pF);
+  mxDestroyArray(piF);
+  mxDestroyArray(p_P_t_t1);
+  mxDestroyArray(pK);
+  mxDestroyArray(p_K_P);
+}
+
diff --git a/preprocessor/DynamicModel.cc b/preprocessor/DynamicModel.cc
index 76b5988f97cd46667cf65d3bbc566987dca7a37a..eccc4e9c5aa38f09e4f84c93ee7c847ce7ff2041 100644
--- a/preprocessor/DynamicModel.cc
+++ b/preprocessor/DynamicModel.cc
@@ -2253,7 +2253,7 @@ DynamicModel::writeDynamicModel(ostream &DynamicOutput, bool use_dll) const
 }
 
 void
-DynamicModel::writeOutput(ostream &output, const string &basename, bool block_decomposition, bool byte_code, bool use_dll, int order) const
+DynamicModel::writeOutput(ostream &output, const string &basename, bool block_decomposition, bool byte_code, bool use_dll, int order, bool estimation_present) const
 {
   /* Writing initialisation for M_.lead_lag_incidence matrix
      M_.lead_lag_incidence is a matrix with as many columns as there are
@@ -2264,7 +2264,7 @@ DynamicModel::writeOutput(ostream &output, const string &basename, bool block_de
      model at a given period.
   */
 
-  vector<int> state_var;
+  vector<int> state_var, state_equ;
   output << "M_.lead_lag_incidence = [";
   // Loop on endogenous variables
   int nstatic = 0, 
@@ -2558,6 +2558,14 @@ DynamicModel::writeOutput(ostream &output, const string &basename, bool block_de
       for (int i = 0; i < nb_endo; i++)
         output << " " << equation_reordered[i]+1;
       output << "];\n";
+      vector<int> variable_inv_reordered(nb_endo);
+
+      for (int i = 0; i< nb_endo; i++)
+        variable_inv_reordered[variable_reordered[i]] = i;
+ 
+      for (vector<int>::const_iterator it=state_var.begin(); it != state_var.end(); it++)
+        state_equ.push_back(equation_reordered[variable_inv_reordered[*it - 1]]+1);
+
       map<pair< int, pair<int, int> >,  int>  lag_row_incidence;
       for (first_derivatives_t::const_iterator it = first_derivatives.begin();
            it != first_derivatives.end(); it++)
@@ -2586,6 +2594,235 @@ DynamicModel::writeOutput(ostream &output, const string &basename, bool block_de
           output << it->first.second.first+1 << " " << it->first.second.second+1 << ";\n";
         }
       output << "];\n";
+      if (estimation_present)
+        {
+          ofstream KF_index_file;
+          string main_name = basename;
+          main_name += ".kfi";
+          KF_index_file.open(main_name.c_str(), ios::out | ios::binary | ios::ate);
+          int n_obs = symbol_table.observedVariablesNbr();
+          int n_state = state_var.size();
+          int n = n_obs + n_state;
+          int nb_diag = 0;
+          //map<pair<int,int>, int>::const_iterator  row_state_var_incidence_it = row_state_var_incidence.begin();
+          vector<int> i_nz_state_var(n);
+          unsigned int lp = symbol_table.observedVariablesNbr();
+          for (int i = 0; i < n_obs; i++)
+            i_nz_state_var[i] = n;
+          
+          for (int block = 0;  block < nb_blocks; block++)
+            {
+              int block_size = getBlockSize(block);
+              int nze = 0;
+              
+              for (int i = 0; i < block_size; i++)
+                {
+                  int var = getBlockVariableID(block, i);
+                  vector<int>::const_iterator it_state_var = find(state_var.begin(), state_var.end(), var+1);
+                  if (it_state_var != state_var.end())
+                    nze++;
+                }
+              if (block == 0)
+                {
+                  set<pair<int, int> > row_state_var_incidence;
+                  for (block_derivatives_equation_variable_laglead_nodeid_t::const_iterator it = blocks_derivatives[block].begin(); it != (blocks_derivatives[block]).end(); it++)
+                    {
+                      vector<int>::const_iterator it_state_var = find(state_var.begin(), state_var.end(), getBlockVariableID(block, it->first.second)+1);
+                      if (it_state_var != state_var.end())
+                        {
+                          vector<int>::const_iterator it_state_equ = find(state_equ.begin(), state_equ.end(), getBlockEquationID(block, it->first.first)+1);
+                          if (it_state_equ != state_equ.end())
+                            row_state_var_incidence.insert(make_pair(it_state_equ - state_equ.begin(), it_state_var - state_var.begin()));
+                        }  
+                      
+                      
+                    }
+                  /*tmp_block_endo_derivative[make_pair(it->second.first, make_pair(it->first.second, it->first.first))] = it->second.second;
+                  if (block == 0)
+                        {
+                          
+                          vector<int>::const_iterator it_state_equ = find(state_equ.begin(), state_equ.end(), getBlockEquationID(block, i)+1);
+                          if (it_state_equ != state_equ.end())
+                            {
+                              cout << "row_state_var_incidence[make_pair([" << *it_state_equ << "] " << it_state_equ - state_equ.begin() << ", [" << *it_state_var << "] " << it_state_var - state_var.begin() << ")] =  1;\n";
+                              row_state_var_incidence.insert(make_pair(it_state_equ - state_equ.begin(), it_state_var - state_var.begin()));
+                            }
+                        }*/
+                  set<pair<int,int> >::const_iterator  row_state_var_incidence_it = row_state_var_incidence.begin();
+                  bool diag = true;
+                  int nb_diag_r = 0;
+                  while (row_state_var_incidence_it != row_state_var_incidence.end() && diag)
+                    {
+                      diag = (row_state_var_incidence_it->first == row_state_var_incidence_it->second);
+                      if (diag)
+                        {
+                          int equ = row_state_var_incidence_it->first;
+                          row_state_var_incidence_it++;
+                          if (equ != row_state_var_incidence_it->first)
+                            nb_diag_r++;
+                        }
+                        
+                    }
+                  set<pair<int,int> >  col_state_var_incidence;
+                  for(set<pair<int,int> >::const_iterator row_state_var_incidence_it = row_state_var_incidence.begin();row_state_var_incidence_it != row_state_var_incidence.end(); row_state_var_incidence_it++)
+                    col_state_var_incidence.insert(make_pair(row_state_var_incidence_it->second, row_state_var_incidence_it->first));
+                  set<pair<int,int> >::const_iterator  col_state_var_incidence_it = col_state_var_incidence.begin();
+                  diag = true;
+                  int nb_diag_c = 0;
+                  while (col_state_var_incidence_it != col_state_var_incidence.end() && diag)
+                    {
+                      diag = (col_state_var_incidence_it->first == col_state_var_incidence_it->second);
+                      if (diag)
+                        {
+                          int var = col_state_var_incidence_it->first;
+                          col_state_var_incidence_it++;
+                          if (var != col_state_var_incidence_it->first)
+                            nb_diag_c++;
+                        }
+                    }
+                  nb_diag = min( nb_diag_r, nb_diag_c);
+                  row_state_var_incidence.clear();
+                  col_state_var_incidence.clear();
+                }
+              for (int i = 0; i < nze; i++)
+                i_nz_state_var[lp + i] = lp + nze; 
+              lp += nze; 
+            }
+          output << "M_.nz_state_var = [";
+          for (int i = 0; i < lp; i++)
+            output << i_nz_state_var[i] << " ";
+          output << "];" << endl;
+          output << "M_.n_diag = " << nb_diag << ";" << endl;
+          KF_index_file.write(reinterpret_cast<char *>(&nb_diag), sizeof(nb_diag));
+          
+          
+          typedef pair<int, pair<int, int > > index_KF;
+          vector<index_KF> v_index_KF;
+          
+          /*   DO 170, J = 1, N
+                TEMP1 = ALPHA*A( J, J )
+                DO 110, I = 1, M
+                   C( I, J ) = TEMP1*B( I, J )
+  11  110       CONTINUE
+                DO 140, K = 1, J - 1
+                   TEMP1 = ALPHA*A( K, J )
+                   DO 130, I = 1, M
+                      C( I, J ) = C( I, J ) + TEMP1*B( I, K )
+  13  130          CONTINUE
+  14  140       CONTINUE
+                DO 160, K = J + 1, N
+                   TEMP1 = ALPHA*A( J, K )
+                   DO 150, I = 1, M
+                      C( I, J ) = C( I, J ) + TEMP1*B( I, K )
+  15  150          CONTINUE
+  16  160       CONTINUE
+  17  170    CONTINUE
+          for(int j = 0; j < n; j++)
+            {
+              double temp1 = P_t_t1[j + j * n];
+              for (int i = 0; i < n; i++)
+                tmp[i + j * n] = tmp1 * T[i + j * n];
+              for (int k = 0; k < j - 1; k++)
+                {
+                  temp1 = P_t_t1[k + j * n];
+                  for (int i = 0; i < n; i++)
+                    tmp[i + j * n] += temp1 * T[i + k * n];
+                }
+              for (int k = j + 1; k < n; k++)
+                {
+                  temp1 = P_t_t1[j + k * n];
+                  for (int i = 0; i < n; i++)
+                    tmp[i + j * n] += temp1 * T[i + k * n];
+                }
+            }
+
+          for(int j = n_obs; j < n; j++)
+            {
+              int j1 = j - n_obs;
+              double temp1 = P_t_t1[j1 + j1 * n_state];
+              for (int i = 0; i < n; i++)
+                tmp[i + j1 * n] = tmp1 * T[i + j * n];
+              for (int k = n_obs; k < j - 1; k++)
+                {
+                  int k1 = k - n_obs;
+                  temp1 = P_t_t1[k1 + j1 * n_state];
+                  for (int i = 0; i < n; i++)
+                    tmp[i + j1 * n] += temp1 * T[i + k * n];
+                }
+              for (int k = max(j + 1, n_obs); k < n; k++)
+                {
+                  int k1 = k - n_obs;
+                  temp1 = P_t_t1[j1 + k1 * n_state];
+                  for (int i = 0; i < n; i++)
+                    tmp[i + j1 * n] += temp1 * T[i + k * n];
+                }
+            }
+          
+          for(int j = n_obs; j < n; j++)
+            {
+              int j1 = j - n_obs;
+              double temp1 = P_t_t1[j1 + j1 * n_state];
+              for (int i = 0; i < n; i++)
+                tmp[i + j1 * n] = tmp1 * T[i + j * n];
+              for (int k = n_obs; k < j - 1; k++)
+                {
+                  int k1 = k - n_obs;
+                  temp1 = P_t_t1[k1 + j1 * n_state];
+                  for (int i = 0; i < n; i++)
+                  if ((i < n_obs) || (i >= nb_diag + n_obs) || (j1 >= nb_diag))
+                      tmp[i + j1 * n] += temp1 * T[i + k * n];
+                }
+              for (int k = max(j + 1, n_obs); k < n; k++)
+                {
+                  int k1 = k - n_obs;
+                  temp1 = P_t_t1[j1 + k1 * n_state];
+                  for (int i = 0; i < n; i++)
+                    if ((i < n_obs) || (i >= nb_diag + n_obs) || (j1 >= nb_diag))
+                      tmp[i + j1 * n] += temp1 * T[i + k * n];
+                }
+            }*/
+          for (int i = 0; i < n; i++)
+            //int i = 0;
+            for (int j = n_obs; j < n; j++)
+              {
+                int j1 = j - n_obs;
+                int j1_n_state = j1 * n_state - n_obs ;
+                if ((i < n_obs) || (i >= nb_diag + n_obs) || (j1 >= nb_diag))
+                  for (int k = n_obs; k < i_nz_state_var[i]; k++)
+                    {
+                      v_index_KF.push_back(make_pair(i + j1 * n, make_pair(i + k * n, k + j1_n_state)));
+                    }
+              }
+          int size_v_index_KF = v_index_KF.size();
+          cout << "size_v_index_KF=" << size_v_index_KF << endl;
+          KF_index_file.write(reinterpret_cast<char *>(&size_v_index_KF), sizeof(size_v_index_KF));      
+          for (vector<index_KF>::iterator it = v_index_KF.begin(); it != v_index_KF.end(); it++)
+            KF_index_file.write(reinterpret_cast<char *>(&(*it)), sizeof(index_KF));
+
+          //typedef pair<pair<int, int>, pair<int, int > > index_KF_2;
+          vector<index_KF> v_index_KF_2;
+          int n_n_obs = n * n_obs;
+          for (int i = 0; i < n; i++)
+          //i = 0;
+            for (int j = i; j < n; j++)
+              {
+                if ((i < n_obs) || (i >= nb_diag + n_obs) || (j < n_obs) || (j >= nb_diag + n_obs))
+                  for (int k = n_obs; k < i_nz_state_var[j]; k++)
+                    {
+                      int k_n = k * n;
+                      v_index_KF_2.push_back(make_pair(i * n + j,  make_pair(i + k_n - n_n_obs, j + k_n)));
+                    }
+              }
+          int size_v_index_KF_2 = v_index_KF_2.size();
+          cout << "size_v_index_KF_2=" << size_v_index_KF_2 << endl;
+          KF_index_file.write(reinterpret_cast<char *>(&size_v_index_KF_2), sizeof(size_v_index_KF_2));      
+          for (vector<index_KF>::iterator it = v_index_KF_2.begin(); it != v_index_KF_2.end(); it++)
+            KF_index_file.write(reinterpret_cast<char *>(&(*it)), sizeof(index_KF));      
+          KF_index_file.close();
+          /*ofstream KF_index_file;
+          streamoff Code_Size;
+          KF_index_file.open((file_name + ".kfi").c_str(), std::ios::in | std::ios::binary| std::ios::ate);*/
+        }
     }
   // Writing initialization for some other variables
   output << "M_.state_var = [";
diff --git a/preprocessor/DynamicModel.hh b/preprocessor/DynamicModel.hh
index f26f93b98a5c6c1632ce1cf0b9f8ce77d6ab9ebc..673ed862ffc65be3206bed5b2ffd3f304116edcd 100644
--- a/preprocessor/DynamicModel.hh
+++ b/preprocessor/DynamicModel.hh
@@ -245,7 +245,7 @@ public:
   void computingPass(bool jacobianExo, bool hessian, bool thirdDerivatives, bool paramsDerivatives,
                      const eval_context_t &eval_context, bool no_tmp_terms, bool block, bool use_dll, bool bytecode);
   //! Writes model initialization and lead/lag incidence matrix to output
-  void writeOutput(ostream &output, const string &basename, bool block, bool byte_code, bool use_dll, int order) const;
+  void writeOutput(ostream &output, const string &basename, bool block, bool byte_code, bool use_dll, int order, bool estimation_present) const;
 
   //! Adds informations for simulation in a binary file
   void Write_Inf_To_Bin_File_Block(const string &dynamic_basename, const string &bin_basename,
diff --git a/preprocessor/ModFile.cc b/preprocessor/ModFile.cc
index 662703b707187aeef35167dedd33f149aac56300..c28f7d35acb8948f42a219827b77362a19cd94dd 100644
--- a/preprocessor/ModFile.cc
+++ b/preprocessor/ModFile.cc
@@ -568,7 +568,7 @@ ModFile::writeOutputFiles(const string &basename, bool clear_all, bool console,
 
   if (dynamic_model.equation_number() > 0)
     {
-      dynamic_model.writeOutput(mOutputFile, basename, block, byte_code, use_dll, mod_file_struct.order_option);
+      dynamic_model.writeOutput(mOutputFile, basename, block, byte_code, use_dll, mod_file_struct.order_option, mod_file_struct.estimation_present);
       if (!no_static)
         static_model.writeOutput(mOutputFile, block);
     }
diff --git a/preprocessor/ModelTree.cc b/preprocessor/ModelTree.cc
index eb9b749b9a96dcf03bea1d7e1faa0942b8125c7c..0fcd062e41699488b76994f1d943c594ff5a2ac6 100644
--- a/preprocessor/ModelTree.cc
+++ b/preprocessor/ModelTree.cc
@@ -745,7 +745,7 @@ ModelTree::printBlockDecomposition(const vector<pair<int, int> > &blocks) const
           if (size > largest_block)
             {
               largest_block = size;
-              Nb_feedback_variable = blocks[Nb_SimulBlocks-1].second;
+              Nb_feedback_variable = getBlockMfs(block);
             }
         }
     }