diff --git a/mex/build/kronecker.am b/mex/build/kronecker.am
index ac07bb602029bd2331d225c5c58a68e7f71f6b7d..236d18625b78cb2f7111e088d213a0d8433a5f55 100644
--- a/mex/build/kronecker.am
+++ b/mex/build/kronecker.am
@@ -1,17 +1,19 @@
 mex_PROGRAMS = sparse_hessian_times_B_kronecker_C A_times_B_kronecker_C
 
-nodist_sparse_hessian_times_B_kronecker_C_SOURCES = sparse_hessian_times_B_kronecker_C.cc
+nodist_sparse_hessian_times_B_kronecker_C_SOURCES = sparse_hessian_times_B_kronecker_C.f08 matlab_mex.F08
 nodist_A_times_B_kronecker_C_SOURCES = A_times_B_kronecker_C.f08 matlab_mex.F08 blas_lapack.F08
 
-sparse_hessian_times_B_kronecker_C_CXXFLAGS = $(AM_CXXFLAGS) -fopenmp
-sparse_hessian_times_B_kronecker_C_LDFLAGS = $(AM_LDFLAGS) $(OPENMP_LDFLAGS)
+# Technically, only the sparse version needs OpenMP compilation flags.
+# However, using per-object compilation flags triggers object renaming in Automake,
+# with many problems (notably with Fortran modules, whose .o will be renamed, but
+# not their .mod).
+AM_FCFLAGS += -fopenmp
+AM_LDFLAGS += $(OPENMP_LDFLAGS)
 
 BUILT_SOURCES = $(nodist_sparse_hessian_times_B_kronecker_C_SOURCES) $(nodist_A_times_B_kronecker_C_SOURCES)
 CLEANFILES = $(nodist_sparse_hessian_times_B_kronecker_C_SOURCES) $(nodist_A_times_B_kronecker_C_SOURCES)
 
-%.cc: $(top_srcdir)/../../sources/kronecker/%.cc
-	$(LN_S) -f $< $@
-
+sparse_hessian_times_B_kronecker_C.o : matlab_mex.mod
 A_times_B_kronecker_C.o : matlab_mex.mod lapack.mod
 
 %.f08: $(top_srcdir)/../../sources/kronecker/%.f08
diff --git a/mex/sources/kronecker/sparse_hessian_times_B_kronecker_C.cc b/mex/sources/kronecker/sparse_hessian_times_B_kronecker_C.cc
deleted file mode 100644
index 8dd74b4791257462efc1fb5f766ef1799a6c030f..0000000000000000000000000000000000000000
--- a/mex/sources/kronecker/sparse_hessian_times_B_kronecker_C.cc
+++ /dev/null
@@ -1,191 +0,0 @@
-/*
- * Copyright © 2007-2021 Dynare Team
- *
- * This file is part of Dynare.
- *
- * Dynare is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Dynare is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Dynare.  If not, see <https://www.gnu.org/licenses/>.
- */
-
-/*
- * This mex file computes A·(B⊗C) or A·(B⊗B) without explicitly building B⊗C or B⊗B, so that
- * one can consider large matrices A, B and/or C, and assuming that A is a the hessian of a DSGE model
- * (dynare format). This mex file should not be used outside dyn_second_order_solver.m.
- */
-
-#include <algorithm>
-
-#include <dynmex.h>
-
-#include <omp.h>
-
-#define DEBUG_OMP 0
-
-void
-sparse_hessian_times_B_kronecker_B(const mwIndex *isparseA, const mwIndex *jsparseA, const double *vsparseA,
-                                   const double *B, double *D, size_t mA, size_t nA, size_t mB, size_t nB, int number_of_threads)
-{
-  /*
-  **   Loop over the columns of B⊗B (or of the result matrix D).
-  **   This loop is splitted into two nested loops because we use the
-  **   symmetric pattern of the hessian matrix.
-  */
-#pragma omp parallel for num_threads(number_of_threads)
-  for (mwIndex j1B = 0; j1B < static_cast<mwIndex>(nB); j1B++)
-    {
-#if DEBUG_OMP
-      mexPrintf("%d thread number is %d (%d).\n", j1B, omp_get_thread_num(), omp_get_num_threads());
-#endif
-      for (mwIndex j2B = j1B; j2B < static_cast<mwIndex>(nB); j2B++)
-        {
-          mwIndex jj = j1B*nB+j2B; // column of B⊗B index.
-          mwIndex iv = 0;
-          int nz_in_column_ii_of_A = 0;
-          mwIndex k1 = 0;
-          mwIndex k2 = 0;
-          /*
-          ** Loop over the rows of B⊗B (column jj).
-          */
-          for (mwIndex ii = 0; ii < static_cast<mwIndex>(nA); ii++)
-            {
-              k1 = jsparseA[ii];
-              k2 = jsparseA[ii+1];
-              if (k1 < k2) // otherwise column ii of A does not have non zero elements (and there is nothing to compute).
-                {
-                  ++nz_in_column_ii_of_A;
-                  mwIndex i1B = ii / mB;
-                  mwIndex i2B = ii % mB;
-                  double bb = B[j1B*mB+i1B]*B[j2B*mB+i2B];
-                  /*
-                  ** Loop over the non zero entries of A(:,ii).
-                  */
-                  for (mwIndex k = k1; k < k2; k++)
-                    {
-                      mwIndex kk = isparseA[k];
-                      D[jj*mA+kk] = D[jj*mA+kk] + bb*vsparseA[iv];
-                      iv++;
-                    }
-                }
-            }
-          if (nz_in_column_ii_of_A > 0)
-            std::copy_n(&D[jj*mA], mA, &D[(j2B*nB+j1B)*mA]);
-        }
-    }
-}
-
-void
-sparse_hessian_times_B_kronecker_C(const mwIndex *isparseA, const mwIndex *jsparseA, const double *vsparseA,
-                                   const double *B, const double *C, double *D,
-                                   size_t mA, size_t nA, size_t mB, size_t nB, size_t mC, size_t nC, int number_of_threads)
-{
-  /*
-  **   Loop over the columns of B⊗B (or of the result matrix D).
-  */
-#pragma omp parallel for num_threads(number_of_threads)
-  for (mwIndex jj = 0; jj < static_cast<mwIndex>(nB*nC); jj++) // column of B⊗C index.
-    {
-      // Uncomment the following line to check if all processors are used.
-#if DEBUG_OMP
-      mexPrintf("%d thread number is %d (%d).\n", jj, omp_get_thread_num(), omp_get_num_threads());
-#endif
-      mwIndex jB = jj/nC;
-      mwIndex jC = jj%nC;
-      mwIndex k1 = 0;
-      mwIndex k2 = 0;
-      mwIndex iv = 0;
-      int nz_in_column_ii_of_A = 0;
-      /*
-      ** Loop over the rows of B⊗C (column jj).
-      */
-      for (mwIndex ii = 0; ii < static_cast<mwIndex>(nA); ii++)
-        {
-          k1 = jsparseA[ii];
-          k2 = jsparseA[ii+1];
-          if (k1 < k2) // otherwise column ii of A does not have non zero elements (and there is nothing to compute).
-            {
-              ++nz_in_column_ii_of_A;
-              mwIndex iC = ii % mC;
-              mwIndex iB = ii / mC;
-              double cb = C[jC*mC+iC]*B[jB*mB+iB];
-              /*
-              ** Loop over the non zero entries of A(:,ii).
-              */
-              for (mwIndex k = k1; k < k2; k++)
-                {
-                  mwIndex kk = isparseA[k];
-                  D[jj*mA+kk] = D[jj*mA+kk] + cb*vsparseA[iv];
-                  iv++;
-                }
-            }
-        }
-    }
-}
-
-void
-mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
-{
-  // Check input and output:
-  if (nrhs > 4 || nrhs < 3 || nlhs != 1)
-    {
-      mexErrMsgTxt("sparse_hessian_times_B_kronecker_C takes 3 or 4 input arguments and provides 1 output argument.");
-      return; // Needed to shut up some GCC warnings
-    }
-
-  if (!mxIsSparse(prhs[0]))
-    mexErrMsgTxt("sparse_hessian_times_B_kronecker_C: First input must be a sparse (dynare) hessian matrix.");
-
-  // Get & Check dimensions (columns and rows):
-  size_t mA = mxGetM(prhs[0]);
-  size_t nA = mxGetN(prhs[0]);
-  size_t mB = mxGetM(prhs[1]);
-  size_t nB = mxGetN(prhs[1]);
-  size_t mC, nC;
-  if (nrhs == 4) // A·(B⊗C) is to be computed.
-    {
-      mC = mxGetM(prhs[2]);
-      nC = mxGetN(prhs[2]);
-      if (mB*mC != nA)
-        mexErrMsgTxt("Input dimension error!");
-    }
-  else // A·(B⊗B) is to be computed.
-    {
-      if (mB*mB != nA)
-        mexErrMsgTxt("Input dimension error!");
-    }
-  // Get input matrices:
-  int numthreads;
-  const double *B = mxGetPr(prhs[1]);
-  const double *C;
-  numthreads = static_cast<int>(mxGetScalar(prhs[2]));
-  if (nrhs == 4)
-    {
-      C = mxGetPr(prhs[2]);
-      numthreads = static_cast<int>(mxGetScalar(prhs[3]));
-    }
-  // Sparse (dynare) hessian matrix.
-  const mwIndex *isparseA = mxGetIr(prhs[0]);
-  const mwIndex *jsparseA = mxGetJc(prhs[0]);
-  const double *vsparseA = mxGetPr(prhs[0]);
-  // Initialization of the ouput:
-  if (nrhs == 4)
-    plhs[0] = mxCreateDoubleMatrix(mA, nB*nC, mxREAL);
-  else
-    plhs[0] = mxCreateDoubleMatrix(mA, nB*nB, mxREAL);
-  double *D = mxGetPr(plhs[0]);
-
-  // Computational part:
-  if (nrhs == 3)
-    sparse_hessian_times_B_kronecker_B(isparseA, jsparseA, vsparseA, B, D, mA, nA, mB, nB, numthreads);
-  else
-    sparse_hessian_times_B_kronecker_C(isparseA, jsparseA, vsparseA, B, C, D, mA, nA, mB, nB, mC, nC, numthreads);
-}
diff --git a/mex/sources/kronecker/sparse_hessian_times_B_kronecker_C.f08 b/mex/sources/kronecker/sparse_hessian_times_B_kronecker_C.f08
new file mode 100644
index 0000000000000000000000000000000000000000..acd7532bee774b4ed445132f24d7eac0a1309a7e
--- /dev/null
+++ b/mex/sources/kronecker/sparse_hessian_times_B_kronecker_C.f08
@@ -0,0 +1,164 @@
+! This mex file computes A·(B⊗C) or A·(B⊗B) without explicitly building B⊗C or
+! B⊗B, so that one can consider large matrices A, B and/or C, and assuming that
+! A is a the hessian of a DSGE model (Dynare format). This MEX file should not
+! be used outside of dyn_second_order_solver.m.
+
+! Copyright © 2007-2021 Dynare Team
+!
+! This file is part of Dynare.
+!
+! Dynare is free software: you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation, either version 3 of the License, or
+! (at your option) any later version.
+!
+! Dynare is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License
+! along with Dynare.  If not, see <https://www.gnu.org/licenses/>.
+
+subroutine mexFunction(nlhs, plhs, nrhs, prhs) bind(c, name='mexFunction')
+  use iso_fortran_env
+  use iso_c_binding
+  use matlab_mex
+  implicit none
+
+  type(c_ptr), dimension(*), intent(in), target :: prhs
+  type(c_ptr), dimension(*), intent(out) :: plhs
+  integer(c_int), intent(in), value :: nlhs, nrhs
+
+  integer(c_size_t) :: mA, nA, mB, nB, mC, nC
+  real(real64), dimension(:), pointer, contiguous :: A_v
+  integer(mwSize), dimension(:), pointer, contiguous :: A_ir, A_jc
+  real(real64), dimension(:, :), pointer, contiguous :: B, C, D
+  integer :: numthreads
+
+  if (nrhs > 4 .or. nrhs < 3 .or. nlhs /= 1) then
+     call mexErrMsgTxt("sparse_hessian_times_B_kronecker_C takes 3 or 4 input arguments and provides 1 output argument")
+  end if
+
+  if (.not. mxIsDouble(prhs(1)) .or. mxIsComplex(prhs(1)) .or. .not. mxIsSparse(prhs(1))) then
+     call mexErrMsgTxt("sparse_hessian_times_B_kronecker_C: first input must be a sparse (Dynare) hessian matrix")
+  end if
+  mA = mxGetM(prhs(1))
+  nA = mxGetN(prhs(1))
+  A_ir => mxGetIr(prhs(1))
+  A_jc => mxGetJc(prhs(1))
+  A_v => mxGetPr(prhs(1))
+
+  if (.not. mxIsDouble(prhs(2)) .or. mxIsComplex(prhs(2)) .or. mxIsSparse(prhs(2))) then
+     call mexErrMsgTxt("sparse_hessian_times_B_kronecker_C: second argument must be a (dense) real matrix")
+  end if
+
+  mB = mxGetM(prhs(2))
+  nB = mxGetN(prhs(2))
+  B(1:mB,1:nB) => mxGetPr(prhs(2))
+
+  if (nrhs == 4) then
+     ! A·(B⊗C) is to be computed.
+     if (.not. mxIsDouble(prhs(3)) .or. mxIsComplex(prhs(3)) .or. mxIsSparse(prhs(3))) then
+        call mexErrMsgTxt("sparse_hessian_times_B_kronecker_C: third argument must be a (dense) real matrix")
+     end if
+     mC = mxGetM(prhs(3))
+     nC = mxGetN(prhs(3))
+     if (mB*mC /= nA) then
+        call mexErrMsgTxt("Input dimension error!")
+     end if
+     C(1:mC,1:nC) => mxGetPr(prhs(3))
+
+     if (.not. mxIsScalar(prhs(4))) then
+        call mexErrMsgTxt("sparse_hessian_times_B_kronecker_C: fourth argument must be a scalar")
+     end if
+     numthreads = int(mxGetScalar(prhs(4)))
+
+     plhs(1) = mxCreateDoubleMatrix(mA, nB*nC, mxREAL)
+     D(1:mA,1:nB*nC) => mxGetPr(plhs(1))
+
+     call sparse_hessian_times_B_kronecker_C
+  else
+     ! A·(B⊗B) is to be computed.
+     if (mB*mB /= nA) then
+        call mexErrMsgTxt("Input dimension error!")
+     end if
+
+     if (.not. mxIsScalar(prhs(3))) then
+        call mexErrMsgTxt("sparse_hessian_times_B_kronecker_C: third argument must be a scalar")
+     end if
+     numthreads = int(mxGetScalar(prhs(3)))
+
+     plhs(1) = mxCreateDoubleMatrix(mA, nB*nB, mxREAL)
+     D(1:mA,1:nB*nB) => mxGetPr(plhs(1))
+
+     call sparse_hessian_times_B_kronecker_B
+  end if
+contains
+  subroutine sparse_hessian_times_B_kronecker_C
+    integer(c_size_t) :: ii,jj,jB,jC,iB,iC
+    integer(mwIndex) :: k1,k2,k,kk
+    real(real64) :: bc
+
+    ! Loop over the columns of B⊗C (or of the result matrix D)
+    !$omp parallel do num_threads(numthreads) default(none) shared(nA,nB,nC,mC,A_jc,A_ir,A_v,B,C,D) &
+    !$omp  private(ii,jj,jB,jC,iB,iC,k1,k2,k,kk,bc)
+    do jj = 1,nB*nC ! column of B⊗C index
+       jB = (jj-1)/nC+1
+       jC = mod(jj-1, nC)+1
+       ! Loop over the rows of B⊗C (column jj)
+       do ii=1,nA
+          k1 = A_jc(ii)
+          k2 = A_jc(ii+1)
+          if (k1 < k2) then ! Otherwise column ii of A does not have non zero elements (and there is nothing to compute)
+             iC = mod(ii-1, mC)+1
+             iB = (ii-1)/mC+1
+             bc = B(iB,jB)*C(iC,jC)
+             ! Loop over the non zero entries of A(:,ii)
+             do k=k1+1,k2
+                kk = A_ir(k)+1
+                D(kk,jj) = D(kk,jj) + A_v(k)*bc
+             end do
+          end if
+       end do
+    end do
+  end subroutine sparse_hessian_times_B_kronecker_C
+
+  subroutine sparse_hessian_times_B_kronecker_B
+    integer(c_size_t) :: ii,jj,j1B,j2B,i1B,i2B,nz_in_column_ii_of_A
+    integer(mwIndex) :: k1,k2,k,kk
+    real(real64) :: bb
+
+    ! Loop over the columns of B⊗B (or of the result matrix D)
+    ! This loop is splitted into two nested loops because we use the
+    ! symmetric pattern of the hessian matrix.
+
+    !$omp parallel do num_threads(numthreads) default(none) shared(nA,nB,mB,A_jc,A_ir,A_v,B,D) &
+    !$omp  private(ii,jj,j1B,j2B,i1B,i2B,nz_in_column_ii_of_A,k1,k2,k,kk,bb)
+    do j1B = 1,nB
+       do j2B = j1B,nB
+          jj = (j1B-1)*nB+j2B ! Column of B⊗B index
+          nz_in_column_ii_of_A = 0
+          ! Loop over the rows of B⊗C (column jj)
+          do ii=1,nA
+             k1 = A_jc(ii)
+             k2 = A_jc(ii+1)
+             if (k1 < k2) then ! Otherwise column ii of A does not have non zero elements (and there is nothing to compute)
+                nz_in_column_ii_of_A = nz_in_column_ii_of_A + 1
+                i1B = (ii-1)/mB+1
+                i2B = mod(ii-1, mB)+1
+                bb = B(i1B,j1B)*B(i2B,j2B)
+                ! Loop over the non zero entries of A(:,ii)
+                do k=k1+1,k2
+                   kk = A_ir(k)+1
+                   D(kk,jj) = D(kk,jj) + A_v(k)*bb
+                end do
+             end if
+          end do
+          if (nz_in_column_ii_of_A > 0) then
+             D(:,(j2B-1)*nB+j1B) = D(:,jj)
+          end if
+       end do
+    end do
+  end subroutine sparse_hessian_times_B_kronecker_B
+end subroutine mexFunction
diff --git a/mex/sources/matlab_mex.F08 b/mex/sources/matlab_mex.F08
index c8e90c973291609be45abf826324637bbe783260..ff84678acb0baff87771384bcfd6eb706feab661 100644
--- a/mex/sources/matlab_mex.F08
+++ b/mex/sources/matlab_mex.F08
@@ -177,15 +177,26 @@ module matlab_mat
        integer(mxComplexity), intent(in), value :: ComplexFlag
      end function mxCreateSparse
 
-     type(c_ptr) function mxGetIr(pm) bind(c, name="mxGetIr"//API_VER2)
+     logical(c_bool) function mxIsSparse(pm) bind(c, name="mxIsSparse"//API_VER)
        use iso_c_binding
        type(c_ptr), intent(in), value :: pm
-     end function mxGetIr
+     end function mxIsSparse
 
-     type(c_ptr) function mxGetJc(pm) bind(c, name="mxGetJc"//API_VER2)
+     integer(mwSize) function mxGetNzmax(pm) bind(c, name="mxGetNzmax"//API_VER2)
        use iso_c_binding
+       import :: mwSize
        type(c_ptr), intent(in), value :: pm
-     end function mxGetJc
+     end function mxGetNzmax
+
+     type(c_ptr) function mxGetIr_internal(pm) bind(c, name="mxGetIr"//API_VER2)
+       use iso_c_binding
+       type(c_ptr), intent(in), value :: pm
+     end function mxGetIr_internal
+
+     type(c_ptr) function mxGetJc_internal(pm) bind(c, name="mxGetJc"//API_VER2)
+       use iso_c_binding
+       type(c_ptr), intent(in), value :: pm
+     end function mxGetJc_internal
 
      ! Nonnumeric types
      type(c_ptr) function mxGetData(pm) bind(c, name="mxGetData"//API_VER)
@@ -318,6 +329,18 @@ contains
   end function mxGetPi
 #endif
 
+  function mxGetIr(pm)
+    type(c_ptr), intent(in) :: pm
+    integer(mwIndex), dimension(:), pointer, contiguous :: mxGetIr
+    call c_f_pointer(mxGetIr_internal(pm), mxGetIr, [ mxGetNzmax(pm) ])
+  end function mxGetIr
+
+  function mxGetJc(pm)
+    type(c_ptr), intent(in) :: pm
+    integer(mwIndex), dimension(:), pointer, contiguous :: mxGetJc
+    call c_f_pointer(mxGetJc_internal(pm), mxGetJc, [ mxGetN(pm)+1 ])
+  end function mxGetJc
+
   function mxGetLogicals(array_ptr)
     type(c_ptr), intent(in) :: array_ptr
     logical(mxLogical), dimension(:), pointer, contiguous :: mxGetLogicals