Skip to content
Snippets Groups Projects
Verified Commit 4bfd687b authored by Sébastien Villemot's avatar Sébastien Villemot
Browse files

Merge branch 'local_state_space_iteration_3' of git.dynare.org:normann/dynare

Ref. !2045
parents 27075fa8 c6d5c48f
Branches
No related tags found
No related merge requests found
...@@ -8,8 +8,8 @@ nodist_libkordersim_a_SOURCES = \ ...@@ -8,8 +8,8 @@ nodist_libkordersim_a_SOURCES = \
partitions.f08 \ partitions.f08 \
simulation.f08 \ simulation.f08 \
struct.f08 \ struct.f08 \
pthread.F08 \ pthread.F08
pparticle.f08
BUILT_SOURCES = $(nodist_libkordersim_a_SOURCES) BUILT_SOURCES = $(nodist_libkordersim_a_SOURCES)
CLEANFILES = $(nodist_libkordersim_a_SOURCES) CLEANFILES = $(nodist_libkordersim_a_SOURCES)
...@@ -27,8 +27,5 @@ partitions.mod: partitions.o ...@@ -27,8 +27,5 @@ partitions.mod: partitions.o
simulation.o: partitions.mod lapack.mod simulation.o: partitions.mod lapack.mod
simulation.mod: simulation.o simulation.mod: simulation.o
pparticle.o: simulation.mod matlab_mex.mod
pparticle.mod: pparticle.o
%.f08: $(top_srcdir)/../../sources/libkordersim/%.f08 %.f08: $(top_srcdir)/../../sources/libkordersim/%.f08
$(LN_S) -f $< $@ $(LN_S) -f $< $@
mex_PROGRAMS = local_state_space_iteration_2 local_state_space_iteration_k mex_PROGRAMS = local_state_space_iteration_2 local_state_space_iteration_3 local_state_space_iteration_k
nodist_local_state_space_iteration_2_SOURCES = local_state_space_iteration_2.cc nodist_local_state_space_iteration_2_SOURCES = local_state_space_iteration_2.cc
nodist_local_state_space_iteration_3_SOURCES = local_state_space_iteration_3.f08
nodist_local_state_space_iteration_k_SOURCES = local_state_space_iteration_k.f08 nodist_local_state_space_iteration_k_SOURCES = local_state_space_iteration_k.f08
local_state_space_iteration_2_CXXFLAGS = $(AM_CXXFLAGS) -fopenmp local_state_space_iteration_2_CXXFLAGS = $(AM_CXXFLAGS) -fopenmp
local_state_space_iteration_2_LDFLAGS = $(AM_LDFLAGS) $(OPENMP_LDFLAGS) local_state_space_iteration_2_LDFLAGS = $(AM_LDFLAGS) $(OPENMP_LDFLAGS)
local_state_space_iteration_3_FCFLAGS = $(AM_FCFLAGS) -I../libkordersim -pthread
local_state_space_iteration_3_LDADD = ../libkordersim/libkordersim.a
local_state_space_iteration_k_FCFLAGS = $(AM_FCFLAGS) -I../libkordersim -pthread local_state_space_iteration_k_FCFLAGS = $(AM_FCFLAGS) -I../libkordersim -pthread
local_state_space_iteration_k_LDADD = ../libkordersim/libkordersim.a local_state_space_iteration_k_LDADD = ../libkordersim/libkordersim.a
BUILT_SOURCES = $(nodist_local_state_space_iteration_2_SOURCES) \ BUILT_SOURCES = $(nodist_local_state_space_iteration_2_SOURCES) \
$(nodist_local_state_space_iteration_3_SOURCES) \
$(nodist_local_state_space_iteration_k_SOURCES) $(nodist_local_state_space_iteration_k_SOURCES)
CLEANFILES = $(nodist_local_state_space_iteration_2_SOURCES) \ CLEANFILES = $(nodist_local_state_space_iteration_2_SOURCES) \
$(nodist_local_state_space_iteration_3_SOURCES) \
$(nodist_local_state_space_iteration_k_SOURCES) $(nodist_local_state_space_iteration_k_SOURCES)
%.cc: $(top_srcdir)/../../sources/local_state_space_iterations/%.cc %.f08: $(top_srcdir)/../../sources/local_state_space_iterations/%.f08
$(LN_S) -f $< $@ $(LN_S) -f $< $@
%.f08: $(top_srcdir)/../../sources/local_state_space_iterations/%.f08 %.cc: $(top_srcdir)/../../sources/local_state_space_iterations/%.cc
$(LN_S) -f $< $@ $(LN_S) -f $< $@
\ No newline at end of file
...@@ -6,6 +6,7 @@ EXTRA_DIST = \ ...@@ -6,6 +6,7 @@ EXTRA_DIST = \
blas_lapack.F08 \ blas_lapack.F08 \
defines.F08 \ defines.F08 \
matlab_mex.F08 \ matlab_mex.F08 \
pthread.F08 \
mjdgges \ mjdgges \
kronecker \ kronecker \
bytecode \ bytecode \
......
...@@ -22,15 +22,16 @@ ...@@ -22,15 +22,16 @@
module partitions module partitions
use pascal use pascal
use sort use sort
use iso_fortran_env
implicit none implicit none
! index represents the aforementioned (α₁,…,αₘ) objects ! index represents the aforementioned (α₁,…,αₘ) objects
type index type index
integer, dimension(:), allocatable :: ind integer, dimension(:), allocatable :: coor
end type index end type index
interface index interface index
module procedure :: init_index module procedure :: init_index, init_index_vec, init_index_int
end interface index end interface index
! a dictionary that matches folded indices with folded offsets ! a dictionary that matches folded indices with folded offsets
...@@ -56,18 +57,36 @@ module partitions ...@@ -56,18 +57,36 @@ module partitions
contains contains
! Constructor for the index type ! Constructors for the index type
type(index) function init_index(d, ind) ! Simply allocates the index with the size provided as input
type(index) function init_index(d)
integer, intent(in) :: d integer, intent(in) :: d
integer, dimension(d), intent(in) :: ind allocate(init_index%coor(d))
allocate(init_index%ind(d))
init_index%ind = ind
end function init_index end function init_index
! Creates an index with the vector provided as inputs
type(index) function init_index_vec(ind)
integer, dimension(:), intent(in) :: ind
allocate(init_index_vec%coor(size(ind)))
init_index_vec%coor = ind
end function init_index_vec
! Creates the index with a given size
! and fills it with a given integer
type(index) function init_index_int(d, m)
integer, intent(in) :: d, m
integer :: i
allocate(init_index_int%coor(d))
do i=1,d
init_index_int%coor(i) = m
end do
end function init_index_int
! Operators for the index type
! Comparison for the index type. Returns true if the two indices are different ! Comparison for the index type. Returns true if the two indices are different
type(logical) function diff_indices(i1,i2) type(logical) function diff_indices(i1,i2)
type(index), intent(in) :: i1, i2 type(index), intent(in) :: i1, i2
if (size(i1%ind) /= size(i2%ind) .or. any(i1%ind /= i2%ind)) then if (size(i1%coor) /= size(i2%coor) .or. any(i1%coor /= i2%coor)) then
diff_indices = .true. diff_indices = .true.
else else
diff_indices = .false. diff_indices = .false.
...@@ -91,7 +110,7 @@ contains ...@@ -91,7 +110,7 @@ contains
integer :: i integer :: i
i = 1 i = 1
if (d>1) then if (d>1) then
do while ((i < d) .and. (idx%ind(i+1) == idx%ind(1))) do while ((i < d) .and. (idx%coor(i+1) == idx%coor(1)))
i = i+1 i = i+1
end do end do
end if end if
...@@ -99,11 +118,10 @@ contains ...@@ -99,11 +118,10 @@ contains
end function get_prefix_length end function get_prefix_length
! Gets the folded index associated with an unfolded index ! Gets the folded index associated with an unfolded index
type(index) function u_index_to_f_index(idx, d) type(index) function u_index_to_f_index(idx)
type(index), intent(in) :: idx type(index), intent(in) :: idx
integer, intent(in) :: d u_index_to_f_index = index(idx%coor)
u_index_to_f_index = index(d, idx%ind) call sort_int(u_index_to_f_index%coor)
call sort_int(u_index_to_f_index%ind)
end function u_index_to_f_index end function u_index_to_f_index
! Converts the offset of an unfolded tensor to the associated unfolded tensor index ! Converts the offset of an unfolded tensor to the associated unfolded tensor index
...@@ -112,11 +130,11 @@ contains ...@@ -112,11 +130,11 @@ contains
type(index) function u_offset_to_u_index(j, n, d) type(index) function u_offset_to_u_index(j, n, d)
integer, intent(in) :: j, n, d ! offset, number of variables and dimensions respectively integer, intent(in) :: j, n, d ! offset, number of variables and dimensions respectively
integer :: i, tmp, r integer :: i, tmp, r
allocate(u_offset_to_u_index%ind(d)) allocate(u_offset_to_u_index%coor(d))
tmp = j-1 ! We substract 1 as j ∈ {1, ..., n} so that tmp ∈ {0, ..., n-1} and our modular operations work tmp = j-1 ! We substract 1 as j ∈ {1, ..., n} so that tmp ∈ {0, ..., n-1} and our modular operations work
do i=d,1,-1 do i=d,1,-1
r = mod(tmp, n) r = mod(tmp, n)
u_offset_to_u_index%ind(i) = r u_offset_to_u_index%coor(i) = r
tmp = (tmp-r)/n tmp = (tmp-r)/n
end do end do
end function u_offset_to_u_index end function u_offset_to_u_index
...@@ -136,11 +154,26 @@ contains ...@@ -136,11 +154,26 @@ contains
j = 1 j = 1
else else
prefix = get_prefix_length(idx,d) prefix = get_prefix_length(idx,d)
tmp = index(d-prefix, idx%ind(prefix+1:) - idx%ind(1)) tmp = index(idx%coor(prefix+1:) - idx%coor(1))
j = get(d, n+d-1, p) - get(d, n-idx%ind(1)+d-1, p) + f_index_to_f_offset(tmp, n-idx%ind(1), d-prefix, p) j = get(d, n+d-1, p) - get(d, n-idx%coor(1)+d-1, p) + f_index_to_f_offset(tmp, n-idx%coor(1), d-prefix, p)
end if end if
end function f_index_to_f_offset end function f_index_to_f_offset
! Returns the unfolded tensor offset associated with an unfolded tensor index
! Written in a recursive way, the unfolded offset off(α₁,…,αₘ) associated with the
! index (α₁,…,αₘ) with αᵢ ∈ {1, ..., n} verifies
! off(α₁,…,αₘ) = n*off(α₁,…,αₘ₋₁) + αₘ
integer function u_index_to_u_offset(idx, n, d)
type(index), intent(in) :: idx ! unfolded index
integer, intent(in) :: n, d ! number of variables and dimensions
integer :: j
u_index_to_u_offset = 0
do j=1,d
u_index_to_u_offset = n*u_index_to_u_offset + idx%coor(j)-1
end do
u_index_to_u_offset = u_index_to_u_offset + 1
end function u_index_to_u_offset
! Function that searches a value in an array of a given length ! Function that searches a value in an array of a given length
type(integer) function find(a, v, l) type(integer) function find(a, v, l)
integer, intent(in) :: l ! length of the array integer, intent(in) :: l ! length of the array
...@@ -180,7 +213,7 @@ contains ...@@ -180,7 +213,7 @@ contains
c = dict(n, d, p) c = dict(n, d, p)
do j=1,n**d do j=1,n**d
tmp = u_offset_to_u_index(j,n,d) tmp = u_offset_to_u_index(j,n,d)
tmp = u_index_to_f_index(tmp, d) tmp = u_index_to_f_index(tmp)
found = find(c%indices, tmp, c%pr) found = find(c%indices, tmp, c%pr)
if (found == 0) then if (found == 0) then
c%pr = c%pr+1 c%pr = c%pr+1
...@@ -193,6 +226,127 @@ contains ...@@ -193,6 +226,127 @@ contains
end do end do
end subroutine fill_folded_indices end subroutine fill_folded_indices
! ! Specialized code for local_state_space_iteration_3
! ! Considering the folded tensor gᵥᵥ, for each folded offset,
! ! fills (i) the corresponding index, (ii) the corresponding
! ! unfolded offset in the corresponding unfolded tensor
! ! and (iii) the number of equivalent unfolded indices the folded index
! ! associated with the folded offset represents
! subroutine index_2(indices, uoff, neq, q)
! integer, intent(in) :: q ! size of v
! integer, dimension(:), intent(inout) :: uoff, neq ! list of corresponding unfolded offsets and number of equivalent unfolded indices
! type(index), dimension(:), intent(inout) :: indices ! list of folded indices
! integer :: m, j
! m = q*(q+1)/2 ! total number of folded indices : ⎛q+2-1⎞
! ! ⎝ 2 ⎠
! uoff(1) = 1
! neq(1) = 1
! ! offsets such that j ∈ { 2, ..., q } are associated with
! ! indices (1, α), α ∈ { 2, ..., q }
! do j=2,q
! neq(j) = 2
! end do
! end subroutine index_2
! In order to list folded indices α = (α₁,…,αₘ) with αᵢ ∈ { 1, ..., n },
! at least 2 algorithms exist: a recursive one and an iterative one.
! The recursive algorithm list_folded_indices(n,m,q) that returns
! the list of all folded indices α = (α₁,…,αₘ) with αᵢ ∈ { 1+q, ..., n+q } works as follows:
! if n=0, return an empty list
! else if m=0, return the list containing the sole zero-sized index
! otherwise,
! return the concatenation of ([1+q, ℓ] for ℓ ∈ list_folded_indices(n,m-1,q))
! and list_folded_indices(n-1,m,1,q+1)]
! A call to list_folded_indices(n,m,0) then returns the list
! of folded indices α = (α₁,…,αₘ) with αᵢ ∈ { 1, ..., n }
! The problem with recursive functions is that the compiler may manage poorly
! the stack, which slows down the function's execution
! recursive function list_folded_indices(n, m, q) result(list)
! integer :: n, m, q
! type(index), allocatable, dimension(:) :: list, temp
! integer :: j
! if (m==0) then
! list = [index(0)]
! elseif (n == 0) then
! allocate(list(0))
! else
! temp = list_folded_indices(n,m-1,q)
! list = [(index([1+q,temp(j)%coor]), j=1, size(temp)), list_folded_indices(n-1,m,q+1)]
! end if
! end function list_folded_indices
! Considering the folded tensor gᵥᵐ, for each folded offset,
! fills the lists of (i) the corresponding index, (ii) the corresponding
! unfolded offset in the corresponding unfolded tensor
! and (iii) the number of equivalent unfolded indices the folded index
! (associated with the folded offset) represents
! The algorithm to get the folded index associated with a folded offset
! relies on the definition of the lexicographic order.
! Considering α = (α₁,…,αₘ) with αᵢ ∈ { 1, ..., n },
! the next index α' is such that there exists i that verifies
! αⱼ = αⱼ' for all j < i, αᵢ' > αᵢ. Note that all the coordinates
! αᵢ', ... , αₘ' need to be as small as the lexicographic order allows
! for α' to immediately follow α.
! Suppose j is the latest incremented coordinate:
! if αⱼ < n, then αⱼ' = αⱼ + 1
! otherwise αⱼ = n, set αₖ' = αⱼ₋₁ + 1 for all k ≥ j-1
! if αⱼ₋₁ = n, set j := j-1
! otherwise, set j := m
! The algorithm to count the number of equivalent unfolded indices
! works as follows. A folded index can be written as α = (x₁, ..., x₁, ..., xₚ, ..., xₚ)
! such that x₁ < x₂ < ... < xₚ. Denote kᵢ the number of coordinates equal to xᵢ.
! The number of unfolded indices equivalent to α is c(α) = ⎛ d ⎞
! ⎝ k₁, k₂, ..., kₚ ⎠
! Suppose j is the latest incremented coordinate.
! If αⱼ < n, then αⱼ' = αⱼ + 1, k(αⱼ) := k(αⱼ)-1, k(αⱼ') := k(αⱼ')+1.
! In this case, c(α') = c(α)*(k(αⱼ)+1)/k(αⱼ')
! otherwise, αⱼ = n: set αₖ' = αⱼ₋₁ + 1 for all k ≥ j-1,
! k(αⱼ₋₁) := k(αⱼ₋₁)-1, k(n) := 0, k(αⱼ₋₁') = m-(j-1)+1
! In this case, we compute c(α') with the multinomial formula above
! Finally, the algorithm that returns the unfolded offset of a given folded index works
! as follows. Suppose j is the latest incremented coordinate and off(α) is the unfolded offset
! associated with index α:
! if αⱼ < n, then αⱼ' = αⱼ + 1 and off(α') = off(α)+1
! otherwise, αⱼ = n: set αₖ' = αⱼ₋₁ + 1 for all k ≥ j-1
! and off(α') can be computed using the u_index_to_u_offset routine
subroutine folded_offset_loop(ind, nbeq, off, n, m, p)
type(index), dimension(:), intent(inout) :: ind ! list of indices
integer, dimension(:), intent(inout) :: nbeq, off ! lists of numbers of equivalent indices and of offsets
integer, intent(in) :: n, m
type(pascal_triangle), intent(in) :: p
integer :: j, lastinc, k(n)
ind(1) = index(m, 1)
nbeq(1) = 1
k = 0
k(1) = m
off(1) = 1
j = 2
lastinc = m
do while (j <= size(ind))
ind(j) = index(ind(j-1)%coor)
if (ind(j-1)%coor(lastinc) == n) then
ind(j)%coor(lastinc-1:m) = ind(j-1)%coor(lastinc-1)+1
k(ind(j-1)%coor(lastinc-1)) = k(ind(j-1)%coor(lastinc-1))-1
k(n) = 0
k(ind(j)%coor(lastinc-1)) = m - (lastinc-1) + 1
nbeq(j) = multinomial(k,m,p)
off(j) = u_index_to_u_offset(ind(j), n, m)
if (ind(j)%coor(m) == n) then
lastinc = lastinc-1
else
lastinc = m
end if
else
ind(j)%coor(lastinc) = ind(j-1)%coor(lastinc)+1
k(ind(j)%coor(lastinc)) = k(ind(j)%coor(lastinc))+1
nbeq(j) = nbeq(j-1)*k(ind(j-1)%coor(lastinc))/k(ind(j)%coor(lastinc))
k(ind(j-1)%coor(lastinc)) = k(ind(j-1)%coor(lastinc))-1
off(j) = off(j-1)+1
end if
j = j+1
end do
end subroutine folded_offset_loop
end module partitions end module partitions
! gfortran -o partitions partitions.f08 pascal.f08 sort.f08 ! gfortran -o partitions partitions.f08 pascal.f08 sort.f08
...@@ -203,8 +357,11 @@ contains ...@@ -203,8 +357,11 @@ contains
! implicit none ! implicit none
! type(index) :: uidx, fidx, i1, i2 ! type(index) :: uidx, fidx, i1, i2
! integer, dimension(:), allocatable :: folded ! integer, dimension(:), allocatable :: folded
! integer :: i, uj, n, d ! integer :: i, uj, n, d, j, nb_folded_idcs
! type(pascal_triangle) :: p ! type(pascal_triangle) :: p
! type(index), dimension(:), allocatable :: list_folded_idcs
! integer, dimension(:), allocatable :: nbeq, off
! ! Unfolded indices and offsets ! ! Unfolded indices and offsets
! ! 0,0,0 1 1,0,0 10 2,0,0 19 ! ! 0,0,0 1 1,0,0 10 2,0,0 19
! ! 0,0,1 2 1,0,1 11 2,0,1 20 ! ! 0,0,1 2 1,0,1 11 2,0,1 20
...@@ -231,15 +388,15 @@ contains ...@@ -231,15 +388,15 @@ contains
! ! u_offset_to_u_index ! ! u_offset_to_u_index
! uidx = u_offset_to_u_index(uj,n,d) ! uidx = u_offset_to_u_index(uj,n,d)
! print '(3i2)', (uidx%ind(i), i=1,d) ! should display 0 2 1 ! print '(3i2)', (uidx%coor(i), i=1,d) ! should display 0 2 1
! ! f_index_to_f_offset ! ! f_index_to_f_offset
! fidx = u_index_to_f_index(uidx, d) ! fidx = u_index_to_f_index(uidx)
! print '(i2)', f_index_to_f_offset(fidx, n, d, p) ! should display 5 ! print '(i2)', f_index_to_f_offset(fidx, n, d, p) ! should display 5
! ! /= ! ! /=
! i1 = index(5, (/1,2,3,4,5/)) ! i1 = index((/1,2,3,4,5/))
! i2 = index(5, (/1,2,3,4,6/)) ! i2 = index((/1,2,3,4,6/))
! if (i1 /= i2) then ! if (i1 /= i2) then
! print *, "Same!" ! print *, "Same!"
! else ! else
...@@ -247,10 +404,25 @@ contains ...@@ -247,10 +404,25 @@ contains
! end if ! end if
! ! fill_folded_indices ! ! fill_folded_indices
! allocate(folded(n**d)) ! ! allocate(folded(n**d))
! call fill_folded_indices(folded,n,d) ! ! call fill_folded_indices(folded,n,d,p)
! print *, "Matching offsets unfolded -> folded" ! ! print *, "Matching offsets unfolded -> folded"
! print '(1000i4)', (i, i=1,n**d) ! ! print '(1000i4)', (i, i=1,n**d)
! print '(1000i4)', (folded(i), i=1,n**d) ! ! print '(1000i4)', (folded(i), i=1,n**d)
! n = 3
! d = 3
! p = pascal_triangle(n+d-1)
! nb_folded_idcs = get(d,n+d-1,p)
! ! recursive list_folded_indices
! ! list_folded_idcs = list_folded_indices(n, d, 0)
! ! print '(4i2)', ((list_folded_idcs(i)%coor(j), j=1,d), i=1,nb_folded_idcs)
! ! iterative list_folded_indices
! allocate(list_folded_idcs(nb_folded_idcs), nbeq(nb_folded_idcs), off(nb_folded_idcs))
! call folded_offset_loop(list_folded_idcs, nbeq, off, n, d, p)
! print '(3i2)', ((list_folded_idcs(i)%coor(j), j=1,d), i=1,nb_folded_idcs)
! print '(i3)', (nbeq(i), i=1,nb_folded_idcs)
! print '(i4)', (off(i), i=1,nb_folded_idcs)
! end program test ! end program test
\ No newline at end of file
...@@ -60,13 +60,28 @@ contains ...@@ -60,13 +60,28 @@ contains
! Returns ⎛n⎞ stored in pascal_triangle p ! Returns ⎛n⎞ stored in pascal_triangle p
! ⎝k⎠ ! ⎝k⎠
integer function get(k,n,p)
type(integer) function get(k,n,p)
integer, intent(in) :: k, n integer, intent(in) :: k, n
type(pascal_triangle), intent(in) :: p type(pascal_triangle), intent(in) :: p
get = p%lines(n)%coeffs(k+1) get = p%lines(n)%coeffs(k+1)
end function get end function get
! Returns ⎛ d ⎞
! ⎝ k₁, k₂, ..., kₙ ⎠
integer function multinomial(k,d,p)
integer, intent(in) :: k(:), d
type(pascal_triangle), intent(in) :: p
integer :: s, i
s = d
multinomial = 1
i = 1
do while (s > 0)
multinomial = multinomial*get(k(i), s, p)
s = s-k(i)
i = i+1
end do
end function
end module pascal end module pascal
! gfortran -o pascal pascal.f08 ! gfortran -o pascal pascal.f08
...@@ -86,4 +101,10 @@ end module pascal ...@@ -86,4 +101,10 @@ end module pascal
! end if ! end if
! end do ! end do
! end do ! end do
! d = 3
! p = pascal_triangle(d)
! print '(i2)', multinomial([1,2,3], d, p) ! should print 60
! print '(i2)', multinomial([0,0,0,3], d, p) ! should print 20
! end program test ! end program test
\ No newline at end of file
! Copyright © 2021-2022 Dynare Team
!
! This file is part of Dynare.
!
! Dynare is free software: you can redistribute it and/or modify
! it under the terms of the GNU General Public License as published by
! the Free Software Foundation, either version 3 of the License, or
! (at your option) any later version.
!
! Dynare is distributed in the hope that it will be useful,
! but WITHOUT ANY WARRANTY; without even the implied warranty of
! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
! GNU General Public License for more details.
!
! You should have received a copy of the GNU General Public License
! along with Dynare. If not, see <https://www.gnu.org/licenses/>.
! Routines and data structures for multithreading over particles in local_state_space_iteration_k
module pparticle
use iso_c_binding
use simulation
use matlab_mex
implicit none
type tdata
integer :: nm, nys, endo_nbr, nvar, order, nrestricted, nparticles
real(real64), allocatable :: yhat(:,:), e(:,:), ynext(:,:), ys_reordered(:), restrict_var_list(:)
type(pol), dimension(:), allocatable :: udr
end type tdata
type(tdata) :: thread_data
contains
subroutine thread_eval(arg) bind(c)
type(c_ptr), intent(in), value :: arg
integer, pointer :: im
integer :: i, j, start, end, q, r, ind
type(horner), dimension(:), allocatable :: h
real(real64), dimension(:), allocatable :: dyu
! Checking that the thread number got passed as argument
if (.not. c_associated(arg)) then
call mexErrMsgTxt("No argument was passed to thread_eval")
end if
call c_f_pointer(arg, im)
! Allocating local arrays
allocate(h(0:thread_data%order), dyu(thread_data%nvar))
do i=0, thread_data%order
allocate(h(i)%c(thread_data%endo_nbr, thread_data%nvar**i))
end do
! Specifying bounds for the curent thread
q = thread_data%nparticles / thread_data%nm
r = mod(thread_data%nparticles, thread_data%nm)
start = (im-1)*q+1
if (im < thread_data%nm) then
end = start+q-1
else
end = thread_data%nparticles
end if
! Using the Horner algorithm to evaluate the decision rule at the chosen yhat and epsilon
do j=start,end
dyu(1:thread_data%nys) = thread_data%yhat(:,j)
dyu(thread_data%nys+1:) = thread_data%e(:,j)
call eval(h, dyu, thread_data%udr, thread_data%endo_nbr, thread_data%nvar, thread_data%order)
do i=1,thread_data%nrestricted
ind = int(thread_data%restrict_var_list(i))
thread_data%ynext(i,j) = h(0)%c(ind,1) + thread_data%ys_reordered(ind)
end do
end do
end subroutine thread_eval
end module pparticle
This diff is collapsed.
...@@ -15,6 +15,69 @@ ...@@ -15,6 +15,69 @@
! You should have received a copy of the GNU General Public License ! You should have received a copy of the GNU General Public License
! along with Dynare. If not, see <https://www.gnu.org/licenses/>. ! along with Dynare. If not, see <https://www.gnu.org/licenses/>.
! Routines and data structures for multithreading over particles in local_state_space_iteration_k
module pparticle
use iso_c_binding
use simulation
use matlab_mex
implicit none
type tdata
integer :: nm, nys, endo_nbr, nvar, order, nrestricted, nparticles
real(real64), allocatable :: yhat(:,:), e(:,:), ynext(:,:), ys_reordered(:), restrict_var_list(:)
type(pol), dimension(:), allocatable :: udr
end type tdata
type(tdata) :: thread_data
contains
subroutine thread_eval(arg) bind(c)
type(c_ptr), intent(in), value :: arg
integer, pointer :: im
integer :: i, j, start, end, q, r, ind
type(horner), dimension(:), allocatable :: h
real(real64), dimension(:), allocatable :: dyu
! Checking that the thread number got passed as argument
if (.not. c_associated(arg)) then
call mexErrMsgTxt("No argument was passed to thread_eval")
end if
call c_f_pointer(arg, im)
! Allocating local arrays
allocate(h(0:thread_data%order), dyu(thread_data%nvar))
do i=0, thread_data%order
allocate(h(i)%c(thread_data%endo_nbr, thread_data%nvar**i))
end do
! Specifying bounds for the curent thread
q = thread_data%nparticles / thread_data%nm
r = mod(thread_data%nparticles, thread_data%nm)
start = (im-1)*q+1
if (im < thread_data%nm) then
end = start+q-1
else
end = thread_data%nparticles
end if
! Using the Horner algorithm to evaluate the decision rule at the chosen yhat and epsilon
do j=start,end
dyu(1:thread_data%nys) = thread_data%yhat(:,j)
dyu(thread_data%nys+1:) = thread_data%e(:,j)
call eval(h, dyu, thread_data%udr, thread_data%endo_nbr, thread_data%nvar, thread_data%order)
do i=1,thread_data%nrestricted
ind = int(thread_data%restrict_var_list(i))
thread_data%ynext(i,j) = h(0)%c(ind,1) + thread_data%ys_reordered(ind)
end do
end do
end subroutine thread_eval
end module pparticle
! The code of the local_state_space_iteration_k routine
! input: ! input:
! yhat values of endogenous variables ! yhat values of endogenous variables
! epsilon values of the exogenous shock ! epsilon values of the exogenous shock
...@@ -24,7 +87,6 @@ ...@@ -24,7 +87,6 @@
! udr struct containing the model unfolded tensors ! udr struct containing the model unfolded tensors
! output: ! output:
! ynext simulated next-period results ! ynext simulated next-period results
subroutine mexFunction(nlhs, plhs, nrhs, prhs) bind(c, name='mexFunction') subroutine mexFunction(nlhs, plhs, nrhs, prhs) bind(c, name='mexFunction')
use iso_fortran_env use iso_fortran_env
use iso_c_binding use iso_c_binding
......
...@@ -652,7 +652,8 @@ PARTICLEFILES = \ ...@@ -652,7 +652,8 @@ PARTICLEFILES = \
particle/dsge_base2.mod \ particle/dsge_base2.mod \
particle/first_spec.mod \ particle/first_spec.mod \
particle/first_spec_MCMC.mod \ particle/first_spec_MCMC.mod \
particle/local_state_space_iteration_k_test.mod particle/local_state_space_iteration_k_test.mod \
particle/local_state_space_iteration_3_test.mod
XFAIL_MODFILES = ramst_xfail.mod \ XFAIL_MODFILES = ramst_xfail.mod \
...@@ -959,6 +960,9 @@ discretionary_policy/dennis_1_estim.o.trs: discretionary_policy/dennis_1.o.trs ...@@ -959,6 +960,9 @@ discretionary_policy/dennis_1_estim.o.trs: discretionary_policy/dennis_1.o.trs
discretionary_policy/Gali_2015_chapter_3_nonlinear.m.trs: discretionary_policy/Gali_2015_chapter_3.m.trs discretionary_policy/Gali_2015_chapter_3_nonlinear.m.trs: discretionary_policy/Gali_2015_chapter_3.m.trs
discretionary_policy/Gali_2015_chapter_3_nonlinear.o.trs: discretionary_policy/Gali_2015_chapter_3.o.trs discretionary_policy/Gali_2015_chapter_3_nonlinear.o.trs: discretionary_policy/Gali_2015_chapter_3.o.trs
particle/local_state_space_iteration_3_test.m.trs: particle/first_spec.m.trs
particle/local_state_space_iteration_3_test.o.trs: particle/first_spec.o.trs
particle/local_state_space_iteration_k_test.m.trs: particle/first_spec.m.trs particle/local_state_space_iteration_k_test.m.trs: particle/first_spec.m.trs
particle/local_state_space_iteration_k_test.o.trs: particle/first_spec.o.trs particle/local_state_space_iteration_k_test.o.trs: particle/first_spec.o.trs
......
/*
Tests that local_state_space_iteration_3 and local_state_space_iteration_k (for k=3) return the same results
This file must be run after first_spec.mod (both are based on the same model).
*/
@#include "first_spec_common.inc"
varobs q ca;
shocks;
var eeps = 0.04^2;
var nnu = 0.03^2;
var q = 0.01^2;
var ca = 0.01^2;
end;
// Initialize various structures
estimation(datafile='my_data.mat',order=3,mode_compute=0,mh_replic=0,filter_algorithm=sis,nonlinear_filter_initialization=2
,cova_compute=0 %tell program that no covariance matrix was computed
);
stoch_simul(order=3, periods=200, irf=0, k_order_solver);
// Really perform the test
nparticles = options_.particle.number_of_particles;
nsims = 1e6/nparticles;
/* We generate particles using realistic distributions (though this is not
strictly needed) */
nstates = M_.npred + M_.nboth;
state_idx = oo_.dr.order_var((M_.nstatic+1):(M_.nstatic+nstates));
yhat = chol(oo_.var(state_idx,state_idx))*randn(nstates, nparticles);
epsilon = chol(M_.Sigma_e)*randn(M_.exo_nbr, nparticles);
dr = oo_.dr;
// “rf” stands for “Reduced Form”
rf_ghx = dr.ghx(dr.restrict_var_list, :);
rf_ghu = dr.ghu(dr.restrict_var_list, :);
rf_constant = dr.ys(dr.order_var)+0.5*dr.ghs2;
rf_constant = rf_constant(dr.restrict_var_list, :);
rf_ghxx = dr.ghxx(dr.restrict_var_list, :);
rf_ghuu = dr.ghuu(dr.restrict_var_list, :);
rf_ghxu = dr.ghxu(dr.restrict_var_list, :);
rf_ghxxx = dr.ghxxx(dr.restrict_var_list, :);
rf_ghuuu = dr.ghuuu(dr.restrict_var_list, :);
rf_ghxxu = dr.ghxxu(dr.restrict_var_list, :);
rf_ghxuu = dr.ghxuu(dr.restrict_var_list, :);
rf_ghxss = dr.ghxss(dr.restrict_var_list, :);
rf_ghuss = dr.ghuss(dr.restrict_var_list, :);
options_.threads.local_state_space_iteration_3 = 12;
options_.threads.local_state_space_iteration_k = 12;
% Without pruning
tStart1 = tic; for i=1:nsims, ynext1 = local_state_space_iteration_3(yhat, epsilon, rf_ghx, rf_ghu, rf_constant, rf_ghxx, rf_ghuu, rf_ghxu, rf_ghxxx, rf_ghuuu, rf_ghxxu, rf_ghxuu, rf_ghxss, rf_ghuss, options_.threads.local_state_space_iteration_3); end, tElapsed1 = toc(tStart1);
tStart2 = tic; [udr] = folded_to_unfolded_dr(dr, M_, options_); for i=1:nsims, ynext2 = local_state_space_iteration_k(yhat, epsilon, dr, M_, options_, udr); end, tElapsed2 = toc(tStart2);
if max(max(abs(ynext1-ynext2))) > 1e-10
error('Without pruning: Inconsistency between local_state_space_iteration_3 and local_state_space_iteration_k')
end
if tElapsed1<tElapsed2
skipline()
dprintf('Without pruning: local_state_space_iteration_3 is %5.2f times faster than local_state_space_iteration_k', tElapsed2/tElapsed1)
skipline()
else
skipline()
dprintf('Without pruning: local_state_space_iteration_3 is %5.2f times slower than local_state_space_iteration_k', tElapsed1/tElapsed2)
skipline()
end
% With pruning
rf_ss = dr.ys(dr.order_var);
rf_ss = rf_ss(dr.restrict_var_list, :);
yhat_ = chol(oo_.var(state_idx,state_idx))*randn(nstates, nparticles);
yhat__ = zeros(2*nstates,nparticles);
yhat__(1:nstates,:) = yhat_;
yhat__(nstates+1:2*nstates,:) = chol(oo_.var(state_idx,state_idx))*randn(nstates, nparticles);
nstatesandobs = size(rf_ghx,1);
[ynext1,ynext1_lat] = local_state_space_iteration_3(yhat, epsilon, rf_ghx, rf_ghu, rf_constant, rf_ghxx, rf_ghuu, rf_ghxu, rf_ghxxx, rf_ghuuu, rf_ghxxu, rf_ghxuu, rf_ghxss, rf_ghuss, yhat__, rf_ss, options_.threads.local_state_space_iteration_3);
[ynext2,ynext2_lat] = local_state_space_iteration_2(yhat__(nstates+1:2*nstates,:), epsilon, rf_ghx, rf_ghu, rf_constant, rf_ghxx, rf_ghuu, rf_ghxu, yhat_, rf_ss, options_.threads.local_state_space_iteration_2);
if max(max(abs(ynext1_lat(nstatesandobs+1:2*nstatesandobs,:)-ynext2))) > 1e-14
error('With pruning: inconsistency between local_state_space_iteration_2 and local_state_space_iteration_3 on the 2nd-order pruned variable')
end
if max(max(abs(ynext1_lat(1:nstatesandobs,:)-ynext2_lat))) > 1e-14
error('With pruning: inconsistency between local_state_space_iteration_2 and local_state_space_iteration_3 on the 1st-order pruned variable')
end
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment