Rework the kernel comparison in the local_state_space_iteration_2 test.

Summary of the hunk below:
  * A one-particle dry-run call is added before any timing (the added comment
    states it makes sure the MEX is loaded in memory).
  * Each timed run is preceded by a disp() naming the kernel selected through
    the DYNARE_LSSI2_KERNEL environment variable (avx512, avx2, generic), and
    the setenv() calls switch from double-quoted to single-quoted strings.
  * The bare max(max(abs(...))) expressions are replaced by checks that call
    error() when the avx512 or avx2 output differs from the generic output by
    more than 1e-15 in absolute value.
  * The three-kernel comparison is repeated after setting
    options_.threads.local_state_space_iteration_2 = 1.
  * The commented-out "expected" block, which contained a leftover ">>>>>>>"
    merge-conflict marker, is removed.
  * DYNARE_LSSI2_KERNEL is set back to 'auto' as the last statement.

NOTE(review): the line structure of this patch was reconstructed from a
whitespace-collapsed copy. The hunk line counts (-47,31 +47,64) were checked
against the reconstruction, but the exact indentation of the indented lines
(restored here as 4 spaces) could not be recovered -- confirm against the
target file, or apply with --ignore-whitespace.

diff --git a/tests/particle/local_state_space_iteration_k_test.mod b/tests/particle/local_state_space_iteration_k_test.mod
index 7705f9ed5923a8966c192a6ce9d1833b1787e5c7..0fdb8f98101a0f9cc7094edb7d51a47dc4cd2935 100644
--- a/tests/particle/local_state_space_iteration_k_test.mod
+++ b/tests/particle/local_state_space_iteration_k_test.mod
@@ -47,31 +47,64 @@ rf_ghxx = dr.ghxx(dr.restrict_var_list, :);
 rf_ghuu = dr.ghuu(dr.restrict_var_list, :);
 rf_ghxu = dr.ghxu(dr.restrict_var_list, :);
 
-setenv("DYNARE_LSSI2_KERNEL", "avx512")
+// Dry run for making sure that the MEX is loaded in memory
+dummy = local_state_space_iteration_2(yhat(:,1), epsilon(:,1), rf_ghx, rf_ghu, rf_constant, rf_ghxx, rf_ghuu, rf_ghxu, 1);
+
+disp('* Multi-threaded comparison')
+
+disp('kernel=avx512')
+setenv('DYNARE_LSSI2_KERNEL', 'avx512')
 tic;
 ynext1 = local_state_space_iteration_2(yhat, epsilon, rf_ghx, rf_ghu, rf_constant, rf_ghxx, rf_ghuu, rf_ghxu, options_.threads.local_state_space_iteration_2);
 toc;
 
-setenv("DYNARE_LSSI2_KERNEL", "avx2")
+disp('kernel=avx2')
+setenv('DYNARE_LSSI2_KERNEL', 'avx2')
 tic;
 ynext2 = local_state_space_iteration_2(yhat, epsilon, rf_ghx, rf_ghu, rf_constant, rf_ghxx, rf_ghuu, rf_ghxu, options_.threads.local_state_space_iteration_2);
 toc;
 
-setenv("DYNARE_LSSI2_KERNEL", "generic")
+disp('kernel=generic')
+setenv('DYNARE_LSSI2_KERNEL', 'generic')
 tic;
 ynext3 = local_state_space_iteration_2(yhat, epsilon, rf_ghx, rf_ghu, rf_constant, rf_ghxx, rf_ghuu, rf_ghxu, options_.threads.local_state_space_iteration_2);
 toc;
 
-setenv("DYNARE_LSSI2_KERNEL", "auto")
+if max(max(abs(ynext1-ynext3))) > 1e-15
+    error('avx512 kernel is inconsistent with generic one')
+end
+if max(max(abs(ynext2-ynext3))) > 1e-15
+    error('avx2 kernel is inconsistent with generic one')
+end
+
+disp('')
+disp('* Single-threaded comparison')
 
-max(max(abs(ynext1-ynext3)))
-max(max(abs(ynext2-ynext3)))
+options_.threads.local_state_space_iteration_2 = 1;
 
-/*
-expected = rf_constant+rf_ghx*yhat+rf_ghu*epsilon;
-for i=1:nparticles
-    expected(:,i) = expected(:,i)+A_times_B_kronecker_C(.5*rf_ghxx,yhat(:,i))+A_times_B_kronecker_C(.5*rf_ghuu,epsilon(:,i))+A_times_B_kronecker_C(rf_ghxu,yhat(:,i),epsilon(:,i));
->>>>>>> 8e033b864 (WIP: AVX2 version of lssi2)
+disp('kernel=avx512')
+setenv('DYNARE_LSSI2_KERNEL', 'avx512')
+tic;
+ynext1 = local_state_space_iteration_2(yhat, epsilon, rf_ghx, rf_ghu, rf_constant, rf_ghxx, rf_ghuu, rf_ghxu, options_.threads.local_state_space_iteration_2);
+toc;
+
+disp('kernel=avx2')
+setenv('DYNARE_LSSI2_KERNEL', 'avx2')
+tic;
+ynext2 = local_state_space_iteration_2(yhat, epsilon, rf_ghx, rf_ghu, rf_constant, rf_ghxx, rf_ghuu, rf_ghxu, options_.threads.local_state_space_iteration_2);
+toc;
+
+disp('kernel=generic')
+setenv('DYNARE_LSSI2_KERNEL', 'generic')
+tic;
+ynext3 = local_state_space_iteration_2(yhat, epsilon, rf_ghx, rf_ghu, rf_constant, rf_ghxx, rf_ghuu, rf_ghxu, options_.threads.local_state_space_iteration_2);
+toc;
+
+if max(max(abs(ynext1-ynext3))) > 1e-15
+    error('avx512 kernel is inconsistent with generic one')
 end
-max(max(abs(expected-ynext1)))
-*/
+if max(max(abs(ynext2-ynext3))) > 1e-15
+    error('avx2 kernel is inconsistent with generic one')
+end
+
+setenv('DYNARE_LSSI2_KERNEL', 'auto')