Skip to content
Snippets Groups Projects
Commit ff5a802f authored by Marco Ratto
Browse files

allow the possibility to close all slaves from a condition found in one single...

Allow all slaves to be closed when a condition is detected in a single thread (e.g. in a while loop where every thread repeatedly checks whether a condition is verified).
parent 77e84c53
No related branches found
No related tags found
No related merge requests found
function closeSlave(Parallel,TmpFolder),
function closeSlave(Parallel,TmpFolder,partial),
% PARALLEL CONTEXT
% In parallel context, this utility closes all remote matlab instances
% called by masterParallel when strategy (1) is active i.e. always open (which leaves
......@@ -32,6 +32,32 @@ function closeSlave(Parallel,TmpFolder),
% You should have received a copy of the GNU General Public License
% along with Dynare. If not, see <http://www.gnu.org/licenses/>.
if nargin<3,
partial=0;
end
s=warning('off');
if partial==1
save('slaveParallel_break','partial')
for indPC=1:length(Parallel),
if (Parallel(indPC).Local==0),
dynareParallelSendFiles('slaveParallel_break.mat',TmpFolder,Parallel(indPC));
end
end
% delete('slaveParallel_break')
return
end
if partial==-1
delete('slaveParallel_break.mat')
for indPC=1:length(Parallel),
if (Parallel(indPC).Local==0),
dynareParallelDelete( 'slaveParallel_break.mat',TmpFolder,Parallel(indPC));
end
end
% delete('slaveParallel_break')
return
end
for indPC=1:length(Parallel),
if (Parallel(indPC).Local==0),
......@@ -58,3 +84,5 @@ while(1)
end
end
s=warning('on');
......@@ -43,7 +43,8 @@ catch
end
fslave = dir( ['slaveParallel_input',int2str(njob),'.mat']);
if isempty(fslave),
fbreak = dir( ['slaveParallel_break.mat']);
if isempty(fslave) || ~isempty(fbreak),
error('Master asked to break the job');
end
......@@ -81,13 +81,25 @@ try,
% Save the output result.
save([ fname,'_output_',int2str(whoiam),'.mat'],'fOutputVar' )
end
if isfield(fOutputVar,'CloseAllSlaves'),
CloseAllSlaves = 1;
fOutputVar = rmfield(fOutputVar,'CloseAllSlaves');
save([ fname,'_output_',int2str(whoiam),'.mat'],'fOutputVar' )
save(['comp_status_',funcName,int2str(whoiam),'.mat'],'CloseAllSlaves');
end
disp(['fParallel ',int2str(whoiam),' completed.'])
catch,
theerror = lasterror;
if strfind(theerror.message,'Master asked to break the job')
fOutputVar.message = theerror;
save([ fname,'_output_',int2str(whoiam),'.mat'],'fOutputVar' )
waitbarString = theerror.message;
else
disp(['fParallel ',int2str(whoiam),' crashed.'])
fOutputVar.error = lasterror;
fOutputVar.error = theerror;
save([ fname,'_output_',int2str(whoiam),'.mat'],'fOutputVar' )
waitbarString = fOutputVar.error.message;
waitbarString = theerror.message;
% waitbarTitle=['Metropolis-Hastings ',options_.parallel(ThisMatlab).ComputerName];
if Parallel(ThisMatlab).Local,
waitbarTitle='Local ';
......@@ -95,6 +107,7 @@ catch,
waitbarTitle=[Parallel(ThisMatlab).ComputerName];
end
fMessageStatus(NaN,whoiam,waitbarString, waitbarTitle, Parallel(ThisMatlab));
end
end
diary off;
......
......@@ -142,6 +142,7 @@ switch Strategy
save(['temp_input.mat'],'fInputVar')
end
save(['temp_input.mat'],'Parallel','-append')
closeSlave(Parallel,PRCDir,-1);
end
......@@ -423,6 +424,7 @@ for j=1:totCPU,
if isempty(PRCDirSnapshot{indPC}),
PRCDirSnapshot(indPC)=dynareParallelSnapshot(PRCDir,Parallel(indPC));
PRCDirSnapshotInit(indPC) = PRCDirSnapshot(indPC);
else
PRCDirSnapshot(indPC)=dynareParallelGetNewFiles(PRCDir,Parallel(indPC),PRCDirSnapshot(indPC));
end
......@@ -453,6 +455,7 @@ end
if Strategy==0 || newInstance, % See above.
PRCDirSnapshot=dynareParallelSnapshot(PRCDir,Parallel(1:totSlaves));
PRCDirSnapshotInit = PRCDirSnapshot;
% Run the slaves.
if ~ispc, %isunix || (~matlab_ver_less_than('7.4') && ismac),
......@@ -587,6 +590,7 @@ NuoviFilecopiati=zeros(1,totSlaves);
ForEver=1;
statusString = '';
flag_CloseAllSlaves=0;
while (ForEver)
......@@ -607,6 +611,12 @@ while (ForEver)
try
if ~isempty(['comp_status_',fname,int2str(j),'.mat'])
load(['comp_status_',fname,int2str(j),'.mat']);
% whoCloseAllSlaves = who(['comp_status_',fname,int2str(j),'.mat','CloseAllSlaves']);
if exist('CloseAllSlaves') && flag_CloseAllSlaves==0,
flag_CloseAllSlaves=1;
whoiamCloseAllSlaves=j;
closeSlave(Parallel(1:totSlaves),PRCDir,1);
end
end
pcerdone(j) = prtfrc;
idCPU(j) = njob;
......@@ -711,11 +721,16 @@ for j=1:totCPU,
for jstack=1:length(fOutputVar.error.stack)
fOutputVar.error.stack(jstack),
end
else
elseif flag_CloseAllSlaves==0,
fOutVar(j)=fOutputVar;
elseif j==whoiamCloseAllSlaves,
fOutVar=fOutputVar;
end
end
if flag_CloseAllSlaves==1,
closeSlave(Parallel(1:totSlaves),PRCDir,-1);
end
if iscrash,
error('Remote jobs crashed');
......@@ -737,10 +752,11 @@ switch Strategy
[A B C]=rmdir('dynareParallelLogFiles');
mkdir('dynareParallelLogFiles');
end
try
copyfile('*.log','dynareParallelLogFiles');
delete([fname,'*.log']);
mydelete([fname,'*.log']);
catch
end
mydelete(['*_core*_input*.mat']);
% if Parallel(indPC).Local == 1
% delete(['slaveParallel_input*.mat']);
......
......@@ -136,6 +136,13 @@ while (etime(clock,t0)<1200 && ~isempty(fslave)) || ~isempty(dir(['stayalive',in
% Save the output result.
save([ fname,'_output_',int2str(whoiam),'.mat'],'fOutputVar' );
% keyboard,
if isfield(fOutputVar,'CloseAllSlaves'),
CloseAllSlaves = 1;
fOutputVar = rmfield(fOutputVar,'CloseAllSlaves');
save([ fname,'_output_',int2str(whoiam),'.mat'],'fOutputVar' )
save(['comp_status_',funcName,int2str(whoiam),'.mat'],'CloseAllSlaves');
end
% Inform the master that the job is finished, and transfer the output data
delete(['P_',fname,'_',int2str(whoiam),'End.txt']);
......@@ -143,9 +150,16 @@ while (etime(clock,t0)<1200 && ~isempty(fslave)) || ~isempty(dir(['stayalive',in
disp(['Job ',fname,' on CPU ',int2str(whoiam),' completed.']);
t0 =clock; % Re-set waiting time of 20 mins
catch ME
catch,
theerror = lasterror;
if strfind(theerror.message,'Master asked to break the job')
disp(['Job ',fname,' on CPU ',int2str(whoiam),' broken from master.']);
fOutputVar.message = theerror;
save([ fname,'_output_',int2str(whoiam),'.mat'],'fOutputVar' )
delete(['P_',fname,'_',int2str(whoiam),'End.txt']);
else
disp(['Job ',fname,' on CPU ',int2str(whoiam),' crashed.']);
fOutputVar.error = ME;
fOutputVar.error = lasterror;
save([ fname,'_output_',int2str(whoiam),'.mat'],'fOutputVar' );
waitbarString = fOutputVar.error.message;
if Parallel(ThisMatlab).Local,
......@@ -156,6 +170,7 @@ while (etime(clock,t0)<1200 && ~isempty(fslave)) || ~isempty(dir(['stayalive',in
fMessageStatus(NaN,whoiam,waitbarString, waitbarTitle, Parallel(ThisMatlab));
delete(['P_',fname,'_',int2str(whoiam),'End.txt']);
break
end
end
end
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment