From 8a809c7e2abd0c9563ea2e3dfe712e5cf928db7a Mon Sep 17 00:00:00 2001 From: Marco Ratto <marco.ratto@jrc.ec.europa.eu> Date: Fri, 10 Feb 2012 22:02:11 +0100 Subject: [PATCH] Capture separately the error when the master asked to break the job. --- matlab/parallel/fParallel.m | 25 ++++++++++++++++--------- matlab/parallel/slaveParallel.m | 30 +++++++++++++++++++----------- 2 files changed, 35 insertions(+), 20 deletions(-) diff --git a/matlab/parallel/fParallel.m b/matlab/parallel/fParallel.m index 9bd09d14d..01dcf28aa 100644 --- a/matlab/parallel/fParallel.m +++ b/matlab/parallel/fParallel.m @@ -84,17 +84,24 @@ try, disp(['fParallel ',int2str(whoiam),' completed.']) catch, - disp(['fParallel ',int2str(whoiam),' crashed.']) - fOutputVar.error = lasterror; - save([ fname,'_output_',int2str(whoiam),'.mat'],'fOutputVar' ) - waitbarString = fOutputVar.error.message; - % waitbarTitle=['Metropolis-Hastings ',options_.parallel(ThisMatlab).ComputerName]; - if Parallel(ThisMatlab).Local, - waitbarTitle='Local '; + theerror = lasterror; + if strfind(theerror.message,'Master asked to break the job') + fOutputVar.message = theerror; + save([ fname,'_output_',int2str(whoiam),'.mat'],'fOutputVar' ) + waitbarString = theerror.message; else - waitbarTitle=[Parallel(ThisMatlab).ComputerName]; + disp(['fParallel ',int2str(whoiam),' crashed.']) + fOutputVar.error = theerror; + save([ fname,'_output_',int2str(whoiam),'.mat'],'fOutputVar' ) + waitbarString = theerror.message; + % waitbarTitle=['Metropolis-Hastings ',options_.parallel(ThisMatlab).ComputerName]; + if Parallel(ThisMatlab).Local, + waitbarTitle='Local '; + else + waitbarTitle=[Parallel(ThisMatlab).ComputerName]; + end + fMessageStatus(NaN,whoiam,waitbarString, waitbarTitle, Parallel(ThisMatlab)); end - fMessageStatus(NaN,whoiam,waitbarString, waitbarTitle, Parallel(ThisMatlab)); end diary off; diff --git a/matlab/parallel/slaveParallel.m b/matlab/parallel/slaveParallel.m index d54b6ac30..21042d068 100644 --- a/matlab/parallel/slaveParallel.m +++ b/matlab/parallel/slaveParallel.m @@ -143,19 +143,27 @@ while (etime(clock,t0)<1200 && ~isempty(fslave)) || ~isempty(dir(['stayalive',in disp(['Job ',fname,' on CPU ',int2str(whoiam),' completed.']); t0 =clock; % Re-set waiting time of 20 mins - catch ME - disp(['Job ',fname,' on CPU ',int2str(whoiam),' crashed.']); - fOutputVar.error = ME; - save([ fname,'_output_',int2str(whoiam),'.mat'],'fOutputVar' ); - waitbarString = fOutputVar.error.message; - if Parallel(ThisMatlab).Local, - waitbarTitle='Local '; + catch, + theerror = lasterror; + if strfind(theerror.message,'Master asked to break the job') + disp(['Job ',fname,' on CPU ',int2str(whoiam),' broken from master.']); + fOutputVar.message = theerror; + save([ fname,'_output_',int2str(whoiam),'.mat'],'fOutputVar' ) + delete(['P_',fname,'_',int2str(whoiam),'End.txt']); else - waitbarTitle=[Parallel(ThisMatlab).ComputerName]; + disp(['Job ',fname,' on CPU ',int2str(whoiam),' crashed.']); + fOutputVar.error = theerror; + save([ fname,'_output_',int2str(whoiam),'.mat'],'fOutputVar' ); + waitbarString = fOutputVar.error.message; + if Parallel(ThisMatlab).Local, + waitbarTitle='Local '; + else + waitbarTitle=[Parallel(ThisMatlab).ComputerName]; + end + fMessageStatus(NaN,whoiam,waitbarString, waitbarTitle, Parallel(ThisMatlab)); + delete(['P_',fname,'_',int2str(whoiam),'End.txt']); + break end - fMessageStatus(NaN,whoiam,waitbarString, waitbarTitle, Parallel(ThisMatlab)); - delete(['P_',fname,'_',int2str(whoiam),'End.txt']); - break end end -- GitLab