Skip to content
Snippets Groups Projects
Commit 9e8d8992 authored by Stéphane Adjemian's avatar Stéphane Adjemian
Browse files

Added routines for reading data in csv, m, mat and xls files.

parent 5d76092a
No related branches found
No related tags found
No related merge requests found
function [freq, init, data, varlist] = load_csv_file_data(file)
%function [freq, init, data, varlist] = load_csv_file_data(file)
% Loads data in a csv file.
%
% INPUTS
%  o file        string, name of the csv file (with path).
%
% OUTPUTS
%  o freq        integer scalar equal to 1, 4, 12 or 52 (for annual, quarterly, monthly or weekly frequencies).
%  o init        dates object, initial date in the dataset.
%  o data        matrix of doubles, the data.
%  o varlist     cell of strings, names of the variables.
%
% REMARKS
%  The varlist output will be set only if the first line contains variable
%  names. Similarly, if the first column does not contain dates, then
%  freq will be 1 and init will be year 1.
%
%  Three readers are used:
%   - Matlab: importdata().
%   - Octave, first line shorter than 4097 characters: csv2cell()/parsecell() from the io package.
%   - Octave, longer first line: the file is parsed by hand, byte by byte
%     (presumably because csv2cell cannot cope with such lines -- TODO confirm).
%  All three assume that text may only appear in the first line (variable names)
%  and in the first column (dates).

% Copyright (C) 2012-2014 Dynare Team
%
% This file is part of Dynare.
%
% Dynare is free software: you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation, either version 3 of the License, or
% (at your option) any later version.
%
% Dynare is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with Dynare. If not, see <http://www.gnu.org/licenses/>.

% Output initialization
freq = 1;                  % Default frequency is annual.
init = dates(1,1);         % Default initial date is year one.
varlist = [];

assert(exist(file, 'file') == 2, ['load_csv_file_data: I can''t find file ' file '!']);

if isoctave
    % Peek at the first line to choose between the io package and the manual parser.
    fid = fopen(file, 'r');
    firstline = fgetl(fid, 4097);
    fclose(fid);
    if length(firstline) < 4097
        if ~user_has_octave_forge_package('io')
            try
                pkg load io
            catch
                error(['The io package is required to read CSV files from Octave. ' ...
                       'It can be installed by running the following from the Octave ' ...
                       ' command line: pkg install -forge io']);
            end
        end
        % Actually read the file (this step was missing: the package was loaded but never used).
        A = csv2cell(file);
        [data, T, L] = parsecell(A);
        if isempty(T)
            % No text at all: neither names nor dates.
            withvars = false;
            withtime = false;
        else
            % Text starting above (resp. left of) the numeric block means names (resp. dates).
            withvars = L.numlimits(2,1) > L.txtlimits(2,1);
            withtime = L.numlimits(1,1) > L.txtlimits(1,1);
        end
    else
        % Manual parser for files with very long lines.
        fid = fopen(file, 'r');
        bfile = fread(fid);
        fclose(fid);
        if isunix || ismac
            newline_code = 10;
        elseif ispc
            newline_code = 13;
        else
            error('load_csv_file_data is not implemented for your OS');
        end
        % On Windows, fall back to LF if the file holds no CR at all.
        if ispc && isempty(find(bfile==newline_code, 1))
            newline_code = 10;
        end
        % Normalise the end of the file: drop trailing line breaks and terminate the
        % last line exactly once, so that an unterminated last line is not lost and
        % trailing blank lines do not produce spurious observations.
        while ~isempty(bfile) && any(bfile(end) == [10 13])
            bfile(end) = [];
        end
        bfile(end+1) = newline_code;
        % Get the positions of the end-of-line code
        end_of_line_locations = find(bfile==newline_code);
        % Get the number of lines in the file
        ndx = length(end_of_line_locations);
        % First (b) and last (c) byte of each line
        b = [1; end_of_line_locations+1];
        c = [end_of_line_locations-1; length(bfile)+1];
        b = b(1:end-1);
        c = c(1:end-1);
        % A cell is considered as text if it is not empty and does not look like a number.
        istext = @(s) ~isempty(s) && isempty(regexpi(s, '^[+-]?((\d+\.?\d*|\.\d+)([ed][+-]?\d+)?|nan|inf)$', 'once'));
        % Variable names are present if the last cell of the first line is text (and not a date).
        header = char(transpose(bfile(b(1):c(1))));
        [B, C] = get_cells_id(header, ',');
        lastcell = strtrim(header(B(end):C(end)));
        withnames = istext(lastcell) && ~isdate(lastcell);
        % Dates are present if the first cell of the first line of data is text.
        withtime = false;
        number_of_variables = 0;
        if ndx > withnames
            linee = char(transpose(bfile(b(1+withnames):c(1+withnames))));
            [B, C] = get_cells_id(linee, ',');
            firstcell = strtrim(linee(B(1):C(1)));
            withtime = istext(firstcell);
            if withtime
                % Check the dates formatting
                if ~isdate(firstcell)
                    error('load_csv_file_data:: Formatting error. I can''t read the dates!')
                end
                init = dates(firstcell);
                freq = init.freq;
            end
            number_of_variables = length(B)-withtime;
        end
        if withnames
            % Get the names of the variables (skipping the cell above the dates, if any).
            [B, C] = get_cells_id(header, ',');
            if withtime
                B = B(2:end);
                C = C(2:end);
            end
            varlist = cell(length(B),1);
            for i=1:length(B)
                varlist(i) = {header(B(i):C(i))};
            end
            varlist = strtrim(varlist);
            number_of_variables = length(varlist);
        end
        % Initialization of matrix data.
        data = zeros(ndx-withnames, number_of_variables);
        % Populate data.
        for linea = 1+withnames:ndx
            linee = char(transpose(bfile(b(linea):c(linea))));
            [B, C] = get_cells_id(linee, ',');
            for i=1+withtime:length(B)
                data(linea-withnames,i-withtime) = str2double(linee(B(i):C(i)));
            end
        end
        % freq, init and varlist (already a column) are final for this reader.
        return
    end
else
    A = importdata(file, ',');
    if ~isstruct(A)
        % Purely numeric file.
        data = A;
        T = {};
        withvars = 0;
        withtime = 0;
    else
        data = A.data;
        T = A.textdata;
        % importdata() allows text only at the top and the left, so the following
        % tests are sufficient.
        withvars = size(T, 2) >= size(data, 2);
        withtime = size(T, 1) >= size(data, 1);
    end
end

if withvars
    % Skip the first cell of the header only if it sits above a column of dates
    % (otherwise the name of the first variable would be lost).
    varlist = T(1, 1+withtime:end);
    T = T(2:end, :);
end

if withtime
    init = dates(T{1, 1});
    freq = init.freq;
end

% Names are returned as a column.
varlist = transpose(varlist);
function [freq,init,data,varlist,tex] = load_m_file_data(file)
% Loads data in a matlab/octave script.
%
% INPUTS
%  o file         string, name of the matlab/octave script (with path)
%
% OUTPUTS
%  o freq        integer scalar equal to 1, 4, 12 or 52 (for annual, quarterly, monthly or weekly frequencies).
%  o init        dates object, initial date in the dataset.
%  o data        matrix of doubles, the data.
%  o varlist     cell of strings, names of the variables.
%  o tex         content of TEX__ if the script defines it, [] otherwise.
%
% REMARKS
% The frequency and initial date can be specified with variables FREQ__ and INIT__ in the matlab/octave script. FREQ__ must
% be a scalar integer and INIT__ a string like '1938M11', '1945Q3', '1973W3' or '2009A'. If these variables are not specified
% default values for freq and init are 1 and dates(1,1).
%
% If the script defines NAMES__ (cell of strings), only the listed variables are loaded, in that order. Otherwise
% every real double column vector (or scalar) left in the workspace by the script is loaded.
%
% The script is executed in the workspace of this function: a script defining variables named like the
% locals used below (file, freq, init, data, ...) will interfere with the routine.

% Copyright (C) 2012-2013 Dynare Team
%
% This file is part of Dynare.
%
% Dynare is free software: you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation, either version 3 of the License, or
% (at your option) any later version.
%
% Dynare is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with Dynare. If not, see <http://www.gnu.org/licenses/>.

if isoctave
    run(file);
else
    % Under Matlab the script is run by its name without extension. Only strip an
    % actual '.m' suffix: removing the last two characters unconditionally breaks
    % calls where the name is given without extension.
    if length(file)>2 && strcmp(file(end-1:end),'.m')
        run(file(1:end-2));
    else
        run(file);
    end
end

if exist('INIT__','var')
    if isdate(INIT__)
        init = dates(INIT__);
        clear('INIT__')
    else
        error('load_m_file_data: INIT__ cannot be interpreted as a date.')
    end
else
    init = dates(1,1); % Default initial date is year one.
end

if exist('FREQ__','var')
    freq = FREQ__;
    clear('FREQ__');
else
    freq = init.freq;
end

if ~isequal(freq,init.freq)
    error('load_m_file_data: INIT__ and FREQ__ are not consistent!')
end

if exist('NAMES__','var')
    varlist0 = NAMES__;
    clear('NAMES__');
else
    varlist0 = [];
    list_of_variables = [];
end

if exist('TEX__','var')
    tex = TEX__;
    clear('TEX__');
else
    tex = [];
end

% Take the snapshot of the workspace before data and varlist are created.
if isempty(varlist0)
    list_of_variables = whos();
end

data = [];
varlist = {};

if isempty(varlist0)
    % Names of the locals of this routine, which must not be mistaken for data.
    reserved_names = {'freq','time','init','file','data','varlist','varlist0','list_of_variables','tex'};
    for current_variable_index=1:length(list_of_variables)
        current_variable = list_of_variables(current_variable_index);
        if ismember(current_variable.name,reserved_names)
            continue
        end
        if current_variable.global || current_variable.persistent
            % A variable cannot be a global or persistent variable.
            continue
        end
        if current_variable.complex || ~strcmp(current_variable.class,'double')
            % A variable must be a real double.
            continue
        end
        if current_variable.size(2)>1
            % A variable must be passed as a column vector.
            continue
        end
        try
            % The concatenation fails if the number of rows differs.
            data = [data, eval(current_variable.name)];
            varlist{end+1} = current_variable.name;
        catch
            error(['load_m_file:: All the vectors (variables) in ' file ' must have the same number of rows (observations)!'])
        end
    end
else
    % NOTE(review): the names are evaluated as code; they come from the script that has
    % just been executed, so this does not widen what the data file can already do.
    for current_variable_index=1:length(varlist0)
        eval(['data = [data, ' varlist0{current_variable_index} '];'])
    end
    varlist = varlist0;
end
%@test:1
%$ % Create a data m-file
%$ fid = fopen('data_m_file.m','w');
%$ fprintf(fid,'FREQ__ = 4;');
%$ fprintf(fid,'INIT__ = ''1938Q4'';');
%$ fprintf(fid,'NAMES__ = {''azert'';''yuiop''};');
%$ fprintf(fid,'TEX__ = {''azert'';''yuiop''};');
%$ fprintf(fid,'azert = [1; 2; 3; 4; 5];');
%$ fprintf(fid,'yuiop = [2; 3; 4; 5; 6];');
%$ fclose(fid);
%$
%$ % Try to read the data m-file
%$ try
%$ datafile = 'data_m_file';
%$ [freq,init,data,varlist,tex] = load_m_file_data(datafile);
%$ t(1) = 1;
%$ catch exception
%$ t(1) = 0;
%$ T = all(t);
%$ LOG = getReport(exception,'extended');
%$ return
%$ end
%$
%$ % Check the results.
%$ t(2) = dassert(freq,4);
%$ t(3) = dassert(isa(init,'dates'),1);
%$ t(4) = dassert(init.freq,4);
%$ t(5) = dassert(init.time,[1938 4]);
%$ t(6) = dassert(varlist,{'azert';'yuiop'});
%$ t(7) = dassert(tex,{'azert';'yuiop'});
%$ t(8) = dassert(data(:,1),[1;2;3;4;5]);
%$ t(9) = dassert(data(:,2),[2;3;4;5;6]);
%$ T = all(t);
%@eof:1
function [freq,init,data,varlist,tex] = load_mat_file_data(file)  % --*-- Unitary tests --*--
% Loads data in a matlab/octave mat-file.
%
% INPUTS
%  o file         string, name of the matlab/octave mat file (with path)
%
% OUTPUTS
%  o freq        integer scalar equal to 1, 4, 12 or 52 (for annual, quarterly, monthly or weekly frequencies).
%  o init        dates object, initial date in the dataset.
%  o data        matrix of doubles, the data (one column per variable).
%  o varlist     cell of strings, names of the variables.
%  o tex         content of TEX__ if the file defines it, [] otherwise.
%
% REMARKS
% The frequency and initial date can be specified with variables FREQ__ and INIT__ in the matlab/octave binary file. FREQ__ must
% be a scalar integer and INIT__ a string like '1938M11', '1945Q3', '1973W3' or '2009A'. If these variables are not specified
% default values for freq and init are 1 and dates(1,1).
%
% If the file contains NAMES__ (cell of strings), only the listed variables are loaded, in that order. Otherwise
% all the remaining fields of the file are loaded. Each variable is reshaped as a column, so row vectors are accepted.

% Copyright (C) 2012-2014 Dynare Team
%
% This file is part of Dynare.
%
% Dynare is free software: you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation, either version 3 of the License, or
% (at your option) any later version.
%
% Dynare is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with Dynare. If not, see <http://www.gnu.org/licenses/>.

% Load everything in a structure; the special fields are removed as they are consumed.
datafile = load(file);

if isfield(datafile,'INIT__')
    if isdate(datafile.INIT__)
        init = dates(datafile.INIT__);
        datafile = rmfield(datafile, 'INIT__');
    else
        error('load_mat_file_data: INIT__ cannot be interpreted as a date.')
    end
else
    init = dates(1,1);
end

if isfield(datafile,'FREQ__')
    freq = datafile.FREQ__;
    datafile = rmfield(datafile, 'FREQ__');
else
    freq = init.freq;
end

if ~isequal(freq,init.freq)
    error('load_mat_file_data: INIT__ and FREQ__ are not consistent!')
end

if isfield(datafile,'NAMES__')
    varlist = datafile.NAMES__;
    datafile = rmfield(datafile, 'NAMES__');
else
    varlist = [];
end

if isfield(datafile,'TEX__')
    tex = datafile.TEX__;
    datafile = rmfield(datafile, 'TEX__');
else
    tex = [];
end

data = [];

% Without an explicit list of names, load all the remaining fields.
if isempty(varlist)
    varlist = fieldnames(datafile);
end

for i=1:length(varlist)
    % Report a missing variable as such (instead of as a dimension mismatch).
    if ~isfield(datafile,varlist{i})
        error(['load_mat_file_data: Variable ' varlist{i} ' cannot be found in ' file '!'])
    end
    tmp = datafile.(varlist{i});
    try
        % The concatenation fails if the number of observations differs.
        data = [data, tmp(:)];
    catch
        error(['load_mat_file:: All the vectors (variables) in ' file ' must have the same number of rows (observations)!'])
    end
end
%@test:1
%$ % Create a data mat-file
%$ FREQ__ = 12;
%$ INIT__ = '1938M11';
%$ NAMES__ = {'hagop'; 'bedros'};
%$ TEX__ = NAMES__;
%$ hagop = [1; 2; 3; 4; 5];
%$ bedros = [2; 3; 4; 5; 6];
%$ save('datafile_for_test');
%$
%$ % Try to read the data mat-file
%$ t = zeros(8,1);
%$ try
%$ [freq,init,data,varlist,tex] = load_mat_file_data('datafile_for_test');
%$ t(1) = 1;
%$ catch exception
%$ t = t(1);
%$ T = all(t);
%$ LOG = getReport(exception,'extended');
%$ return
%$ end
%$
%$ % Check the results.
%$ t(2) = dassert(freq,12);
%$ t(3) = dassert(isa(init,'dates'),true);
%$ t(4) = dassert(init.freq,12);
%$ t(5) = dassert(init.time,[1938 11]);
%$ t(6) = dassert(varlist,{'hagop';'bedros'});
%$ t(7) = dassert(tex,{'hagop';'bedros'});
%$ t(8) = dassert(data(:,1),[1;2;3;4;5]);
%$ t(9) = dassert(data(:,2),[2;3;4;5;6]);
%$ T = all(t);
%@eof:1
%@test:2
%$ % Create a data mat-file
%$ FREQ__ = 12;
%$ INIT__ = '1938M11';
%$ NAMES__ = {'hagop'; 'bedros'};
%$ TEX__ = NAMES__;
%$ hagop = [1, 2, 3, 4, 5];
%$ bedros = [2, 3, 4, 5, 6];
%$ save('datafile_for_test');
%$
%$ % Try to read the data mat-file
%$ t = zeros(8,1);
%$ try
%$ [freq,init,data,varlist,tex] = load_mat_file_data('datafile_for_test');
%$ t(1) = 1;
%$ catch exception
%$ t = t(1);
%$ T = all(t);
%$ LOG = getReport(exception,'extended');
%$ return
%$ end
%$
%$ % Check the results.
%$ t(2) = dassert(freq,12);
%$ t(3) = dassert(isa(init,'dates'),true);
%$ t(4) = dassert(init.freq,12);
%$ t(5) = dassert(init.time,[1938 11]);
%$ t(6) = dassert(varlist,{'hagop';'bedros'});
%$ t(7) = dassert(tex,{'hagop';'bedros'});
%$ t(8) = dassert(data(:,1),[1;2;3;4;5]);
%$ t(9) = dassert(data(:,2),[2;3;4;5;6]);
%$ T = all(t);
%@eof:2
function [freq, init, data, varlist] = load_xls_file_data(file, sheet, range)
% Loads data in a xls file.
%
% INPUTS
%  o file       string, name of the file (with extension).
%  o sheet      string, name of the sheet to be read (defaults to 1, the first sheet).
%  o range      string of the form 'B2:D6' (defaults to '', no restriction).
%
% OUTPUTS
%  o freq       integer scalar (1, 4, 12 or 52), code for frequency.
%  o init       dates object, initial date of the sample.
%  o data       matrix of doubles, the raw data.
%  o varlist    cell of strings (column), names of the variables in the database.
%
% REMARKS
%  The range argument is only available on windows platform (with Excel installed).
%
%  The layout of the sheet (presence of a header with the names of the variables and/or of a first
%  column with dates) is inferred from the sizes of the numeric and text arrays returned by xlsread.
%  If no names are found, the variables are called Variable_1, Variable_2, ...

% Copyright (C) 2013 Dynare Team
%
% This file is part of Dynare.
%
% Dynare is free software: you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation, either version 3 of the License, or
% (at your option) any later version.
%
% Dynare is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with Dynare. If not, see <http://www.gnu.org/licenses/>.

% Set defaults. Each argument is checked independently, so that an empty sheet gets its
% default value even when a range is provided.
if nargin<1 || isempty(file)
    error('load_xls_file_data:: I need at least one input (name of the xls or xlsx file)!')
end

if nargin<2 || isempty(sheet)
    sheet = 1;
end

if nargin<3 || isempty(range)
    range = '';
end

% Check file extension.
if ~(check_file_extension(file,'xls') || check_file_extension(file,'xlsx'))
    ext = get_file_extension(file);
    if isempty(ext)
        % No extension provided, try xls first and then xlsx.
        if exist([file '.xls'],'file')
            file = [file '.xls'];
        elseif exist([file '.xlsx'],'file')
            file = [file '.xlsx'];
        else
            error(['load_xls_file_data:: Unable to find the data file ' file ' with an xls or xlsx extension!'])
        end
    else
        error(['load_xls_file_data:: The data file ' file ' has wrong extension (must be either xls or xlsx)!'])
    end
end

% load excel file.
if isoctave && ~user_has_octave_forge_package('io')
    error('The io package is required to read XLS/XLSX files from Octave')
end

[num,txt] = xlsread(file, sheet, range);

% Get dimensions of num and txt
n2 = size(num, 2);
[t1, t2] = size(txt);

% Check the content of the file.
if isequal(t1,0) && isequal(t2,0)
    % The file contains no informations about the variables and dates.
    notime = 1;
    noname = 1;
elseif isequal(t2,1) && t1>=t2 && n2~=t2 %only one column present, but no var name in header text
    % The single column of text holds the dates; there are no names for the variables.
    notime = 0;
    noname = 1;
elseif isequal(t2,1) && t1>=t2 && n2==t2 %only one column present with var name in header text
    % The text holds the name of the (single) variable; there are no dates.
    notime = 1;
    noname = 0;
elseif isequal(t1,1) && t2>=t1
    % The single row of text holds the names of the variables; there are no dates.
    notime = 1;
    noname = 0;
else
    % The file contains informations about the variables and dates.
    notime = 0;
    noname = 0;
end

% Output initialization.
freq = 1;
init = dates(1,1);
varlist = [];
data = num;

% Update freq.
if ~notime
    % The top left cell is empty when it sits above the dates, on the row of the names.
    if isempty(txt{1,1})
        first_date = txt{2,1};
    else
        first_date = txt{1,1};
    end
    % An integer is interpreted as a year.
    if isnumeric(first_date) && isint(first_date)
        first_date = [num2str(first_date) 'Y'];
    end
    if isdate(first_date)
        init = dates(first_date);
        freq = init.freq;
    else
        error('load_xls_file_data: I am not able to read the dates!')
    end
end

% Update varlist.
if ~noname
    if notime
        varlist = transpose(txt);
    else
        % Skip the cell above the dates.
        varlist = transpose(txt(1,2:end));
    end
    % Remove leading and trailing white spaces
    for i=1:length(varlist)
        varlist(i) = {strtrim(varlist{i})};
    end
else
    % set default names
    varlist = cell(n2,1);
    for i=1:n2
        varlist(i) = {['Variable_' int2str(i)]};
    end
end
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment