load_csv_file_data.m 6.67 KB
Newer Older
1
function [freq, init, data, varlist] = load_csv_file_data(file) % --*-- Unitary tests --*--
%function [freq, init, data, varlist] = load_csv_file_data(file)
% Loads data in a csv file.
%
% INPUTS
%  o file        string, name of the csv file (with path).
%
% OUTPUTS
%  o freq        integer scalar equal to 1, 4, 12 or 52 (for annual, quarterly, monthly or weekly frequencies).
%  o init        dates object, initial date in the dataset.
%  o data        matrix of doubles, the data.
%  o varlist     cell of strings, names of the variables.
%
% REMARKS
%  The varlist output will be set only if the first line contains variable
%  names. Similarly, if the first column does not contain dates, then
%  freq will be 1 and init will be year 1.
%
%  Three reading strategies are used:
%   - Octave, first line shorter than 4097 characters: csv2cell/parsecell
%     (requires the Octave Forge io package).
%   - Octave, longer first line: the file is read as raw bytes and parsed
%     by hand, line by line (this path returns directly).
%   - Matlab: importdata.

% Copyright (C) 2012-2017 Dynare Team
%
% This file is part of Dynare.
%
% Dynare is free software: you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation, either version 3 of the License, or
% (at your option) any later version.
%
% Dynare is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with Dynare.  If not, see <http://www.gnu.org/licenses/>.

% Output initialization
freq = 1;                  % Default frequency is annual.
init = dates(1,1);         % Default initial date is year one.
varlist = [];

assert(exist(file, 'file') == 2, ['load_csv_file_data: I can''t find file ' file '!']);

if isoctave
    % Read at most 4097 characters of the first line to decide which parser to use.
    % NOTE(review): presumably csv2cell cannot handle lines longer than 4096
    % characters -- confirm against the io package documentation.
    fid = fopen(file, 'r');
    firstline = fgetl(fid, 4097);
    fclose(fid);
    if length(firstline) < 4097
        if ~user_has_octave_forge_package('io')
            error(['The io package is required to read CSV files from Octave. ' ...
                   'It can be installed by running the following from the Octave ' ...
                   ' command line: pkg install -forge io']);
        end
        A = csv2cell(file);
        [data, T, L] = parsecell(A);
        % Text extending beyond the numeric block on the top (resp. left) is
        % interpreted as variable names (resp. dates).
        withvars = L.numlimits(2,1) > L.txtlimits(2,1);
        withtime = L.numlimits(1,1) > L.txtlimits(1,1);
    else
        % Manual parser: load the whole file as a column vector of byte codes.
        fid = fopen(file, 'r');
        bfile = fread(fid);
        fclose(fid);

        if isunix || ismac
            newline_code = 10;
        elseif ispc
            newline_code = 13;
        else
            error('load_csv_file_data is not implemented for your OS');
        end

        % Get the positions of the end-of-line code (on Windows, fall back to
        % LF if the file contains no CR).
        end_of_line_locations = find(bfile==newline_code);
        if ispc && isempty(end_of_line_locations)
            newline_code = 10;
            end_of_line_locations = find(bfile==newline_code);
        end

        % Get the number of (newline terminated) lines in the file
        ndx = length(end_of_line_locations);

        % Create the first (b) and last (c) byte index of each line. Only
        % lines terminated by the end-of-line code are kept.
        b = [1; end_of_line_locations+1];
        c = [end_of_line_locations-1; length(bfile)+1];
        b = b(1:end-1);
        c = c(1:end-1);
        linea = 1;

        % Test the content of the first column, from the second line on (the
        % first line may hold the variable names). The column is considered to
        % hold dates only if every non blank line starts with a valid date
        % string. Cells are trimmed because a line may carry a residual LF/CR
        % depending on the end-of-line convention.
        withtime = 1;
        for r=2:length(b)
            linee = char(transpose(bfile(b(r):c(r))));
            if isempty(strtrim(linee))
                % Blank line: it tells nothing about the first column.
                continue
            end
            [B,C] = get_cells_id(linee,',');
            if ~isdate(strtrim(linee(B(1):C(1))))
                withtime = 0;
                break
            end
        end

        % Test the content of the first line: look at the second cell (the
        % first one may sit above the dates column), or at the only cell if
        % there is a single column. Surrounding blanks and double quotes are
        % ignored, consistently with the cleaning applied to varlist below.
        linee = char(transpose(bfile(b(1):c(1))));
        [B,C] = get_cells_id(linee,',');
        k = min(2, length(B));
        withnames = isvarname(strrep(strtrim(linee(B(k):C(k))),'"',''));

        if withnames
            % Get the first line of the csv file (names of the variables).
            linee = char(transpose(bfile(b(linea):c(linea))));
            % Get the content of the first line and determine the number of variables and their names.
            [B,C] = get_cells_id(linee,',');
            if withtime
                % Skip the cell above the dates column.
                B = B(2:end);
                C = C(2:end);
            end
            varlist = cell(length(B),1);
            number_of_variables = length(varlist);
            for i=1:number_of_variables
                varlist(i) = {linee(B(i):C(i))};
            end
            varlist = strtrim(varlist);
            linea = linea+1;
            % Remove double quotes if any
            varlist = strrep(varlist,'"','');
        end

        % Get following line (number 1 or 2 depending on withnames flag)
        linee = char(transpose(bfile(b(linea):c(linea))));
        [B,C] = get_cells_id(linee,',');
        if withtime
            tmp = strtrim(linee(B(1):C(1)));
            % Check the dates formatting
            if ~isdate(tmp)
                error('load_csv_file_data:: Formatting error. I can''t read the dates!')
            end
            init = dates(tmp);
            freq = init.freq;
            first = 2;
        else
            first = 1;
        end

        if ~withnames
            % Without a header, the number of variables is the number of cells
            % in the first data line, minus the dates column if any.
            number_of_variables = length(B)-withtime;
        end

        % Initialization of matrix data.
        data = zeros(ndx,number_of_variables);

        % Populate data.
        for linea = 1+withnames:ndx
            linee = char(transpose(bfile(b(linea):c(linea))));
            [B,C] = get_cells_id(linee,',');
            for i=first:length(B)
                data(linea,i-withtime) = str2double(linee(B(i):C(i)));
            end
        end

        % Remove first line if withnames
        data = data(1+withnames:ndx, :);
        return
    end
else
    A = importdata(file, ',');
    if ~isstruct(A)
        % Purely numeric file: no variable names, no dates.
        data = A;
        T = {};
        withvars = 0;
        withtime = 0;
    else
        data = A.data;
        T = A.textdata;
        % importdata() allows text only at the top and the left, so the following
        %  tests are sufficient.
        withvars = size(T, 2) >= size(data, 2);
        withtime = size(T, 1) >= size(data, 1);
    end
end

if withvars
    % First row of the text block holds the names (after the dates column, if any).
    varlist = T(1, 1+withtime:end);
    T = T(2:end, :);
end
if withtime
    % First remaining text cell is the initial date.
    init = dates(T{1, 1});
    freq = init.freq;
end

varlist = transpose(varlist);

% Remove double quotes if any (nothing to clean when no names were found).
if ~isempty(varlist)
    varlist = strrep(varlist,'"','');
end

%@test:1
%$ % Locate the root of the dseries source tree (the csv file lives under ../tests/data).
%$ dseries_src_root = strrep(which('initialize_dseries_class'),'initialize_dseries_class.m','');
%$
%$ % Instantiate a dseries from the data in the csv file.
%$ try
%$   d = dseries([ dseries_src_root '../tests/data/data_ca1_csv.csv' ]);
%$   t(1) = true;
%$ catch
%$   t(1) = false;
%$ end
%$
%$ if t(1)
%$   t(2) = dassert(d.name,{'y_obs'; 'pie_obs'; 'R_obs'; 'de'; 'dq'});
%$ end
%$
%$ T = all(t);
%@eof:1