Commit 198023a2 authored by Dóra Kocsis

Add dbnomics to dseries conversion routine, closes Dynare/dseries#44

parent 5ae52153
Pipeline #3777 passed with stage
in 1 minute and 3 seconds
......@@ -23,7 +23,8 @@ dseries_src_path_s = strsplit(dseries_src_root, filesep());
isstandalone = ~isequal(dseries_src_path_s(end-3:end), {'matlab', 'modules', 'dseries', 'src'}) & isempty(which('dynare'));
% Set the subfolders to be added in the path.
p = {'read'; ...
p = {'mdbnomics2dseries'; ...
'read'; ...
'utilities/is'; ...
'utilities/op'; ...
'utilities/convert'; ...
......@@ -31,6 +32,7 @@ p = {'read'; ...
'utilities/insert'; ...
'utilities/file'; ...
'utilities/from'; ...
'utilities/get'; ...
'utilities/print'; ...
'utilities/variables'; ...
'utilities/cumulate'; ...
......
function ds = mdbnomics2dseries(varargin) % --*-- Unitary tests --*--
% Given a cell array from the mdbnomics library, it returns a dseries object.
%
% INPUTS
%
% - Exactly one argument must be provided:
%   + varargin{1}  [cell]  A (T+1)*N array. The first row holds the column
%                          names, the remaining T rows hold the data. The
%                          columns 'x_frequency', 'dataset_code',
%                          'series_code', 'original_period', 'period' and
%                          'value' must be present.
%
% OUTPUTS
% - ds  [dseries]
%
% REMARKS
% An error is raised if the input is missing, is not a cell array, is empty,
% has no data rows, lacks one of the required columns, or mixes several
% frequencies.

% Copyright (C) 2020 Dynare Team
%
% This code is free software: you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation, either version 3 of the License, or
% (at your option) any later version.
%
% Dynare dseries submodule is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with Dynare. If not, see <http://www.gnu.org/licenses/>.

switch nargin
  case 0
    error('mdbnomics2dseries:WrongInputArguments', 'Input must be non empty!');
  case 1
    raw = varargin{1};
    % Without this guard a non-cell input would leave ds unassigned.
    if ~iscell(raw)
        error('mdbnomics2dseries:WrongInputArguments', 'Input must be a cell array!');
    end
    if isempty(raw)
        error('mdbnomics2dseries:WrongInputArguments', 'Input must be non empty!');
    end
    % A header row alone cannot be converted (there is no series to build).
    if size(raw, 1) < 2
        error('mdbnomics2dseries:WrongInputArguments', 'Input must contain a header row and at least one row of data!');
    end
    o.data = raw(2:end,:);
    o.cols = raw(1,:);
    % Locate the columns that the conversion relies on.
    col_idx = {'x_frequency', 'dataset_code', 'series_code', 'original_period', 'period', 'value'};
    for ii = 1:size(col_idx,2)
        position = find(strcmp(col_idx{ii}, o.cols));
        if isempty(position)
            error('mdbnomics2dseries:WrongInputArguments', 'Column ''%s'' is missing from the input!', col_idx{ii});
        end
        o.col_idx.(col_idx{ii}) = position;
    end
    % Check if database has multiple frequencies
    if size(unique(o.data(:, o.col_idx.x_frequency)),1) > 1
        error('mdbnomics2dseries:DatabaseCheck', 'The database, that you are trying to convert, contains multiple frequencies. Currently, this type of dseries conversion is not supported. Please select a section of your database with uniform frequency.');
    end
    ds = convert_mdbnomics(o);
  otherwise
    error('mdbnomics2dseries:WrongInputArguments', 'Too many input arguments! Please check the manual.')
end
end
function ds = convert_mdbnomics(o)
% Converts a table obtained with the mdbnomics library into a dseries object,
% one variable per series code, and attaches the remaining columns as tags.
%
% INPUTS
% - o   [struct]   Struct with fields:
%                  + data     [cell]    data rows (header row removed),
%                  + cols     [cell]    column names,
%                  + col_idx  [struct]  positions of the columns x_frequency,
%                                       dataset_code, series_code,
%                                       original_period, period and value.
%
% OUTPUTS
% - ds  [dseries]
%
% REMARKS
% The first row of each series is located by cumulating the series lengths
% (in order of first appearance), so the rows of a given series are expected
% to be contiguous within each dataset.

% Copyright (C) 2020 Dynare Team
%
% This code is free software: you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation, either version 3 of the License, or
% (at your option) any later version.
%
% Dynare dates submodule is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with Dynare. If not, see <http://www.gnu.org/licenses/>.

if ~isstruct(o)
    error('mdbnomics2dseries::convert_mdbnomics: The input argument must be a struct!');
end

% Initialize dseries
ds = dseries();

% Check for multiple datasets
dataset_codes = unique(o.data(:,o.col_idx.dataset_code),'stable');

% Convert mdbnomics to dseries
for ii = 1:length(dataset_codes)
    % Slice data for dataset
    ds_dataset = o.data(strcmp(o.data(:,o.col_idx.dataset_code),dataset_codes{ii}),:);
    series_codes = unique(ds_dataset(:,o.col_idx.series_code),'stable');
    % Get list of variable names (non alphanumeric characters replaced by underscores)
    list_of_names = cellfun(@(x)regexprep(x, '[^a-zA-Z0-9]', '_'), series_codes, 'UniformOutput', false);
    % Get dataset values
    ds_dataset_values = cell2mat(ds_dataset(:,o.col_idx.value));
    % Get length of series (number of rows carrying each series code)
    series_length = cellfun(@(x)sum(strcmp(x, ds_dataset(:,o.col_idx.series_code))), series_codes);
    % Get starting value indices (row offsets inside ds_dataset)
    dataset_start_val = cumsum([1, series_length(1:end-1)']);
    % Check if dataset starting date is uniform. The offsets are relative to
    % the current dataset, hence ds_dataset (not o.data) must be indexed here.
    starting_dates = ds_dataset(dataset_start_val, o.col_idx.original_period);
    freq = ds_dataset{1,o.col_idx.x_frequency};
    if length(unique(starting_dates)) > 1
        % Build dseries object by series
        dataset = dseries();
        for s = 1:length(starting_dates)
            % Get dseries date format from dataset
            dseries_date = get_series_start_date(freq, starting_dates{s});
            % Transform dataset into dseries
            data_series = ds_dataset_values(dataset_start_val(s):dataset_start_val(s)+series_length(s)-1);
            series = dseries(data_series, dseries_date, list_of_names{s});
            dataset = [dataset series];
        end
    else
        % Pad values with NaN when series length in the same dataset is unequal
        if size(series_length, 1) > 1 && length(unique(series_length)) > 1
            val_ = mat2cell(ds_dataset_values, series_length');
            ds_dataset_values = cell2mat(cellfun(@(x)cat(1, x, nan(max(series_length)-length(x),1)), val_, 'UniformOutput', false));
        end
        % Reshape dseries input data (one column per series)
        data_dataset = reshape(ds_dataset_values, max(series_length), size(series_codes, 1));
        % Get dseries date format from dataset
        starting_date = min(datetime(ds_dataset(:,o.col_idx.period), 'InputFormat', 'yyyy-MM-dd', 'Format', 'yyyy-MM-dd'));
        original_period = ds_dataset(strcmp(ds_dataset(:,o.col_idx.period),string(starting_date)),o.col_idx.original_period);
        dseries_date = get_series_start_date(freq, original_period{1});
        % Transform dataset into dseries
        dataset = dseries(data_dataset, dseries_date, list_of_names);
    end
    % Append initial dseries object
    ds = [ds dataset];
end

% Add tags to the variables. With several datasets the per-dataset lists
% computed in the loop only describe the last dataset, so they are rebuilt
% from the whole table.
if length(dataset_codes) > 1
    series_codes = unique(o.data(:,o.col_idx.series_code),'stable');
    list_of_names = cellfun(@(x)regexprep(x, '[^a-zA-Z0-9]', '_'), series_codes, 'UniformOutput', false);
    series_length = cellfun(@(x)sum(strcmp(x, o.data(:,o.col_idx.series_code))), series_codes);
end

% Select relevant column indices (ignore columns: 'original_period', 'period', 'original_value', 'value')
% NOTE(review): the positions 7 to 10 are hard-coded; this presumes the
% ignored columns always sit there -- confirm against the mdbnomics output.
col_idx = [1:6,11:size(o.data,2)];
tag_names = cellfun(@(x)regexprep(x, '[^a-zA-Z0-9]','_'), o.cols(col_idx), 'UniformOutput', false);
data_start_val = cumsum([1, series_length(1:end-1)']);
% The tag values of a series are read from its first row.
tag_data = o.data(data_start_val,col_idx);
for ii = 1:length(tag_names)
    tag(ds, tag_names{ii});
    for jj = 1:length(list_of_names)
        tag(ds, tag_names{ii}, list_of_names{jj}, tag_data{jj,ii});
    end
end
end
function series_start_date = get_series_start_date(frequency, original_period) % --*-- Unitary tests --*--
% Given the frequency of a dataset and the original period of an observation
% (as provided by the mdbnomics library), it returns the corresponding date
% written in the dseries format (e.g. '1997M1', '1938Q4', '1997H2', '1997Y').
%
% INPUTS
% - frequency        [string]  Dataset frequency: monthly, quarterly, bi-annual, annual
% - original_period  [string]  Series original period (e.g. '1997-01', '1938-Q4', '1997-S2', '1997')
%
% OUTPUTS
% - series_start_date  [string]
%
% REMARKS
% An error is raised if the frequency is not supported, or if the original
% period does not hold both a year and a subperiod when the frequency
% requires one.

% Copyright (C) 2020 Dynare Team
%
% This code is free software: you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation, either version 3 of the License, or
% (at your option) any later version.
%
% Dynare dates submodule is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with Dynare. If not, see <http://www.gnu.org/licenses/>.

% Extract the groups of digits (year first, then subperiod if any).
date_ext = regexp(original_period, '\d*', 'Match');

% Every frequency but the annual one reads a second group of digits; fail
% with an explicit message rather than with an out of bound index error.
if any(strcmp(frequency, {'monthly', 'quarterly', 'bi-annual', 'bi-monthly'})) && numel(date_ext) < 2
    error('mdbnomics2dseries:get_series_start_date', 'The original period ''%s'' does not contain a year and a subperiod!', original_period);
end

switch frequency
  case 'monthly'
    % Leading zeros of the month are removed ('01' -> '1').
    series_start_date = [date_ext{1} 'M' regexprep(date_ext{2},'\<0*','')];
  case 'quarterly'
    series_start_date = [date_ext{1} 'Q' date_ext{2}];
  case {'bi-annual', 'bi-monthly'}
    % NOTE(review): 'bi-monthly' is mapped to the half-year tag 'H' exactly as
    % 'bi-annual' is -- confirm that this is the intended treatment.
    series_start_date = [date_ext{1} 'H' date_ext{2}];
  case 'annual'
    series_start_date = [original_period 'Y'];
  otherwise
    error('mdbnomics2dseries::get_series_start_date: The frequency of the dataset is currently unsupported!');
end
end
%@test:1
%$ try
%$ str = get_series_start_date('monthly','1997-01');
%$ t(1) = true;
%$ catch
%$ t(1) = false;
%$ end
%$
%$ if t(1)
%$ t(2) = dassert(str, '1997M1');
%$ end
%$
%$ T = all(t);
%@eof:1
%@test:2
%$ try
%$ str = get_series_start_date('quarterly','1938-Q4');
%$ t(1) = true;
%$ catch
%$ t(1) = false;
%$ end
%$
%$ if t(1)
%$ t(2) = dassert(str, '1938Q4');
%$ end
%$
%$ T = all(t);
%@eof:2
%@test:3
%$ try
%$ str = get_series_start_date('bi-annual','1997-S2');
%$ t(1) = true;
%$ catch
%$ t(1) = false;
%$ end
%$
%$ if t(1)
%$ t(2) = dassert(str, '1997H2');
%$ end
%$
%$ T = all(t);
%@eof:3
%@test:4
%$ try
%$ str = get_series_start_date('annual','1997');
%$ t(1) = true;
%$ catch
%$ t(1) = false;
%$ end
%$
%$ if t(1)
%$ t(2) = dassert(str, '1997Y');
%$ end
%$
%$ T = all(t);
%@eof:4
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment