diff --git a/src/initialize_dseries_class.m b/src/initialize_dseries_class.m
index 8ac05c5b38491879e52d0a62f777b9e51ee43c0a..8d40df962e126f8e6a2a964653cfe1aca1c10335 100644
--- a/src/initialize_dseries_class.m
+++ b/src/initialize_dseries_class.m
@@ -23,7 +23,8 @@ dseries_src_path_s = strsplit(dseries_src_root, filesep());
 isstandalone = ~isequal(dseries_src_path_s(end-3:end), {'matlab', 'modules', 'dseries', 'src'}) & isempty(which('dynare'));
 
 % Set the subfolders to be added in the path.
-p = {'read'; ...
+p = {'mdbnomics2dseries'; ...
+     'read'; ...
      'utilities/is'; ...
      'utilities/op'; ...
      'utilities/convert'; ...
@@ -31,6 +32,7 @@ p = {'read'; ...
      'utilities/insert'; ...
      'utilities/file'; ...
      'utilities/from'; ...
+     'utilities/get'; ...
      'utilities/print'; ...
      'utilities/variables'; ...
      'utilities/cumulate'; ...
diff --git a/src/mdbnomics2dseries/mdbnomics2dseries.m b/src/mdbnomics2dseries/mdbnomics2dseries.m
new file mode 100644
index 0000000000000000000000000000000000000000..1ced8460dabe9d45917f88d6060dacf7d7f833d5
--- /dev/null
+++ b/src/mdbnomics2dseries/mdbnomics2dseries.m
@@ -0,0 +1,71 @@
+function ds = mdbnomics2dseries(varargin) % --*-- Unitary tests --*--
+
+% Converts a cell array returned by the mdbnomics library into a dseries object.
+%
+% INPUTS
+% - varargin{1}  [cell]     (T+1)*N array. The first row holds the column names, the
+%                           following T rows hold the observations. The column names
+%                           must include x_frequency, dataset_code, series_code,
+%                           original_period, period and value.
+%
+% OUTPUTS
+% - ds           [dseries]
+%
+% REMARKS
+% An error is thrown if the input is missing, is not a cell array, holds no
+% observation, lacks one of the required columns, or mixes several frequencies.
+
+% Copyright (C) 2020 Dynare Team
+%
+% This code is free software: you can redistribute it and/or modify
+% it under the terms of the GNU General Public License as published by
+% the Free Software Foundation, either version 3 of the License, or
+% (at your option) any later version.
+%
+% Dynare dseries submodule is distributed in the hope that it will be useful,
+% but WITHOUT ANY WARRANTY; without even the implied warranty of
+% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+% GNU General Public License for more details.
+%
+% You should have received a copy of the GNU General Public License
+% along with Dynare.  If not, see <http://www.gnu.org/licenses/>.
+
+if nargin < 1
+    error('mdbnomics2dseries:WrongInputArguments', 'Input must be non empty!');
+elseif nargin > 1
+    error('mdbnomics2dseries:WrongInputArguments', 'Too many input arguments! Please check the manual.')
+end
+
+db = varargin{1};
+
+% Without this check a non cell input would leave ds unassigned.
+if ~iscell(db)
+    error('mdbnomics2dseries:WrongInputArguments', 'Input must be a cell array!');
+end
+
+% The first row only holds the column names, at least one more row is required.
+if size(db, 1) < 2
+    error('mdbnomics2dseries:WrongInputArguments', 'Input must be non empty!');
+end
+
+o.data = db(2:end,:);
+o.cols = db(1,:);
+
+% Locate the columns used for the conversion.
+col_idx = {'x_frequency', 'dataset_code', 'series_code', 'original_period', 'period', 'value'};
+for ii = 1:length(col_idx)
+    position = find(strcmp(col_idx{ii}, o.cols));
+    if length(position) ~= 1
+        error('mdbnomics2dseries:WrongInputArguments', 'Column ''%s'' must appear exactly once in the first row of the input!', col_idx{ii});
+    end
+    o.col_idx.(col_idx{ii}) = position;
+end
+
+% Check if database has multiple frequencies
+if length(unique(o.data(:, o.col_idx.x_frequency))) > 1
+    error('mdbnomics2dseries:DatabaseCheck', 'The database, that you are trying to convert, contains multiple frequencies. Currently, this type of dseries conversion is not supported. Please select a section of your database with uniform frequency.');
+end
+
+ds = convert_mdbnomics(o);
+
+end
diff --git a/src/utilities/convert/convert_mdbnomics.m b/src/utilities/convert/convert_mdbnomics.m
new file mode 100644
index 0000000000000000000000000000000000000000..71a3180c6a67ce887f448803c334f46f6bfdc454
--- /dev/null
+++ b/src/utilities/convert/convert_mdbnomics.m
@@ -0,0 +1,119 @@
+function ds = convert_mdbnomics(o)
+
+% Builds a dseries object from the content of a mdbnomics database.
+%
+% INPUTS
+% - o   [struct]   Struct with fields:
+%                   + data     [cell]    Observations, one row per series and period.
+%                   + cols     [cell]    Names of the columns of data.
+%                   + col_idx  [struct]  Positions in cols of x_frequency, dataset_code,
+%                                        series_code, original_period, period and value.
+%
+% OUTPUTS
+% - ds  [dseries]  One variable per series. The columns that do not describe an
+%                  observation are attached to the variables as tags.
+%
+% REMARKS
+% The rows of a series are assumed to be contiguous and sorted by increasing
+% period, and all the rows of a dataset are assumed to be contiguous.
+
+% Copyright (C) 2020 Dynare Team
+%
+% This code is free software: you can redistribute it and/or modify
+% it under the terms of the GNU General Public License as published by
+% the Free Software Foundation, either version 3 of the License, or
+% (at your option) any later version.
+%
+% Dynare dseries submodule is distributed in the hope that it will be useful,
+% but WITHOUT ANY WARRANTY; without even the implied warranty of
+% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+% GNU General Public License for more details.
+%
+% You should have received a copy of the GNU General Public License
+% along with Dynare.  If not, see <http://www.gnu.org/licenses/>.
+
+if ~isstruct(o)
+    error('mdbnomics2dseries::convert_mdbnomics: The input argument must be a struct!');
+end
+
+% Initialize dseries
+ds = dseries();
+
+% Nothing to convert if the database holds no observation.
+if isempty(o.data)
+    return
+end
+
+% Check for multiple datasets
+dataset_codes = unique(o.data(:,o.col_idx.dataset_code),'stable');
+
+% Convert mdbnomics to dseries
+for ii = 1:length(dataset_codes)
+    % Slice data for dataset
+    ds_dataset = o.data(strcmp(o.data(:,o.col_idx.dataset_code),dataset_codes{ii}),:);
+    series_codes = unique(ds_dataset(:,o.col_idx.series_code),'stable');
+    % Get list of variable names
+    list_of_names = regexprep(series_codes, '[^a-zA-Z0-9]', '_');
+    % Get dataset values
+    ds_dataset_values = cell2mat(ds_dataset(:,o.col_idx.value));
+    % Get length of series
+    series_length = cellfun(@(x) sum(strcmp(x, ds_dataset(:,o.col_idx.series_code))), series_codes);
+    % Get starting value indices (relative to ds_dataset)
+    dataset_start_val = cumsum([1, series_length(1:end-1)']);
+
+    % Get the first period of each series. The rows must be read in ds_dataset: the
+    % indices in dataset_start_val are not valid in o.data if there are several datasets.
+    starting_dates = ds_dataset(dataset_start_val, o.col_idx.original_period);
+    freq = ds_dataset{1,o.col_idx.x_frequency};
+    if length(unique(starting_dates)) > 1
+        % Build dseries object by series
+        dataset = dseries();
+        for s = 1:length(starting_dates)
+            % Get dseries date format from dataset
+            dseries_date = get_series_start_date(freq, starting_dates{s});
+            % Transform dataset into dseries
+            data_series = ds_dataset_values(dataset_start_val(s):dataset_start_val(s)+series_length(s)-1);
+            series = dseries(data_series, dseries_date, list_of_names{s});
+            dataset = [dataset series];
+        end
+    else
+        % Pad values with NaN when series length in the same dataset is unequal
+        if length(unique(series_length)) > 1
+            val_ = mat2cell(ds_dataset_values, series_length');
+            ds_dataset_values = cell2mat(cellfun(@(x)cat(1, x, nan(max(series_length)-length(x),1)), val_, 'UniformOutput', false));
+        end
+
+        % Reshape dseries input data
+        data_dataset = reshape(ds_dataset_values, max(series_length), size(series_codes, 1));
+
+        % All the series start on the same period, no need to parse the period column
+        % (this also drops the dependency on datetime).
+        dseries_date = get_series_start_date(freq, starting_dates{1});
+        % Transform dataset into dseries
+        dataset = dseries(data_dataset, dseries_date, list_of_names);
+    end
+    % Append initial dseries object
+    ds = [ds dataset];
+end
+
+% Add tags to the variables
+if length(dataset_codes) > 1
+    series_codes = unique(o.data(:,o.col_idx.series_code),'stable');
+    list_of_names = regexprep(series_codes, '[^a-zA-Z0-9]', '_');
+    series_length = cellfun(@(x) sum(strcmp(x, o.data(:,o.col_idx.series_code))), series_codes);
+end
+
+% Every column is used as a tag, except the ones describing a single observation.
+col_idx = find(~ismember(o.cols, {'original_period', 'period', 'original_value', 'value'}));
+tag_names = regexprep(o.cols(col_idx), '[^a-zA-Z0-9]','_');
+data_start_val = cumsum([1, series_length(1:end-1)']);
+tag_data = o.data(data_start_val,col_idx);
+
+for ii = 1:length(tag_names)
+    tag(ds, tag_names{ii});
+    for jj = 1:length(list_of_names)
+        tag(ds, tag_names{ii}, list_of_names{jj}, tag_data{jj,ii});
+    end
+end
+
+end
diff --git a/src/utilities/get/get_series_start_date.m b/src/utilities/get/get_series_start_date.m
new file mode 100644
index 0000000000000000000000000000000000000000..4597519ac5fff415eddbc54b01c206bf2183d3a0
--- /dev/null
+++ b/src/utilities/get/get_series_start_date.m
@@ -0,0 +1,135 @@
+function series_start_date = get_series_start_date(frequency, original_period) % --*-- Unitary tests --*--
+
+% Translates the first period of a mdbnomics series into a date that can be
+% passed to the dseries constructor.
+%
+% INPUTS
+% - frequency          [string]  Dataset frequency: monthly, quarterly, bi-annual, annual
+% - original_period    [string]  Series original period (e.g. 1997-01, 1938-Q4, 1997-S2 or 1997)
+%
+% OUTPUTS
+% - series_start_date  [string]  Formatted date (e.g. 1997M1, 1938Q4, 1997H2 or 1997Y)
+%
+% REMARKS
+% An error is thrown if the frequency is not supported, or if original_period does
+% not hold the expected numerical fields (year, and subperiod except for annual data).
+
+% Copyright (C) 2020 Dynare Team
+%
+% This code is free software: you can redistribute it and/or modify
+% it under the terms of the GNU General Public License as published by
+% the Free Software Foundation, either version 3 of the License, or
+% (at your option) any later version.
+%
+% Dynare dseries submodule is distributed in the hope that it will be useful,
+% but WITHOUT ANY WARRANTY; without even the implied warranty of
+% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+% GNU General Public License for more details.
+%
+% You should have received a copy of the GNU General Public License
+% along with Dynare.  If not, see <http://www.gnu.org/licenses/>.
+
+% Select the dseries frequency letter, and the number of numerical fields expected
+% in original_period. Bi-monthly data (six periods per year) must not be mapped on
+% half years (H): they are rejected as any other unsupported frequency.
+switch frequency
+  case 'monthly'
+    letter = 'M';
+    nfields = 2;
+  case 'quarterly'
+    letter = 'Q';
+    nfields = 2;
+  case 'bi-annual'
+    letter = 'H';
+    nfields = 2;
+  case 'annual'
+    letter = 'Y';
+    nfields = 1;
+  otherwise
+    error('mdbnomics2dseries::get_series_start_date: The frequency of the dataset is currently unsupported!');
+end
+
+% Extract the numerical fields: the year, followed by the subperiod (if any).
+date_ext = regexp(original_period, '\d+', 'match');
+if length(date_ext) < nfields
+    error('mdbnomics2dseries::get_series_start_date: Unable to read the original period!');
+end
+
+series_start_date = [date_ext{1} letter];
+if nfields > 1
+    % Remove the leading zeros of the subperiod (1997-01 becomes 1997M1).
+    series_start_date = [series_start_date regexprep(date_ext{2}, '^0+', '')];
+end
+
+end
+
+%@test:1
+%$ try
+%$     str = get_series_start_date('monthly','1997-01');
+%$     t(1) = true;
+%$ catch
+%$     t(1) = false;
+%$ end
+%$
+%$ if t(1)
+%$     t(2) = dassert(str, '1997M1');
+%$ end
+%$
+%$ T = all(t);
+%@eof:1
+
+%@test:2
+%$ try
+%$     str = get_series_start_date('quarterly','1938-Q4');
+%$     t(1) = true;
+%$ catch
+%$     t(1) = false;
+%$ end
+%$
+%$ if t(1)
+%$     t(2) = dassert(str, '1938Q4');
+%$ end
+%$
+%$ T = all(t);
+%@eof:2
+
+%@test:3
+%$ try
+%$     str = get_series_start_date('bi-annual','1997-S2');
+%$     t(1) = true;
+%$ catch
+%$     t(1) = false;
+%$ end
+%$
+%$ if t(1)
+%$     t(2) = dassert(str, '1997H2');
+%$ end
+%$
+%$ T = all(t);
+%@eof:3
+
+%@test:4
+%$ try
+%$     str = get_series_start_date('annual','1997');
+%$     t(1) = true;
+%$ catch
+%$     t(1) = false;
+%$ end
+%$
+%$ if t(1)
+%$     t(2) = dassert(str, '1997Y');
+%$ end
+%$
+%$ T = all(t);
+%@eof:4
+
+%@test:5
+%$ try
+%$     str = get_series_start_date('bi-monthly','1997-B3');
+%$     t(1) = false;
+%$ catch
+%$     t(1) = true;
+%$ end
+%$
+%$ T = all(t);
+%@eof:5