Commit 921c7c1e authored by Dóra Kocsis's avatar Dóra Kocsis

add new functions: mdbnomics_datasets, mdbnomics_dimensions, mdbnomics_series.

parent b4568f07
......@@ -147,6 +147,72 @@ Example:
'BEA/NIUnderlyingDetail-U001BC/S315-Q',...
'BEA/NIUnderlyingDetail-U001BC/S315-M'});
### Fetch the available datasets of a provider
When fetching series from DBnomics, the user needs to give a provider and a dataset before specifying correct dimensions.
With the function `mdbnomics_datasets`, the user can download the list of the available datasets for a provider.
If no `provider_code` is supplied, the datasets of every provider are returned (one field per provider in the resulting structure).
Example:
>> datasets = mdbnomics_datasets('provider_code', 'IMF');
The result is a structure with one field per requested provider; each field holds a cell array containing the dataset codes and names of that provider.
With the same function, if the user wants to fetch the available datasets for multiple providers, a cell array of providers has to be given.
Example:
>> datasets = mdbnomics_datasets('provider_code', {'IMF', 'BDF'});
In the event that the user only requests the datasets for one provider, if `simplify` is defined as `true`, then the result will be a simple cell array, not a structure.
Example:
>> datasets = mdbnomics_datasets('provider_code', 'IMF', 'simplify', true);
### Fetch the possible dimensions of available datasets of a provider
When fetching series from DBnomics, it can be interesting and especially useful to specify dimensions for a particular dataset to download only the series you want to analyse. With the function `mdbnomics_dimensions`,
the user can download these dimensions and their meanings.
Example:
>> datasets = mdbnomics_dimensions('provider_code', 'IMF', 'dataset_code', 'WEO');
The result is a nested structure: its first-level field is named after the provider and dataset (here `IMF_WEO`), its second-level fields are the dimension names, and each branch ends with a structure.
In the event that the user only requests the dimensions for one dataset for one provider, if `simplify` is defined as `true`, then the result will be a simple structure, not a nested one.
Example:
>> datasets = mdbnomics_dimensions('provider_code', 'IMF', 'dataset_code', 'WEO', 'simplify', true);
To download the dimensions of every dataset gathered by DBnomics, the user does not have to set any arguments.
Example:
>> datasets = mdbnomics_dimensions();
### Fetch the series codes and names of available datasets of a provider
The user can download the list of series, and especially their codes, of a provider's dataset by using the function `mdbnomics_series`. The result is a structure with a cell array at the end of each branch. If `simplify` is defined as `true`,
then the result will be a simple cell array.
Example:
>> series = mdbnomics_series('provider_code', 'IMF', 'dataset_code', 'WEO', 'simplify', true);
Like the function `mdbnomics()`, features can be added to `mdbnomics_series()`. The user can ask for the series with specific dimensions:
Example:
>> series = mdbnomics_series('provider_code', 'IMF', 'dataset_code', 'WEO', 'dimensions', '{"weo-subject":["NGDP_RPCH"]}', 'simplify', true);
or with a query:
Example:
>> series = mdbnomics_series('provider_code', 'IMF', 'dataset_code', 'WEO', 'query', 'NGDP_RPCH');
> :warning: **We ask the user to use this function sparingly because each dataset can contain a huge number of series. Please fetch the series of only one dataset at a time if you need them, or visit the DBnomics website.**
### Transform time series
The routines can interact with the [Time Series Editor](https://editor.nomics.world/) to transform time series by applying filters to them.
Available filters are listed on the [filters page](https://editor.nomics.world/filters).
......
function datasets = mdbnomics_datasets(varargin) % --*-- Unitary tests --*--
% function datasets = mdbnomics_datasets(varargin)
% Downloads the list of available datasets for a selection of providers (or all of them) from https://db.nomics.world/.
% By default, the function returns a structure with one field per provider, each holding an N-by-2 cell array
% whose first column contains the dataset codes and whose second column contains the dataset names.
%
% POSSIBLE PARAMETERS
%   provider_code         [char|cellstr]  DBnomics code of one or multiple providers. If empty, the provider codes are first
%                                         downloaded with the function mdbnomics_providers and then the available datasets are requested.
%   simplify              [logical]       If true, when the datasets are requested for only one provider then a cell array is returned, not a structure.
%                                         If not provided, the default value is false.
%
% OUTPUTS
%   datasets              [struct|cell]   Structure of N-by-2 cell arrays (one field per provider), or a single N-by-2 cell array
%                                         when simplify is true. Field names are the provider codes, made valid MATLAB identifiers.
%
% ERRORS
%   An error is raised when simplify is true and the result does not contain exactly one provider.
%
% SPECIAL REQUIREMENTS
%   none

% Copyright (C) 2020 Dynare Team
%
% This file is part of Dynare.
%
% Dynare is free software: you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation, either version 3 of the License, or
% (at your option) any later version.
%
% Dynare is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with Dynare.  If not, see <http://www.gnu.org/licenses/>.

api_base_url = 'https://api.db.nomics.world';
api_version = 22;

p = inputParser;
validStringInput = @(x) ischar(x) || iscellstr(x);
p.addParameter('provider_code', '', validStringInput);
p.addParameter('simplify', false, @islogical);
p.KeepUnmatched = false;
p.parse(varargin{:});

if isempty(p.Results.provider_code)
    provider_code = mdbnomics_providers('code', true);
elseif ischar(p.Results.provider_code)
    provider_code = {p.Results.provider_code};
else
    provider_code = p.Results.provider_code;
end

datasets = struct();
for i = 1:numel(provider_code)
    pc = provider_code{i};
    provider_page = sprintf('%s/v%d/providers/%s', api_base_url, api_version, pc);
    provider_info = webread(provider_page);
    % The category tree is flattened here and the accumulators are returned explicitly:
    % MATLAB passes arguments by value, so a helper cannot fill them in place.
    [code, name] = flatten_category_tree(provider_info.category_tree, {}, {});
    % A provider code is not necessarily a valid struct field name.
    datasets.(matlab.lang.makeValidName(pc)) = [code(:), name(:)];
end

if p.Results.simplify
    provider_fields = fieldnames(datasets);
    if numel(provider_fields) == 1
        datasets = datasets.(provider_fields{1});
    else
        error('Your query corresponds to multiple providers, not possible to simplify');
    end
end
end

function [code, name] = flatten_category_tree(nodes, code, name)
% Recursively collects the code and name of every dataset found in a decoded category tree.
% nodes may be a struct array or a cell array of structs (the JSON decoder returns a cell
% array when sibling nodes do not all share the same fields). A node with a 'children' field
% is descended into; any other node with a 'code' field is recorded as a dataset.
% NOTE(review): this assumes categories are exactly the nodes carrying 'children' -- confirm against the DBnomics API schema.
for n = 1:numel(nodes)
    if iscell(nodes)
        node = nodes{n};
    else
        node = nodes(n);
    end
    if ~isstruct(node)
        continue
    end
    if isfield(node, 'children')
        [code, name] = flatten_category_tree(node.children, code, name);
    elseif isfield(node, 'code')
        code{end+1} = node.code;
        if isfield(node, 'name')
            name{end+1} = node.name;
        else
            % Keep both columns aligned when a dataset has no name.
            name{end+1} = '';
        end
    end
end
end
%@test:1
%$ try
%$ datasets = mdbnomics_datasets('provider_code', 'IMF', 'simplify', true);
%$ t(1) = 1;
%$ catch
%$ t = 0;
%$ end
%$
%$ if t(1)
%$ t(2) = dassert(length(unique(datasets(:,1))), 43);
%$ t(3) = dassert(size(datasets, 2), 2);
%$ end
%$
%$ T = all(t);
%@eof:1
%@test:2
%$ try
%$ datasets = mdbnomics_datasets('provider_code', 'IMF');
%$ t(1) = 1;
%$ catch
%$ t = 0;
%$ end
%$
%$ if t(1)
%$ t(2) = dassert(fieldnames(datasets), {'IMF'});
%$ t(3) = dassert(size(datasets.IMF,1), 43);
%$ end
%$
%$ T = all(t);
%@eof:2
%@test:3
%$ try
%$ datasets = mdbnomics_datasets('provider_code', {'IMF', 'AMECO'});
%$ t(1) = 1;
%$ catch
%$ t = 0;
%$ end
%$
%$ if t(1)
%$ t(2) = dassert(fieldnames(datasets), {'IMF'; 'AMECO'});
%$ t(3) = dassert(size(datasets.IMF,1), 43);
%$ t(4) = dassert(size(datasets.AMECO,1), 473);
%$ end
%$
%$ T = all(t);
%@eof:3
\ No newline at end of file
function dimensions = mdbnomics_dimensions(varargin) % --*-- Unitary tests --*--
% function dimensions = mdbnomics_dimensions(varargin)
% Downloads the list of dimensions (if they exist) for available datasets of a selection of providers from https://db.nomics.world/.
% By default, the function returns a nested structure: one field per dataset (named <provider>_<dataset>, made a valid
% MATLAB identifier), containing one field per dimension, which holds the labels of the values of that dimension.
%
% POSSIBLE PARAMETERS
%   provider_code         [char|cellstr]  DBnomics code of one or multiple providers. If empty, the provider codes are first
%                                         downloaded with the function mdbnomics_providers.
%   dataset_code          [char|cellstr]  DBnomics code of one or multiple datasets; the i-th dataset belongs to the i-th provider.
%                                         If empty, the dataset codes of every requested provider are downloaded
%                                         with the function mdbnomics_datasets and then the dimensions are requested.
%   simplify              [logical]       If true, when the dimensions are requested for only one provider and one dataset then only the dimension names and their values are provided.
%                                         If not provided, the default value is false.
%
% OUTPUTS
%   dimensions            [struct]
%
% ERRORS
%   - dataset_code is given without provider_code;
%   - provider_code and dataset_code are both given but do not have the same number of elements;
%   - simplify is true and the result does not contain exactly one dataset.
%
% SPECIAL REQUIREMENTS
%   none

% Copyright (C) 2020 Dynare Team
%
% This file is part of Dynare.
%
% Dynare is free software: you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation, either version 3 of the License, or
% (at your option) any later version.
%
% Dynare is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with Dynare.  If not, see <http://www.gnu.org/licenses/>.

api_base_url = 'https://api.db.nomics.world';
api_version = 22;

p = inputParser;
validStringInput = @(x) ischar(x) || iscellstr(x);
p.addParameter('provider_code', '', validStringInput);
p.addParameter('dataset_code', '', validStringInput);
p.addParameter('simplify', false, @islogical);
p.KeepUnmatched = false;
p.parse(varargin{:});

if isempty(p.Results.provider_code) && ~isempty(p.Results.dataset_code)
    error('When you use dataset_code, you must specify provider_code as well.');
end

% Wrap char inputs in a cell first, so that the element counts compared below
% are numbers of codes and not numbers of characters.
provider_code = p.Results.provider_code;
if ischar(provider_code) && ~isempty(provider_code)
    provider_code = {provider_code};
end
dataset_code = p.Results.dataset_code;
if ischar(dataset_code) && ~isempty(dataset_code)
    dataset_code = {dataset_code};
end

if ~isempty(provider_code) && ~isempty(dataset_code) && numel(provider_code) ~= numel(dataset_code)
    error('Please specify as many provider codes as dataset codes.')
end

if isempty(provider_code)
    provider_code = mdbnomics_providers('code', true);
end

if isempty(dataset_code)
    % One (provider, dataset) pair per dataset available for the requested providers.
    [provider_code, dataset_code] = expand_provider_datasets(provider_code);
end

dimensions = struct();
for i = 1:numel(dataset_code)
    pc = provider_code{i};
    dc = dataset_code{i};
    dataset_page = sprintf('%s/v%d/datasets/%s/%s', api_base_url, api_version, pc, dc);
    dataset_info = webread(dataset_page);
    % Dataset codes may contain characters such as '-' or '.' that are not allowed in a field name.
    dataset_name = matlab.lang.makeValidName(sprintf('%s_%s', pc, dc));
    labels = read_dataset_property(dataset_info, dataset_name, 'dimensions_labels');
    values = read_dataset_property(dataset_info, dataset_name, 'dimensions_values_labels');
    dataset_dimensions = fieldnames(labels);
    for d = 1:numel(dataset_dimensions)
        dimension_name = dataset_dimensions{d};
        if isfield(values, dimension_name)
            dimensions.(dataset_name).(dimension_name) = values.(dimension_name);
        else
            % The dimension is declared but the response carries no value labels for it.
            dimensions.(dataset_name).(dimension_name) = struct();
        end
    end
end

if p.Results.simplify
    dataset_fields = fieldnames(dimensions);
    if numel(dataset_fields) == 1
        dimensions = dimensions.(dataset_fields{1});
    else
        error('Your query corresponds to multiple datasets, not possible to simplify');
    end
end
end

function [providers, datasets] = expand_provider_datasets(provider_code)
% Builds two aligned column cell arrays listing every dataset of every given provider:
% providers{k} is the provider code owning the dataset code datasets{k}.
providers = {};
datasets = {};
for i = 1:numel(provider_code)
    listing = mdbnomics_datasets('provider_code', provider_code{i}, 'simplify', true);
    if isempty(listing)
        continue
    end
    codes = listing(:, 1);
    providers = [providers; repmat(provider_code(i), numel(codes), 1)];
    datasets = [datasets; codes];
end
end

function value = read_dataset_property(dataset_info, dataset_name, property)
% Reads dataset_info.datasets.docs.(property) or, failing that,
% dataset_info.datasets.(dataset_name).(property). Returns a structure without fields
% when neither location exists or when the value found is not a structure, so that
% the caller can always apply fieldnames/isfield to the result.
try
    value = dataset_info.datasets.docs.(property);
catch
    try
        value = dataset_info.datasets.(dataset_name).(property);
    catch
        value = struct();
    end
end
if ~isstruct(value)
    value = struct();
end
end
%@test:1
%$ try
%$ dimensions = mdbnomics_dimensions('provider_code', 'IMF', 'dataset_code', 'WEO');
%$ t(1) = 1;
%$ catch
%$ t = 0;
%$ end
%$
%$ if t(1)
%$ t(2) = dassert(fieldnames(dimensions), {'IMF_WEO'});
%$ t(3) = dassert(isfield(dimensions.IMF_WEO, 'unit'), true);
%$ t(4) = dassert(length(fieldnames(dimensions.IMF_WEO.unit)), 13);
%$ end
%$
%$ T = all(t);
%@eof:1
%@test:2
%$ try
%$ dimensions = mdbnomics_dimensions('provider_code', 'IMF', 'dataset_code', 'WEO', 'simplify', true);
%$ t(1) = 1;
%$ catch
%$ t = 0;
%$ end
%$
%$ if t(1)
%$ t(2) = dassert(isfield(dimensions, 'IMF_WEO'), false);
%$ t(3) = dassert(isfield(dimensions, 'unit'), true);
%$ t(4) = dassert(length(fieldnames(dimensions.unit)), 13);
%$ end
%$
%$ T = all(t);
%@eof:2
function series = mdbnomics_series(varargin) % --*-- Unitary tests --*--
% function series = mdbnomics_series(varargin)
% Downloads the list of series for available datasets of a selection of providers from https://db.nomics.world/.
% We warn the user that this function can be (very) long to execute!
% We remind that DBnomics requests data from 63 providers to retrieve 21675 datasets for a total of approximately 720 million series.
% By default, the function returns a structure with one field per dataset (named <provider>_<dataset>, made a valid
% MATLAB identifier), each holding an N-by-2 cell array of series codes (first column) and series names (second column).
%
% POSSIBLE PARAMETERS
%   provider_code         [char|cellstr]  DBnomics code of one or multiple providers. If empty, the provider codes are first
%                                         downloaded with the function mdbnomics_providers.
%   dataset_code          [char|cellstr]  DBnomics code of one or multiple datasets; the i-th dataset belongs to the i-th provider.
%                                         If empty, the dataset codes of every requested provider are downloaded
%                                         with the function mdbnomics_datasets and then the series are requested.
%   dimensions            [char|cellstr]  DBnomics code of one or several dimensions in the specified provider and dataset.
%                                         If provided it must be a string formatted like: '{"country":["ES","FR","IT"],"indicator":["IC.REG.COST.PC.FE.ZS.DRFN"]}'.
%                                         A single value is applied to every requested dataset.
%   query                 [char|cellstr]  A query to filter/select series from a provider's dataset.
%                                         A single value is applied to every requested dataset.
%   only_number_of_series [logical]       If true, only the number of series for the given query will be printed in the command window.
%                                         If not provided, the default value is false.
%   simplify              [logical]       If true, when the series are requested for only one dataset then a cell array is returned, not a structure.
%                                         If not provided, the default value is false.
%
% OUTPUTS
%   series                [struct|cell]
%
% ERRORS
%   - dataset_code is given without provider_code;
%   - provider_code and dataset_code are both given but do not have the same number of elements;
%   - query or dimensions has more than one element but fewer elements than requested datasets;
%   - simplify is true and the result does not contain exactly one dataset.
%
% SPECIAL REQUIREMENTS
%   none

% Copyright (C) 2020 Dynare Team
%
% This file is part of Dynare.
%
% Dynare is free software: you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation, either version 3 of the License, or
% (at your option) any later version.
%
% Dynare is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with Dynare.  If not, see <http://www.gnu.org/licenses/>.

api_base_url = 'https://api.db.nomics.world';
api_version = 22;

p = inputParser;
validStringInput = @(x) ischar(x) || iscellstr(x);
p.addParameter('provider_code', '', validStringInput);
p.addParameter('dataset_code', '', validStringInput);
p.addParameter('dimensions', '', validStringInput);
p.addParameter('query', '', validStringInput);
p.addParameter('only_number_of_series', false, @islogical);
p.addParameter('simplify', false, @islogical);
p.KeepUnmatched = false;
p.parse(varargin{:});

if isempty(p.Results.provider_code) && ~isempty(p.Results.dataset_code)
    error('When you use dataset_code, you must specify provider_code as well.');
end

% Wrap char inputs in a cell first, so that the element counts compared below
% are numbers of codes and not numbers of characters.
provider_code = p.Results.provider_code;
if ischar(provider_code) && ~isempty(provider_code)
    provider_code = {provider_code};
end
dataset_code = p.Results.dataset_code;
if ischar(dataset_code) && ~isempty(dataset_code)
    dataset_code = {dataset_code};
end

if ~isempty(provider_code) && ~isempty(dataset_code) && numel(provider_code) ~= numel(dataset_code)
    error('Please specify as many provider codes as dataset codes.')
end

if isempty(provider_code)
    provider_code = mdbnomics_providers('code', true);
end

if isempty(dataset_code)
    % One (provider, dataset) pair per dataset available for the requested providers.
    [provider_code, dataset_code] = enumerate_provider_datasets(provider_code);
end

number_of_datasets = numel(dataset_code);
db_query = per_dataset_option(p.Results.query, number_of_datasets, 'query');
dimensions = per_dataset_option(p.Results.dimensions, number_of_datasets, 'dimensions');

series = struct();
for i = 1:number_of_datasets
    pc = provider_code{i};
    dc = dataset_code{i};
    dataset_page = sprintf('%s/v%d/series/%s/%s', api_base_url, api_version, pc, dc);
    if ~isempty(db_query)
        dataset_page = sprintf('%s?q=%s', dataset_page, db_query{i});
    end
    if ~isempty(dimensions)
        % Use '&' when the query already opened the parameter list.
        dataset_page = sprintf('%s%sdimensions=%s', dataset_page, query_separator(dataset_page), dimensions{i});
    end
    dataset_info = webread(dataset_page);
    % Dataset codes may contain characters such as '-' or '.' that are not allowed in a field name.
    dataset_name = matlab.lang.makeValidName(sprintf('%s_%s', pc, dc));
    limit = dataset_info.series.limit;
    num_found = dataset_info.series.num_found;
    if p.Results.only_number_of_series
        fprintf('Number of series = %d\n', num_found);
        return
    end
    fprintf('The dataset %s from provider %s contains %d series.\n', dc, pc, num_found);
    % The response already in hand is the first page; only the following pages are requested.
    [series_code, series_name] = append_series_docs(dataset_info.series.docs, {}, {});
    if num_found > limit
        sep = query_separator(dataset_page);
        for offset = limit:limit:(num_found - 1)
            page_info = webread(sprintf('%s%soffset=%d', dataset_page, sep, offset));
            [series_code, series_name] = append_series_docs(page_info.series.docs, series_code, series_name);
        end
    end
    series.(dataset_name) = [series_code(:), series_name(:)];
end

if p.Results.simplify
    dataset_fields = fieldnames(series);
    if numel(dataset_fields) == 1
        series = series.(dataset_fields{1});
    else
        error('Your query corresponds to multiple datasets, not possible to simplify');
    end
end
end

function [providers, datasets] = enumerate_provider_datasets(provider_code)
% Builds two aligned column cell arrays listing every dataset of every given provider:
% providers{k} is the provider code owning the dataset code datasets{k}.
providers = {};
datasets = {};
for i = 1:numel(provider_code)
    listing = mdbnomics_datasets('provider_code', provider_code{i}, 'simplify', true);
    if isempty(listing)
        continue
    end
    codes = listing(:, 1);
    providers = [providers; repmat(provider_code(i), numel(codes), 1)];
    datasets = [datasets; codes];
end
end

function values = per_dataset_option(values, n, option_name)
% Normalises an optional char/cellstr argument to a cell array indexable by dataset number.
% An empty input yields {}; a single value is repeated for the n datasets.
if isempty(values)
    values = {};
    return
end
if ischar(values)
    values = {values};
end
if numel(values) == 1
    values = repmat(values, 1, n);
elseif numel(values) < n
    error('Please specify either one value for %s or as many values as dataset codes.', option_name);
end
end

function sep = query_separator(url)
% Returns '&' if url already contains a '?', and '?' otherwise.
if any(url == '?')
    sep = '&';
else
    sep = '?';
end
end

function [codes, names] = append_series_docs(docs, codes, names)
% Appends the series_code and series_name of every element of docs to the accumulators.
% docs may be a struct array or a cell array of structs (the JSON decoder returns a cell
% array when the elements do not all share the same fields).
for s = 1:numel(docs)
    if iscell(docs)
        doc = docs{s};
    else
        doc = docs(s);
    end
    codes{end+1} = doc.series_code;
    names{end+1} = doc.series_name;
end
end
%@test:1
%$ try
%$ series = mdbnomics_series('provider_code', 'IMF', 'dataset_code', 'WEO', 'simplify', true);
%$ t(1) = 1;
%$ catch
%$ t = 0;
%$ end
%$
%$ if t(1)
%$ t(2) = dassert(length(unique(series(:,1))), 8924);
%$ t(3) = dassert(size(series, 2), 2);
%$ end
%$
%$ T = all(t);
%@eof:1
%@test:2
%$ try
%$ series = mdbnomics_series('provider_code', 'IMF', 'dataset_code', 'WEO');
%$ t(1) = 1;
%$ catch
%$ t = 0;
%$ end
%$
%$ if t(1)
%$ t(2) = dassert(fieldnames(series), {'IMF_WEO'});
%$ t(3) = dassert(length(unique(series.IMF_WEO(:,1))), 8924);
%$ t(4) = dassert(size(series.IMF_WEO, 2), 2);
%$ end
%$
%$ T = all(t);
%@eof:2
%@test:3
%$ try
%$ series = mdbnomics_series('provider_code', 'IMF', 'dataset_code', 'WEO', 'dimensions', '{"weo-subject":["NGDP_RPCH"]}', 'simplify', true);
%$ t(1) = 1;
%$ catch
%$ t = 0;
%$ end
%$
%$ if t(1)
%$ t(2) = dassert(length(unique(series(:,1))), 194);
%$ t(3) = dassert(size(series, 2), 2);
%$ end
%$
%$ T = all(t);
%@eof:3
%@test:4
%$ try
%$ series = mdbnomics_series('provider_code', 'IMF', 'dataset_code', 'WEO', 'query', 'NGDP_RPCH');
%$ t(1) = 1;
%$ catch
%$ t = 0;
%$ end
%$
%$ if t(1)
%$ t(2) = dassert(length(unique(series.IMF_WEO(:,1))), 194);
%$ t(3) = dassert(size(series.IMF_WEO, 2), 2);
%$ end
%$
%$ T = all(t);
%@eof:4
\ No newline at end of file
function providers = mdbnomics_providers(varargin) % --*-- Unitary tests --*--
% function mdbnomics_providers(varargin)
% Downloads the list of DBnomics providers from https://db.nomics.world/.
% By default, the function returns a cell array containing the list of providers
% with additional informations such as the region, the website, etc.
%
% POSSIBLE PARAMETERS
% code [logical] If true, then only the providers are returned in a vector. If not provided, the default value is false.
%
% OUTPUTS
% providers
%
% SPECIAL REQUIREMENTS
% none
% Copyright (C) 2020 Dynare Team
%
% This file is part of Dynare.
%
% Dynare is free software: you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation, either version 3 of the License, or
% (at your option) any later version.
%
% Dynare is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with Dynare. If not, see <http://www.gnu.org/licenses/>.
api_base_url = 'https://api.db.nomics.world';
api_version = 22;
p = inputParser;
p.addParameter('code', false, @islogical);
p.KeepUnmatched = false;
p.parse(varargin{:});
providers_url = sprintf('%s/v%d/providers', api_base_url, api_version);
response = webread(providers_url);
if p.Results.code
providers = cell(size(response.providers.docs, 1),1);
for i = 1:size(response.providers.docs, 1)