Commit 14413931 authored by Dóra Kocsis

add subroutines to filter series, closes #2

parent 1c3767bd
......@@ -43,49 +43,68 @@ else
end
ordered_columns_names = [common_columns, dimensions_codes_columns_names, dimensions_labels_columns_names];
if ~isempty(p.Results.dbnomics_filters)
filtered_series_list = filter_series(series_list, p.Results.dbnomics_filters,p.Results.editor_api_base_url);
% Append common column names with period_middle_day and filtered columns for the final DataFrame
idx_period = find(strcmp(ordered_columns_names, "period"));
idx_value = find(strcmp(ordered_columns_names, "value"));
ordered_columns_names = [ordered_columns_names{1:idx_period}, "period_middle_day", ordered_columns_names{idx_period+1:idx_value}, "filtered", ordered_columns_names{idx_value+1:end}];
% Append series_list with the filtered series
series_list = [series_list, filtered_series_list];
end
df = cell(length(series_list{1}.value)*length(series_list)+1,length(ordered_columns_names));
for col = 1:length(ordered_columns_names)
col_ = ordered_columns_names{col};
df{1,col} = col_;
end
series_length=0;
series_length=0;
% Flatten series received from the API (rename some keys of JSON result to match DataFrame organization)
for ii = 1:length(series_list)
flat_series = flatten_dbnomics_series(series_list{ii});
% Add dimensions labels to flat_series
complete_dataset_code = [flat_series.provider_code '_' flat_series.dataset_code];
dataset_dimensions = datasets_dimensions.(complete_dataset_code);
if isfield(dataset_dimensions, 'dimensions_labels')
dataset_dimensions_labels = dataset_dimensions.dimensions_labels;
else
dataset_dimensions_labels = struct();
for jj = 1:length(dataset_dimensions.dimensions_codes_order)
dataset_dimensions_labels.(dataset_dimensions.dimensions_codes_order{jj}) = [dataset_dimensions.dimensions_codes_order{jj} '_label'];
end
end
% Add dimensions values labels to current series
if isfield(dataset_dimensions, 'dimensions_values_labels')
dimension_codes = intersect(dimensions_codes_columns_names, string(fieldnames(dataset_dimensions_labels)')); %string(fieldnames(dataset_dimensions_labels)');
for jj = 1:length(dimension_codes)
series_code = regexprep(flat_series.series_code,'[^a-zA-Z0-9]','_');
dimension_label = dataset_dimensions_labels.(dimension_codes{jj});
flat_series.labels{jj, 1} = dimension_label;
dimension_value_code = regexprep(series_dims_by_dataset_code.(complete_dataset_code).(series_code).(dimension_codes{jj}),'[^a-zA-Z0-9]','_');
if isstrprop(dimension_value_code(1), 'digit') %MATLAB doesn't allow struct fieldnames to start with a digit
dimension_value_code = strcat('x', dimension_value_code);
if ~isfield(series_list{ii}, 'filtered')
flat_series = flatten_dbnomics_series(series_list{ii});
% Add dimensions labels to flat_series
complete_dataset_code = [flat_series.provider_code '_' flat_series.dataset_code];
dataset_dimensions = datasets_dimensions.(complete_dataset_code);
if isfield(dataset_dimensions, 'dimensions_labels')
dataset_dimensions_labels = dataset_dimensions.dimensions_labels;
else
dataset_dimensions_labels = struct();
for jj = 1:length(dataset_dimensions.dimensions_codes_order)
dataset_dimensions_labels.(dataset_dimensions.dimensions_codes_order{jj}) = [dataset_dimensions.dimensions_codes_order{jj} '_label'];
end
try
flat_series.labels{jj, 2} = dataset_dimensions.dimensions_values_labels.(dimension_codes{jj}).(dimension_value_code);
catch
for it = 1:size(dataset_dimensions.dimensions_values_labels.(dimension_codes{jj}), 1)
tmp = regexprep(dataset_dimensions.dimensions_values_labels.(dimension_codes{jj}){it}{1}, '[^a-zA-Z0-9]', '_');
if strcmp(tmp, dimension_value_code)
flat_series.labels{jj, 2} = dataset_dimensions.dimensions_values_labels.(dimension_codes{jj}){it}{2};
end
% Add dimensions values labels to current series
if isfield(dataset_dimensions, 'dimensions_values_labels')
dimension_codes = intersect(dimensions_codes_columns_names, string(fieldnames(dataset_dimensions_labels)')); %string(fieldnames(dataset_dimensions_labels)');
for jj = 1:length(dimension_codes)
series_code = regexprep(flat_series.series_code,'[^a-zA-Z0-9]','_');
dimension_label = dataset_dimensions_labels.(dimension_codes{jj});
flat_series.labels{jj, 1} = dimension_label;
dimension_value_code = regexprep(series_dims_by_dataset_code.(complete_dataset_code).(series_code).(dimension_codes{jj}),'[^a-zA-Z0-9]','_');
if isstrprop(dimension_value_code(1), 'digit') %MATLAB doesn't allow struct fieldnames to start with a digit
dimension_value_code = strcat('x', dimension_value_code);
end
try
flat_series.labels{jj, 2} = dataset_dimensions.dimensions_values_labels.(dimension_codes{jj}).(dimension_value_code);
catch
for it = 1:size(dataset_dimensions.dimensions_values_labels.(dimension_codes{jj}), 1)
tmp = regexprep(dataset_dimensions.dimensions_values_labels.(dimension_codes{jj}){it}{1}, '[^a-zA-Z0-9]', '_');
if strcmp(tmp, dimension_value_code)
flat_series.labels{jj, 2} = dataset_dimensions.dimensions_values_labels.(dimension_codes{jj}){it}{2};
end
end
end
end
end
if ~isempty(p.Results.dbnomics_filters)
flat_series.filtered = false;
end
else
flat_series = series_list{ii};
end
% Create final cell array
for col = 1:length(ordered_columns_names)
col_ = ordered_columns_names{col};
......@@ -93,14 +112,24 @@ else
if strcmp(col_, 'original_value') || strcmp(col_,'value') || strcmp(col_, 'original_period') || strcmp(col_, 'period')
df{series_length+jj+1,col} = flat_series.(col_){jj};
elseif any(strcmp(col_,dimensions_labels_columns_names))
if ~any(strcmp(col_,flat_series.labels))
df{series_length+jj+1,col} = NaN;
if isfield(flat_series, 'labels')
if ~any(strcmp(col_,flat_series.labels))
df{series_length+jj+1,col} = NaN;
else
idx = find(strcmp(flat_series.labels, col_));
df{series_length+jj+1,col} = flat_series.labels{idx,2};
end
else
idx = find(strcmp(flat_series.labels, col_));
df{series_length+jj+1,col} = flat_series.labels{idx,2};
df{series_length+jj+1,col} = NaN;
end
elseif any(strcmp(col_, dimensions_codes_columns_names)) && ~any(strcmp(col_,string(fieldnames(flat_series)')))
df{series_length+jj+1,col} = NaN;
elseif strcmp(col_, 'period_middle_day')
if isfield(flat_series, 'period_middle_day')
df{series_length+jj+1,col} = flat_series.(col_){jj};
else
df{series_length+jj+1,col} = NaN;
end
else
df{series_length+jj+1,col} = flat_series.(col_);
end
......
function filtered_series = filter_series(series_list, dbnomics_filters, editor_api_base_url)
%FILTER_SERIES Apply Time Series Editor filters to a list of series.
%   filtered_series = FILTER_SERIES(series_list, dbnomics_filters, editor_api_base_url)
%   builds the URL of the "apply" endpoint from editor_api_base_url and
%   delegates the work to iter_filtered_series.
%
%   Inputs:
%     series_list         - series to filter, forwarded unchanged to
%                           iter_filtered_series.
%     dbnomics_filters    - filters description, forwarded unchanged to
%                           iter_filtered_series.
%     editor_api_base_url - base URL of the Time Series Editor API, as a
%                           char vector or a string scalar, with or without
%                           a trailing '/'.
%
%   Output:
%     filtered_series     - whatever iter_filtered_series returns.

% Work on a char vector: indexing a string scalar with (end) yields the whole
% string rather than its last character, which made the trailing-slash test
% fail and produced URLs such as ".../editor//apply".
editor_api_base_url = char(editor_api_base_url);
if isempty(editor_api_base_url)
    error('editor_api_base_url must not be empty');
end
if ~endsWith(editor_api_base_url, '/')
    editor_api_base_url = [editor_api_base_url '/'];
end
apply_endpoint_url = [editor_api_base_url 'apply'];
filtered_series = iter_filtered_series(series_list, dbnomics_filters, apply_endpoint_url);
end
function series = flatten_editor_series(series, dbnomics_series)
% FLATTEN_EDITOR_SERIES Adapt a Time Series Editor series so it can be merged
% into the final DataFrame alongside the original DBnomics series.
%
%   series          - struct returned by the Time Series Editor; it must have
%                     a 'frequency' field and is first passed through
%                     normalize_period and normalize_value.
%   dbnomics_series - the source series the filter was applied to; its
%                     provider/dataset identification is copied over.
%
% The returned struct has 'frequency' renamed to 'x_frequency', carries the
% provider_code, dataset_code and dataset_name of the source series, gets a
% '_filtered' suffix on its series_code (and ' (filtered)' on series_name when
% the source has one), and is flagged with filtered = true.

series = normalize_value(normalize_period(series));

% Rename 'frequency' to 'x_frequency' (add the new field, then drop the old one).
series.x_frequency = series.frequency;
series = rmfield(series, 'frequency');

% Identification fields inherited from the source series.
inherited_fields = {'provider_code', 'dataset_code', 'dataset_name'};
for k = 1:numel(inherited_fields)
    field_name = inherited_fields{k};
    series.(field_name) = dbnomics_series.(field_name);
end

% Mark code and (optional) name as belonging to the filtered variant.
series.series_code = horzcat(dbnomics_series.series_code, '_filtered');
if isfield(dbnomics_series, 'series_name')
    series.series_name = horzcat(dbnomics_series.series_name, ' (filtered)');
end

series.filtered = true;
end
\ No newline at end of file
function filtered_series_list = iter_filtered_series(series_list, dbnomics_filters, apply_endpoint_url)
% ITER_FILTERED_SERIES Post series to the Time Series Editor "apply" endpoint
% and collect the filtered series it returns.
%
%   series_list        - cell array of series structs; each must provide the
%                        fields x_frequency, period_start_day and value.
%   dbnomics_filters   - JSON text describing the filters; it is inserted
%                        verbatim into the request body.
%   apply_endpoint_url - URL of the editor "apply" endpoint.
%
%   filtered_series_list - 1-by-N cell array (N = number of input series);
%                          element k holds the flattened filtered version of
%                          series_list{k}. Elements for which the service
%                          returned no result are left empty.
%
% Series are posted in groups of at most
% editor_apply_endpoint_nb_series_per_post, and the results of every group are
% accumulated. An empty series_list returns an empty cell array without
% contacting the service.
%
% Raises an error when the request fails or the response cannot be decoded.

editor_apply_endpoint_nb_series_per_post = 100;
opts = weboptions('ContentType','json', 'MediaType','application/json', 'RequestMethod','POST');

% Field read from each input series -> name it takes in the posted payload.
series_fields = ["x_frequency", "period_start_day", "value"];
posted_series_fields = ["frequency", "period_start_day", "value"];

nb_series = numel(series_list);
series_list = reshape(series_list, 1, nb_series);
% Allocated once, outside the group loop, so earlier groups are not discarded.
filtered_series_list = cell(1, nb_series);

for first = 1:editor_apply_endpoint_nb_series_per_post:nb_series
    last = min(first + editor_apply_endpoint_nb_series_per_post - 1, nb_series);
    group = series_list(first:last);

    posted_series_list = cell(1, numel(group));
    for ss = 1:numel(group)
        posted_series = struct(); % fresh struct: nothing carried over from the previous series
        for ii = 1:length(posted_series_fields)
            posted_series.(posted_series_fields{ii}) = group{ss}.(series_fields{ii});
        end
        posted_series_list{ss} = posted_series;
    end

    json_request = sprintf('{"filters":%s,"series":%s}', dbnomics_filters, jsonencode(posted_series_list));
    try
        response = webwrite(apply_endpoint_url, json_request, opts);
    catch err
        % Keep the historical message but do not hide the underlying cause.
        error('Invalid response from Time Series Editor (JSON expected): %s', err.message);
    end

    filter_results = response.filter_results;
    for ii = 1:length(filter_results)
        % jsondecode yields a struct array when all results share the same
        % fields and a cell array otherwise; accept both.
        if iscell(filter_results)
            filter_result = filter_results{ii};
        else
            filter_result = filter_results(ii);
        end
        filtered_series_list{first + ii - 1} = flatten_editor_series(filter_result.series, group{ii});
    end
end
end
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment