Refactor generation of YAML file listing the Dynare working papers

— the file is now created on the fly by the CI
— rewrite the RDF→YAML conversion script in Bash
— fix a bug that caused the titles of some papers to be truncated
parent b1bfb065
Pipeline #3119 passed with stages
in 57 seconds
......@@ -16,7 +16,7 @@ build:
GEM_HOME: "$CI_PROJECT_DIR/.gem"
before_script:
- gem install jekyll
- (cd _data && wget --no-verbose https://www.dynare.org/RePEc/cpm/dynare/wp.yml)
- (cd _data && wget --no-verbose --no-parent --accept '*.rdf' --recursive --no-directories --execute robots=off https://www.dynare.org/RePEc/cpm/dynare/ && ../assets/RePEc/rdf2yml.sh && rm -f *.rdf)
- ./setup-download-links.sh
- bundle install
script:
......
......@@ -166,6 +166,7 @@ exclude:
- /assets/images/logo/*.sh
- /assets/images/logo/Makefile
- /assets/images/logo/dbkgd_ws.png
- /assets/RePEc/rdf2yml.sh
- .gitlab-ci.yml
- setup-download-links.sh
- COPYING
......
import os
import re
import sys
# Converts the dynare-<year>.rdf files of the current directory (starting with
# year 2011) into the wp.yml data file of the website.


def _logical_fields(fh):
    """Yield the "Key: value" fields of an open .rdf file, one string each.

    A line starting with whitespace continues the previous field: it is merged
    into it (separated by a single space) instead of being dropped, so that
    values spanning several lines, such as long titles, are not truncated.
    Blank lines terminate the current field and are not yielded.
    """
    pending = None
    for raw in fh:
        line = raw.rstrip('\r\n')
        if not line.strip():
            if pending is not None:
                yield pending
                pending = None
        elif line[0] in ' \t' and pending is not None:
            pending += ' ' + line.strip()
        else:
            if pending is not None:
                yield pending
            pending = line
    if pending is not None:
        yield pending


def _new_wp():
    """Return an empty working paper record."""
    return {'authors': [], 'urls': [], 'url_function': []}


def _parse_rdf(fh, wps):
    """Add the working papers found in the open .rdf file fh to the dict wps.

    wps maps the paper number to a dict with keys 'title', 'year', 'authors',
    'urls' and 'url_function'. Templates without a Number >= 1 are skipped.
    """
    wpn = -1
    wp = _new_wp()
    for field in _logical_fields(fh):
        # Split on the first colon only: values (URLs, titles) contain colons
        key, _, value = field.partition(':')
        value = value.strip()
        if key == 'Template-Type' or key == '# Local Variables':
            # Start of a new template (or end-of-file trailer): store the
            # previous one
            if wpn >= 1:
                wps[wpn] = wp
            wpn = -1
            wp = _new_wp()
        elif key == 'Number':
            wpn = int(value)
        elif key == 'Title':
            wp['title'] = value
        elif key == 'File-URL':
            wp['urls'].append(value)
        elif key == 'File-Function':
            wp['url_function'].append(value)
        elif key == 'Creation-Date':
            wp['year'] = int(value.split('-')[0])
        elif key == 'Author-Name':
            wp['authors'].append(value)
    # Do not lose the last template when the file has no trailer
    if wpn >= 1:
        wps[wpn] = wp


def _yaml_quote(text):
    """Return text as a YAML double-quoted scalar (escaping \\ and ")."""
    return '"' + text.replace('\\', '\\\\').replace('"', '\\"') + '"'


def _write_yaml(wps, yml):
    """Write the working papers of wps to the open file yml, newest first.

    Exits with status 1 if a paper with several URLs has one whose
    File-Function is missing or unknown.
    """
    for key in sorted(wps, reverse=True):
        wp = wps[key]
        names = []
        for author in wp['authors']:
            # "Last, First" -> "First Last"; a name without comma is kept as is
            name = author.split(',')
            last = name[0]
            first = " ".join(name[1:]).strip()
            names.append('{:s} {:s}'.format(first, last).strip())
        # The keys of an entry must be aligned with "number", i.e. be indented
        # by two columns, for the output to be valid YAML
        yml.write('- number: {:d}\n'.format(key))
        yml.write('  authors: {:s}\n'.format(', '.join(names)))
        yml.write('  title: {:s}\n'.format(_yaml_quote(wp['title'])))
        yml.write('  year: {:d}\n'.format(wp['year']))
        urls = wp['urls']
        functions = wp['url_function']
        # Have to do this because some entries are mal-formed (do not contain
        # File-Function info)
        if len(urls) == 1:
            yml.write('  url: {:s}\n'.format(urls[0]))
        else:
            for idx, url in enumerate(urls):
                function = functions[idx] if idx < len(functions) else ''
                if function == 'Main text':
                    yml.write('  url: {:s}\n'.format(url))
                elif function == 'Data used in the paper':
                    yml.write('  data_url: {:s}\n'.format(url))
                elif function in ('Source code of programs used in the paper',
                                  'Dynare source code used in the paper'):
                    yml.write('  source_url: {:s}\n'.format(url))
                else:
                    print(wp)
                    print('Unhandled url case: {:s}'.format(function))
                    # Report the failure to the caller with a non-zero status
                    sys.exit(1)
        yml.write('\n')


def main():
    """Parse the .rdf files, write wp.yml and move it to ../../_data/."""
    # Parse .rdf files
    wp_first_year = 2011
    wps = dict()
    wp_year = wp_first_year
    while True:
        filename = 'dynare-' + str(wp_year) + '.rdf'
        if not os.path.isfile(filename):
            break
        with open(filename, 'r') as fh:
            _parse_rdf(fh, wps)
        wp_year += 1

    # Write .yml file
    with open('wp.yml', 'w') as yml:
        _write_yaml(wps, yml)

    wp_data_file = "../../_data/wp.yml"
    if os.path.exists(wp_data_file):
        os.remove(wp_data_file)
    os.rename("wp.yml", wp_data_file)
    print('Done writing wp.yml')


if __name__ == '__main__':
    main()
#!/bin/bash
# Converts the dynare-*.rdf files found in the current directory into a wp.yml
# file that Jekyll can consume

# One associative array per attribute of a working paper; each of them is
# indexed by the number of the working paper
declare -A wp_authors
declare -A wp_title
declare -A wp_year
declare -A wp_url
declare -A wp_data_url
declare -A wp_source_url
# Parses an entry “Fieldname: value”, stored as a single line in $field, and
# stores its value in the matching variable of the caller:
#   Number        → $number
#   Creation-Date → $year (leading run of digits)
#   Title         → $title
#   Author-Name   → appended to $authors as “Firstname Lastname”, the authors
#                   being separated by “, ”
#   File-URL      → $last_url
#   File-Function → copies $last_url into $url, $data_url or $source_url
# Fields of any other type are ignored. $field is always unset on return.
# Exits the script with status 1 if the File-Function value is not recognized.
parse_field ()
{
    local value

    # Strip trailing whitespace (including the CR of a CRLF line ending), which
    # would otherwise end up in the values or defeat the exact comparisons below
    field=${field%"${field##*[![:space:]]}"}

    if [[ $field =~ ^Number:\ +([0-9]+) ]]; then
        number=${BASH_REMATCH[1]}
    elif [[ $field =~ ^Creation-Date:\ +([0-9]+) ]]; then
        year=${BASH_REMATCH[1]}
    elif [[ $field =~ ^Title:\ +(.*) ]]; then
        title=${BASH_REMATCH[1]}
    elif [[ $field =~ ^Author-Name:\ +(.+) ]]; then
        value=${BASH_REMATCH[1]}
        # Turn “Lastname, Firstname” into “Firstname Lastname”. A name without
        # a comma is kept untouched (instead of being written twice)
        if [[ $value =~ ^([^,]+),[[:space:]]*(.+)$ ]]; then
            value="${BASH_REMATCH[2]} ${BASH_REMATCH[1]}"
        fi
        if [[ -n $authors ]]; then
            authors+=", "
        fi
        authors+=$value
    elif [[ $field =~ ^File-URL:\ +(.+) ]]; then
        last_url=${BASH_REMATCH[1]}
    elif [[ $field =~ ^File-Function:\ +(.+) ]]; then
        # The function applies to the File-URL that precedes it
        case ${BASH_REMATCH[1]} in
            "Main text")
                url=$last_url
                ;;
            "Data used in the paper")
                data_url=$last_url
                ;;
            *[Ss]ource\ code*)
                source_url=$last_url
                ;;
            *)
                echo "Unrecognized value for File-Function field: ${BASH_REMATCH[1]}!" >&2
                exit 1
                ;;
        esac
    fi
    unset field
}
# Records the template (i.e. a working paper in RePEc’s jargon) described by the
# current parser variables into the global wp_* arrays, under the key $number,
# then unsets those variables so that the next template starts from scratch
add_template ()
{
    local main_url=$url

    # If the File-Function field is missing, then $url is empty and the last
    # URL is the main text
    if [[ -z $main_url ]]; then
        main_url=$last_url
    fi

    echo "Found working paper no. $number"

    wp_source_url[$number]=$source_url
    wp_data_url[$number]=$data_url
    wp_url[$number]=$main_url
    wp_year[$number]=$year
    wp_title[$number]=$title
    wp_authors[$number]=$authors

    unset number authors title year url data_url source_url
}
# Parses an RDF file given on standard input
#
# The file is read line by line. Lines are grouped into fields (a line starting
# with whitespace continues the field of the previous line), and each complete
# field is handed to parse_field through the variable $field. A blank line
# closes the current template, which is registered with add_template as soon as
# its number is known.
parse_rdf_file ()
{
    local line last_url field
    local number authors title year url data_url source_url

    # The “|| [[ -n $line ]]” part ensures that a last line which is not
    # terminated by a newline is processed instead of being silently dropped
    while IFS= read -r line || [[ -n $line ]]; do
        # Tolerate files with CRLF line endings
        line=${line%$'\r'}

        if [[ $line =~ ^[[:blank:]]*$ ]]; then
            # Empty line, add the template and jump to next line
            if [[ -n $field ]]; then
                parse_field
            fi
            if [[ -n $number ]]; then
                add_template
                # The URL of this template must not be mistaken for the main
                # text of a later template which has no File-URL field
                last_url=
            fi
            continue
        fi

        if [[ $line =~ ^[[:blank:]]+(.*)$ ]]; then
            # Continuation of a field: join it with a single space rather than
            # with the whole indentation
            field+=" ${BASH_REMATCH[1]}"
        else
            if [[ -n $field ]]; then
                parse_field
            fi
            field=$line
        fi
    done

    # Handle the case when there is no empty line at the end of the file
    if [[ -n $field ]]; then
        parse_field
    fi
    if [[ -n $number ]]; then
        add_template
    fi
}
# Parse all the RDF files of the current directory
for f in dynare-*.rdf; do
    # When no file matches, the glob is left unexpanded: skip it
    [[ -e $f ]] || continue
    echo "Parsing $f…"
    parse_rdf_file < "$f"
    echo
done

count=${#wp_authors[@]}
if ((count == 0)); then
    echo "No working paper found!" >&2
    exit 1
fi
echo "Found a total of $count working papers."

# The working papers are expected to be numbered from 1 to $count. Check all of
# them before wp.yml is opened, so that a failure does not leave a truncated
# file behind
for ((i = count; i >= 1; i--)); do
    if [[ -z ${wp_authors[$i]} ]] || [[ -z ${wp_title[$i]} ]] || [[ -z ${wp_year[$i]} ]] || [[ -z ${wp_url[$i]} ]]; then
        echo "Some field is missing in working paper $i!" >&2
        exit 1
    fi
done

# Write the YAML file, most recent working paper first
backslash='\'
for ((i = count; i >= 1; i--)); do
    # The title is written as a double-quoted YAML scalar, in which backslashes
    # and double quotes have to be escaped
    title=${wp_title[$i]}
    title=${title//"$backslash"/"$backslash$backslash"}
    title=${title//'"'/"$backslash"'"'}

    # The keys of an entry must be aligned with “number”, i.e. be indented by
    # two columns, for the file to be valid YAML
    printf '%s\n' "- number: $i"
    printf '%s\n' "  authors: ${wp_authors[$i]}"
    printf '%s\n' "  title: \"$title\""
    printf '%s\n' "  year: ${wp_year[$i]}"
    printf '%s\n' "  url: ${wp_url[$i]}"
    if [[ -n ${wp_source_url[$i]} ]]; then
        printf '%s\n' "  source_url: ${wp_source_url[$i]}"
    fi
    if [[ -n ${wp_data_url[$i]} ]]; then
        printf '%s\n' "  data_url: ${wp_data_url[$i]}"
    fi
    echo
done > wp.yml
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment