Commit a7fd5ba1 authored by Houtan Bastani

python script to read RePEc .rdf files and create _data/wp.yml

parent 9623782c
......@@ -167,6 +167,7 @@ exclude:
- /assets/images/logo/*.pdf
- /assets/images/logo/*.tex
- /assets/images/logo/*.sh
- /assets/RePEc/rdf2yml.py
keep_files:
- .git
- /assets/images/*
......
import os
import re
import sys
# Convert the yearly 'dynare-<year>.rdf' files found in the current directory
# into 'wp.yml', one YAML list entry per working paper, newest number first.

# First year for which an .rdf file is looked up.
wp_first_year = 2011

# Maps a File-Function value to the YAML key its File-URL is written under.
URL_FIELDS = {
    'Main text': 'url',
    'Data used in the paper': 'data_url',
    'Source code of programs used in the paper': 'source_url',
    'Dynare source code used in the paper': 'source_url',
}


def new_record():
    """Return an empty working-paper record with its list fields in place."""
    return {'authors': [], 'urls': [], 'url_function': []}


def parse_rdf(lines):
    """Parse the lines of one .rdf file into ``{number: record}``.

    A record is a dict with the list fields 'authors', 'urls' and
    'url_function', plus 'title' and 'year' when the matching lines are
    present.  A 'Template-Type' or '# Local Variables' line closes the record
    collected so far; records whose number is missing or below 1 are dropped.

    Raises:
        ValueError: if a 'Number' or 'Creation-Date' value is not an integer.
    """
    papers = {}
    number = -1
    record = new_record()
    for line in lines:
        # Split on the first colon only, so values that contain colons
        # (URLs, titles) are kept whole.
        key, _, value = line.partition(':')
        value = value.strip()
        if key in ('Template-Type', '# Local Variables'):
            if number >= 1:
                papers[number] = record
            number = -1
            record = new_record()
        elif key == 'Number':
            number = int(value)
        elif key == 'Title':
            record['title'] = value
        elif key == 'File-URL':
            record['urls'].append(value)
        elif key == 'File-Function':
            record['url_function'].append(value)
        elif key == 'Creation-Date':
            # Only the part before the first '-' (the year) is kept.
            record['year'] = int(value.split('-')[0])
        elif key == 'Author-Name':
            record['authors'].append(value)
    # Store the last record even when no closing '# Local Variables' line
    # follows it; otherwise the final paper of such a file would be lost.
    if number >= 1:
        papers[number] = record
    return papers


def format_author(author):
    """Turn 'Last, First' into 'First Last'; a name without comma is kept."""
    parts = author.split(',')
    last = parts[0]
    first = ' '.join(parts[1:]).strip()
    return '{:s} {:s}'.format(first, last).strip()


def yaml_quote(text):
    """Return *text* as a double-quoted YAML scalar, escaping '\\' and '"'."""
    return '"' + text.replace('\\', '\\\\').replace('"', '\\"') + '"'


def format_entry(number, wp):
    """Return the YAML text (one list item plus a blank line) for one paper.

    Nested keys are indented by two spaces so that they line up with
    'number' after the '- ' marker, which YAML requires for sibling keys.

    Raises:
        ValueError: if 'title' or 'year' is missing, if several URLs do not
            each have a File-Function, or if a File-Function is not handled.
    """
    missing = [field for field in ('title', 'year') if field not in wp]
    if missing:
        raise ValueError('Missing field(s): {:s}'.format(', '.join(missing)))

    authors = ', '.join(format_author(author) for author in wp['authors'])
    out = [
        '- number: {:d}\n'.format(number),
        '  authors: {:s}\n'.format(authors),
        '  title: {:s}\n'.format(yaml_quote(wp['title'])),
        '  year: {:d}\n'.format(wp['year']),
    ]

    urls = wp['urls']
    # Have to do this because some entries are mal-formed (do not contain
    # File-Function info): a single URL is always taken to be the main text.
    if len(urls) == 1:
        out.append('  url: {:s}\n'.format(urls[0]))
    else:
        functions = wp['url_function']
        if len(functions) != len(urls):
            raise ValueError(
                'Found {:d} File-URL but {:d} File-Function lines'.format(
                    len(urls), len(functions)))
        for url, function in zip(urls, functions):
            field = URL_FIELDS.get(function)
            if field is None:
                raise ValueError(
                    'Unhandled url case: {:s}'.format(function))
            out.append('  {:s}: {:s}\n'.format(field, url))
    out.append('\n')
    return ''.join(out)


# Parse .rdf files, one per year, stopping at the first year with no file.
wps = dict()
wp_year = wp_first_year
while True:
    filename = 'dynare-' + str(wp_year) + '.rdf'
    if not os.path.isfile(filename):
        break
    with open(filename, 'r') as fh:
        wps.update(parse_rdf(fh))
    wp_year += 1

# Write .yml file.  Every entry is rendered before the file is opened so that
# a malformed record cannot leave a truncated wp.yml behind.
entries = []
for key in sorted(wps, reverse=True):
    try:
        entries.append(format_entry(key, wps[key]))
    except ValueError as err:
        print(wps[key])
        print(err)
        # Non-zero status: the conversion did not succeed.
        sys.exit(1)
with open('wp.yml', 'w') as yml:
    yml.writelines(entries)
print('Done writing wp.yml')
......@@ -12,7 +12,7 @@ title: Working Papers
<ol reversed start="{{ latestwp }}">
{% for wp in site.data.wp limit:5 %}
<li>
{{ wp.authors }} ({{ wp.year }}), "<a href="http://www.dynare.org/wp-repo/dynarewp0{{ wp.number }}.pdf">{{ wp.title }}</a>"
{{ wp.authors }} ({{ wp.year }}), "<a href="{{ wp.url }}">{{ wp.title }}</a>"
{% if wp.source_url %}(<a href="{{ wp.source_url }}">source</a>){% endif %}
{% if wp.data_url %}(<a href="{{ wp.data_url }}">data</a>){% endif %}
</li>
......
......@@ -12,7 +12,7 @@ title: Working Papers
<ol reversed start="{{ latestwp }}">
{% for wp in site.data.wp %}
<li>
{{ wp.authors }} ({{ wp.year }}), "<a href="http://www.dynare.org/wp-repo/dynarewp0{{ wp.number }}.pdf">{{ wp.title }}</a>"
{{ wp.authors }} ({{ wp.year }}), "<a href="{{ wp.url }}">{{ wp.title }}</a>"
{% if wp.source_url %}(<a href="{{ wp.source_url }}">source</a>){% endif %}
{% if wp.data_url %}(<a href="{{ wp.data_url }}">data</a>){% endif %}
</li>
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment