Takes a set of BibTeX publication entries and converts them for use with academicpages.github.io. This is an interactive Jupyter notebook (see more info here).
The core Python code is also in pubsFromBibs.py.
Run either from the markdown_generator folder after updating the publist dictionary with your BibTeX file path, the venue keys to look up, any venue pre-text, and the collection name and permalink.
TODO: Make this work with other databases of citations, TODO: Merge this with the existing TSV parsing solution
from pybtex.database.input import bibtex
import pybtex.database.input.bibtex
from time import strptime
import string
import html
import os
import re
#todo: incorporate different collection types rather than a catch all publications, requires other changes to template
# Maps each publication source to its BibTeX file and rendering options.
publist = {
    "proceeding": {
        # BibTeX file that is parsed for this source.
        "file": "/home/lensenandr/publications/lensen.bib",
        # Candidate fields for the venue name; the first one present in an
        # entry is used.
        "venuekey": ["booktitle", "journal"],
        # Text placed in front of the venue name.
        "venue-pretext": "",
        # Collection name and permalink prefix written to the front matter.
        "collection": {
            "name": "publications",
            "permalink": "/publication/",
        },
    },
}
# Single characters that must be written as HTML entities. The values are the
# entity strings, not the raw characters: an identity mapping would leave
# quotes unescaped inside the quoted YAML front-matter values.
html_escape_table = {
    "&": "&amp;",
    '"': "&quot;",
    "'": "&apos;",
}
def html_escape(text):
    """Return text with every character listed in html_escape_table replaced.

    Characters without an entry in the table are passed through unchanged.
    """
    lookup = html_escape_table.get
    pieces = [lookup(ch, ch) for ch in text]
    return "".join(pieces)
def _strip_bibtex_markup(text):
    """Drop the braces and backslashes some BibTeX entries keep for formatting."""
    return text.replace("{", "").replace("}", "").replace("\\", "")


# For every configured source, parse its BibTeX file and write one markdown
# page (YAML front matter plus a link to the paper) per entry into
# ../_publications/. Entries lacking a required field are reported and skipped.
for pubsource in publist:
    source_cfg = publist[pubsource]
    parser = bibtex.Parser()
    bibdata = parser.parse_file(source_cfg["file"])

    # Loop through the individual references in the given BibTeX file.
    for bib_id in bibdata.entries:
        entry = bibdata.entries[bib_id]
        b = entry.fields

        # Defaults used when an entry has no month or day.
        pub_month = "01"
        pub_day = "01"

        try:
            pub_year = f'{b["year"]}'

            if "month" in b:
                month = b["month"]
                if len(month) < 3:
                    # Short value such as "3" or "11": zero-pad to two digits.
                    pub_month = ("0" + month)[-2:]
                else:
                    # Month name such as "November": parse its first three
                    # letters as an abbreviated month name.
                    pub_month = "{:02d}".format(strptime(month[:3], "%b").tm_mon)
            if "day" in b:
                # NOTE(review): the day is used exactly as written in the
                # entry (e.g. "6", not "06"); padding it would rename the
                # files already generated.
                pub_day = str(b["day"])

            pub_date = pub_year + "-" + pub_month + "-" + pub_day

            # Title without BibTeX markup; reused for slug, citation and YAML.
            plain_title = _strip_bibtex_markup(b["title"])

            url_slug = re.sub("\\[.*\\]|[^a-zA-Z0-9_-]", "", plain_title.replace(" ", "-"))
            url_slug = url_slug.replace("--", "-")

            md_filename = (pub_date + "-" + url_slug + ".md").replace("--", "-")
            html_filename = (pub_date + "-" + url_slug).replace("--", "-")

            # Build the citation from raw (unescaped) text. It is escaped
            # exactly once when written to the front matter below; escaping
            # the pieces here as well would turn "&" into "&amp;amp;".
            citation = ""

            # Citation authors: first given name and first family name only.
            for author in entry.persons["author"]:
                # Either name list may be empty (e.g. a single-word or
                # organisation author), so only index the non-empty ones.
                name_parts = [
                    names[0]
                    for names in (author.first_names, author.last_names)
                    if names
                ]
                citation += _strip_bibtex_markup(" " + " ".join(name_parts) + ", ")

            # Citation title.
            citation += "\"" + plain_title + ".\""

            # Venue: the pre-text followed by the first configured key that
            # the entry actually has.
            venue = source_cfg["venue-pretext"]
            for key in source_cfg["venuekey"]:
                if key in b:
                    venue += _strip_bibtex_markup(b[key])
                    break

            citation += " " + venue
            citation += ", " + pub_year + "."
            if "note" in b:
                citation += " " + b["note"] + "."

            # YAML front matter. No excerpt or paperurl field is emitted.
            md = "---\ntitle: \"" + html_escape(plain_title) + '"\n'
            md += "collection: " + source_cfg["collection"]["name"]
            md += "\npermalink: " + source_cfg["collection"]["permalink"] + html_filename
            md += "\ndate: " + pub_date
            md += "\nvenue: '" + html_escape(venue) + "'"

            # The paper link always points at the local PDF named after the
            # BibTeX key, replacing any url present in the entry.
            b["url"] = '/files/{}.pdf'.format(bib_id)
            has_url = "url" in b and len(str(b["url"])) > 5

            md += "\ncitation: '" + html_escape(citation) + "'"
            md += "\n---"

            # Markdown body of the individual page.
            if has_url:
                md += "\n[Access paper here](" + b["url"] + "){:target=\"_blank\"}\n"

            md_filename = os.path.basename(md_filename)

            # Write UTF-8 explicitly so non-ASCII titles and author names do
            # not depend on the platform's default encoding.
            with open("../_publications/" + md_filename, "w", encoding="utf-8") as f:
                f.write(md)
            print(f'SUCESSFULLY PARSED {bib_id}: \"', b["title"][:60],"..."*(len(b['title'])>60),"\"")
        # A field may not exist for a reference.
        except KeyError as e:
            # The missing field may be the title itself, so do not index it
            # unconditionally while reporting the problem.
            shown_title = b["title"] if "title" in b else ""
            print(f'WARNING Missing Expected Field {e} from entry {bib_id}: \"', shown_title[:30],"..."*(len(shown_title)>30),"\"")
SUCESSFULLY PARSED lensen2017New: " New Representations in Genetic Programming for Feature Const ... " SUCESSFULLY PARSED lensen2016Genetic: " Genetic Programming for Region Detection, Feature Extraction ... " SUCESSFULLY PARSED lensen2015Genetic: " Genetic Programming for algae detection in river images " SUCESSFULLY PARSED lensen2015hybrid: " A hybrid Genetic Programming approach to feature detection a ... " SUCESSFULLY PARSED lensen2017Improving: " Improving {k}-means clustering with genetic programming for ... " SUCESSFULLY PARSED lensen2017GPGC: " {GPGC:} genetic programming for automatic clustering using a ... " SUCESSFULLY PARSED lensen2017Using: " Using Particle Swarm Optimisation and the Silhouette Metric ... " SUCESSFULLY PARSED lensen2016Particle: " Particle Swarm Optimisation Representations for Simultaneous ... " SUCESSFULLY PARSED lensen2018generating: " Generating Redundant Features with Unsupervised Multi-tree G ... " SUCESSFULLY PARSED lensen2018automatically: " Automatically Evolving Difficult Benchmark Feature Selection ... " SUCESSFULLY PARSED oneill2018particle: " Particle Swarm Optimisation for Feature Selection and Weight ... " SUCESSFULLY PARSED lensen2019can: " Can Genetic Programming Do Manifold Learning Too? " SUCESSFULLY PARSED alsahaf2019survey: " A survey on evolutionary machine learning " SUCESSFULLY PARSED lensen2019genetic: " Genetic Programming for Evolving Similarity Functions for Cl ... " SUCESSFULLY PARSED lensen2019multi: " Multi-Objective Genetic Programming for Manifold Learning: B ... " SUCESSFULLY PARSED lensen2020genetic: " Genetic Programming for Evolving a Front of Interpretable Mo ... " SUCESSFULLY PARSED schofield2020evolving: " Evolving Simpler Constructed Features for Clustering Problem ... "
bibdata
BibliographyData(entries=OrderedCaseInsensitiveDict([('lensen2017New', Entry('inproceedings', fields=[('title', 'New Representations in Genetic Programming for Feature Construction in k-Means Clustering'), ('booktitle', 'Proceedings of the 11th International Conference on Simulated Evolution and Learning ({SEAL})'), ('year', '2017'), ('month', 'November'), ('day', '10'), ('volume', '10593'), ('series', 'Lecture Notes in Computer Science'), ('pages', '543--555'), ('publisher', 'Springer'), ('url', '/files/lensen2017New.pdf')], persons=OrderedCaseInsensitiveDict([('author', [Person('Lensen, Andrew'), Person('Xue, Bing'), Person('Zhang, Mengjie')])]))), ('lensen2016Genetic', Entry('inproceedings', fields=[('title', 'Genetic Programming for Region Detection, Feature Extraction, Feature Construction and Classification in Image Data'), ('booktitle', 'Proceedings of the European Conference on Genetic Programming (EuroGP)'), ('year', '2016'), ('month', 'March'), ('day', '30'), ('volume', '9594'), ('series', 'Lecture Notes in Computer Science'), ('pages', '51--67'), ('publisher', 'Springer'), ('url', '/files/lensen2016Genetic.pdf')], persons=OrderedCaseInsensitiveDict([('author', [Person('Lensen, Andrew'), Person('Al{-}Sahaf, Harith'), Person('Zhang, Mengjie'), Person('Xue, Bing')]), ('editor', [Person('Heywood, Malcolm I.'), Person('McDermott, James'), Person('Castelli, Mauro'), Person('Costa, Ernesto'), Person('Sim, Kevin')])]))), ('lensen2015Genetic', Entry('inproceedings', fields=[('title', 'Genetic Programming for algae detection in river images'), ('booktitle', '{IEEE} Congress on Evolutionary Computation, {CEC} 2015, Sendai, Japan, May 25-28, 2015'), ('year', '2015'), ('month', 'November'), ('day', '23'), ('pages', '2468--2475'), ('publisher', '{IEEE}'), ('bibsource', 'dblp computer science bibliography, http://dblp.org'), ('biburl', 'http://dblp.org/rec/bib/conf/cec/LensenAZV15'), ('doi', '10.1109/CEC.2015.7257191'), ('owner', 'lensenandr'), ('timestamp', 
'2017.11.09'), ('url', '/files/lensen2015Genetic.pdf')], persons=OrderedCaseInsensitiveDict([('author', [Person('Lensen, Andrew'), Person('Al{-}Sahaf, Harith'), Person('Zhang, Mengjie'), Person('Verma, Brijesh')])]))), ('lensen2015hybrid', Entry('inproceedings', fields=[('title', 'A hybrid Genetic Programming approach to feature detection and image classification'), ('booktitle', '2015 International Conference on Image and Vision Computing New Zealand, {IVCNZ} 2015, Auckland, New Zealand, November 23-24, 2015'), ('year', '2015'), ('pages', '1--6'), ('publisher', '{IEEE}'), ('bibsource', 'dblp computer science bibliography, http://dblp.org'), ('biburl', 'http://dblp.org/rec/bib/conf/ivcnz/LensenAZX15'), ('doi', '10.1109/IVCNZ.2015.7761564'), ('owner', 'lensenandr'), ('timestamp', '2017.11.09'), ('url', '/files/lensen2015hybrid.pdf')], persons=OrderedCaseInsensitiveDict([('author', [Person('Lensen, Andrew'), Person('Al{-}Sahaf, Harith'), Person('Zhang, Mengjie'), Person('Xue, Bing')])]))), ('lensen2017Improving', Entry('inproceedings', fields=[('title', 'Improving {k}-means clustering with genetic programming for feature construction'), ('booktitle', 'Genetic and Evolutionary Computation Conference, Berlin, Germany, July 15-19, 2017, Companion Material Proceedings'), ('year', '2017'), ('month', 'April'), ('day', '19'), ('pages', '237--238'), ('publisher', '{ACM}'), ('owner', 'lensenandr'), ('timestamp', '2017.11.09'), ('url', '/files/lensen2017Improving.pdf')], persons=OrderedCaseInsensitiveDict([('author', [Person('Lensen, Andrew'), Person('Xue, Bing'), Person('Zhang, Mengjie')]), ('editor', [Person('Bosman, Peter A. 
N.')])]))), ('lensen2017GPGC', Entry('inproceedings', fields=[('title', '{GPGC:} genetic programming for automatic clustering using a flexible non-hyper-spherical graph-based approach'), ('booktitle', 'Proceedings of the Genetic and Evolutionary Computation Conference, {GECCO}.'), ('year', '2017'), ('month', 'July'), ('day', '15'), ('pages', '449--456'), ('publisher', '{ACM}'), ('owner', 'lensenandr'), ('timestamp', '2017.11.09'), ('url', '/files/lensen2017GPGC.pdf')], persons=OrderedCaseInsensitiveDict([('author', [Person('Lensen, Andrew'), Person('Xue, Bing'), Person('Zhang, Mengjie')])]))), ('lensen2017Using', Entry('inproceedings', fields=[('title', 'Using Particle Swarm Optimisation and the Silhouette Metric to Estimate the Number of Clusters, Select Features, and Perform Clustering'), ('booktitle', 'Proceedings of the 20th European Conference on the Applications of Evolutionary Computation (EvoApplications), Part {I}'), ('year', '2017'), ('month', 'April'), ('day', '19'), ('volume', '10199'), ('series', 'Lecture Notes in Computer Science'), ('pages', '538--554'), ('publisher', 'Springer'), ('owner', 'lensenandr'), ('timestamp', '2017.11.09'), ('url', '/files/lensen2017Using.pdf')], persons=OrderedCaseInsensitiveDict([('author', [Person('Lensen, Andrew'), Person('Xue, Bing'), Person('Zhang, Mengjie')])]))), ('lensen2016Particle', Entry('inproceedings', fields=[('title', 'Particle Swarm Optimisation Representations for Simultaneous Clustering and Feature Selection'), ('booktitle', 'Proceedings of the Symposium Series on Computational Intelligence'), ('year', '2016'), ('month', 'December'), ('day', '6'), ('pages', '1--8'), ('publisher', '{IEEE}'), ('owner', 'lensenandr'), ('timestamp', '2017.11.09'), ('url', '/files/lensen2016Particle.pdf')], persons=OrderedCaseInsensitiveDict([('author', [Person('Lensen, Andrew'), Person('Xue, Bing'), Person('Zhang, Mengjie')])]))), ('lensen2018generating', Entry('inproceedings', fields=[('title', 'Generating Redundant Features 
with Unsupervised Multi-tree Genetic Programming'), ('booktitle', 'Proceedings of the European Conference on Genetic Programming (EuroGP)'), ('pages', '84--100'), ('year', '2018'), ('month', 'April'), ('day', '4'), ('series', 'Lecture Notes in Computer Science'), ('volume', '10781'), ('publisher', 'Springer'), ('url', '/files/lensen2018generating.pdf')], persons=OrderedCaseInsensitiveDict([('author', [Person('Lensen, Andrew'), Person('Xue, Bing'), Person('Zhang, Mengjie')])]))), ('lensen2018automatically', Entry('inproceedings', fields=[('title', 'Automatically Evolving Difficult Benchmark Feature Selection Datasets with Genetic Programming'), ('booktitle', 'Proceedings of the Genetic and Evolutionary Computation Conference, {GECCO}'), ('pages', '458--465'), ('year', '2018'), ('month', 'July'), ('day', '15'), ('publisher', '{ACM}'), ('url', '/files/lensen2018automatically.pdf')], persons=OrderedCaseInsensitiveDict([('author', [Person('Lensen, Andrew'), Person('Xue, Bing'), Person('Zhang, Mengjie')])]))), ('oneill2018particle', Entry('inproceedings', fields=[('title', 'Particle Swarm Optimisation for Feature Selection and Weighting in High-Dimensional Clustering'), ('booktitle', 'Proceedings of the {IEEE} Congress on Evolutionary Computation, {CEC}'), ('year', '2018'), ('month', 'July'), ('day', '8'), ('publisher', '{IEEE}'), ('pages', '1--8'), ('url', '/files/oneill2018particle.pdf')], persons=OrderedCaseInsensitiveDict([('author', [Person("O'Neill, Damien"), Person('Lensen, Andrew'), Person('Xue, Bing'), Person('Zhang, Mengjie')])]))), ('lensen2019can', Entry('inproceedings', fields=[('title', 'Can Genetic Programming Do Manifold Learning Too?'), ('booktitle', 'Proceedings of the European Conference on Genetic Programming (EuroGP)'), ('year', '2019'), ('month', 'April'), ('day', '24'), ('volume', '11451'), ('series', 'Lecture Notes in Computer Science'), ('publisher', 'Springer'), ('pages', '114--130'), ('note', 'Best paper.'), ('url', 
'/files/lensen2019can.pdf')], persons=OrderedCaseInsensitiveDict([('author', [Person('Lensen, Andrew'), Person('Xue, Bing'), Person('Zhang, Mengjie')])]))), ('alsahaf2019survey', Entry('article', fields=[('title', 'A survey on evolutionary machine learning'), ('journal', 'Journal of the Royal Society of New Zealand'), ('volume', '49'), ('number', '2'), ('pages', '205-228'), ('year', '2019'), ('month', 'April'), ('day', '15'), ('publisher', 'Taylor & Francis'), ('doi', '10.1080/03036758.2019.1609052'), ('url', '/files/alsahaf2019survey.pdf')], persons=OrderedCaseInsensitiveDict([('author', [Person('Al-Sahaf, Harith'), Person('Bi, Ying'), Person('Chen, Qi'), Person('Lensen, Andrew'), Person('Mei, Yi'), Person('Sun, Yanan'), Person('Tran, Binh'), Person('Xue, Bing'), Person('Zhang, Mengjie')])]))), ('lensen2019genetic', Entry('article', fields=[('title', 'Genetic Programming for Evolving Similarity Functions for Clustering: Representations and Analysis'), ('journal', 'Evolutionary Computation'), ('volume', '0'), ('number', 'ja'), ('pages', '1--31'), ('year', '2019'), ('month', 'October'), ('day', '10'), ('note', 'Early Access'), ('publisher', 'MIT Press'), ('url', '/files/lensen2019genetic.pdf')], persons=OrderedCaseInsensitiveDict([('author', [Person('Lensen, Andrew'), Person('Xue, Bing'), Person('Zhang, Mengjie')])]))), ('lensen2019multi', Entry('article', fields=[('title', 'Multi-Objective Genetic Programming for Manifold Learning: Balancing Quality and Dimensionality'), ('journal', 'Genetic Programming and Evolvable Machines'), ('volume', '21'), ('pages', '399--431'), ('year', '2020'), ('url', '/files/lensen2019multi.pdf')], persons=OrderedCaseInsensitiveDict([('author', [Person('Lensen, Andrew'), Person('Zhang, Mengjie'), Person('Xue, Bing')])]))), ('lensen2020genetic', Entry('article', fields=[('title', 'Genetic Programming for Evolving a Front of Interpretable Models for Data Visualisation'), ('journal', '{IEEE} Trans. 
Cybernetics'), ('volume', '0'), ('pages', '1--15'), ('year', '2020'), ('month', 'February'), ('note', 'Early Access'), ('publisher', 'IEEE'), ('url', '/files/lensen2020genetic.pdf')], persons=OrderedCaseInsensitiveDict([('author', [Person('Lensen, Andrew'), Person('Xue, Bing'), Person('Zhang, Mengjie')])]))), ('schofield2020evolving', Entry('inproceedings', fields=[('title', 'Evolving Simpler Constructed Features for Clustering Problems with Genetic Programming'), ('booktitle', 'Proceedings of the {IEEE} Congress on Evolutionary Computation, {CEC}'), ('year', '2020'), ('month', 'July'), ('day', '19'), ('publisher', '{IEEE}'), ('pages', '1--8'), ('url', '/files/schofield2020evolving.pdf')], persons=OrderedCaseInsensitiveDict([('author', [Person('Schofield, Finn'), Person('Lensen, Andrew')])])))]), preamble=[])