diff --git a/.DS_Store b/.DS_Store index 1fd6ba5dc70e4..e5fc085b6ec67 100644 Binary files a/.DS_Store and b/.DS_Store differ diff --git a/_pages/.DS_Store b/_pages/.DS_Store index 5c3e0ec3702a2..50df098058040 100644 Binary files a/_pages/.DS_Store and b/_pages/.DS_Store differ diff --git a/_pages/sitemap.md b/_pages/sitemap.md deleted file mode 100644 index 0525daf0f6c97..0000000000000 --- a/_pages/sitemap.md +++ /dev/null @@ -1,37 +0,0 @@ ---- -layout: archive -title: "Sitemap" -permalink: /sitemap/ -author_profile: true ---- - -{% include base_path %} - -A list of all the posts and pages found on the site. For you robots out there is an [XML version]({{ base_path }}/sitemap.xml) available for digesting as well. - -
This map is generated from a Jupyter Notebook file in /_talks/talkmap.ipynb, which mines the location fields in the .md files in _talks/.
- diff --git a/markdown_generator/PubsFromBib.ipynb b/markdown_generator/PubsFromBib.ipynb deleted file mode 100644 index df898a7128007..0000000000000 --- a/markdown_generator/PubsFromBib.ipynb +++ /dev/null @@ -1,223 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Publications markdown generator for academicpages\n", - "\n", - "Takes a set of bibtex of publications and converts them for use with [academicpages.github.io](academicpages.github.io). This is an interactive Jupyter notebook ([see more info here](http://jupyter-notebook-beginner-guide.readthedocs.io/en/latest/what_is_jupyter.html)). \n", - "\n", - "The core python code is also in `pubsFromBibs.py`. \n", - "Run either from the `markdown_generator` folder after replacing updating the publist dictionary with:\n", - "* bib file names\n", - "* specific venue keys based on your bib file preferences\n", - "* any specific pre-text for specific files\n", - "* Collection Name (future feature)\n", - "\n", - "TODO: Make this work with other databases of citations, \n", - "TODO: Merge this with the existing TSV parsing solution" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from pybtex.database.input import bibtex\n", - "import pybtex.database.input.bibtex \n", - "from time import strptime\n", - "import string\n", - "import html\n", - "import os\n", - "import re" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#todo: incorporate different collection types rather than a catch all publications, requires other changes to template\n", - "publist = {\n", - " \"proceeding\": {\n", - " \"file\" : \"proceedings.bib\",\n", - " \"venuekey\": \"booktitle\",\n", - " \"venue-pretext\": \"In the proceedings of \",\n", - " \"collection\" : {\"name\":\"publications\",\n", - " \"permalink\":\"/publication/\"}\n", - " \n", - " },\n", - " \"journal\":{\n", - " \"file\": \"pubs.bib\",\n", - " \"venuekey\" : \"journal\",\n", - " \"venue-pretext\" : \"\",\n", - " \"collection\" : {\"name\":\"publications\",\n", - " \"permalink\":\"/publication/\"}\n", - " } \n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "html_escape_table = {\n", - " \"&\": \"&\",\n", - " '\"': \""\",\n", - " \"'\": \"'\"\n", - " }\n", - "\n", - "def html_escape(text):\n", - " \"\"\"Produce entities within text.\"\"\"\n", - " return \"\".join(html_escape_table.get(c,c) for c in text)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "for pubsource in publist:\n", - " parser = bibtex.Parser()\n", - " bibdata = parser.parse_file(publist[pubsource][\"file\"])\n", - "\n", - " #loop through the individual references in a given bibtex file\n", - " for bib_id in bibdata.entries:\n", - " #reset default date\n", - " pub_year = \"1900\"\n", - " pub_month = \"01\"\n", - " pub_day = \"01\"\n", - " \n", - " b = bibdata.entries[bib_id].fields\n", - " \n", - " try:\n", - " pub_year = f'{b[\"year\"]}'\n", - "\n", - " #todo: this hack for month and day needs some cleanup\n", - " if \"month\" in b.keys(): \n", - " if(len(b[\"month\"])<3):\n", - " pub_month = \"0\"+b[\"month\"]\n", - " pub_month = pub_month[-2:]\n", - " elif(b[\"month\"] not in range(12)):\n", - " tmnth = strptime(b[\"month\"][:3],'%b').tm_mon \n", - " pub_month = \"{:02d}\".format(tmnth) \n", - " else:\n", - " pub_month = str(b[\"month\"])\n", - " if \"day\" in b.keys(): \n", - " pub_day = str(b[\"day\"])\n", - "\n", - " \n", - " pub_date = pub_year+\"-\"+pub_month+\"-\"+pub_day\n", - " \n", - " #strip out {} as needed (some bibtex entries that maintain formatting)\n", - " clean_title = b[\"title\"].replace(\"{\", \"\").replace(\"}\",\"\").replace(\"\\\\\",\"\").replace(\" \",\"-\") \n", - "\n", - " url_slug = re.sub(\"\\\\[.*\\\\]|[^a-zA-Z0-9_-]\", \"\", clean_title)\n", - " url_slug = url_slug.replace(\"--\",\"-\")\n", - "\n", - " md_filename = (str(pub_date) + \"-\" + url_slug + \".md\").replace(\"--\",\"-\")\n", - " html_filename = (str(pub_date) + \"-\" + url_slug).replace(\"--\",\"-\")\n", - "\n", - " #Build Citation from text\n", - " citation = \"\"\n", - "\n", - " #citation authors - todo - add highlighting for primary author?\n", - " for author in bibdata.entries[bib_id].persons[\"author\"]:\n", - " citation = citation+\" \"+author.first_names[0]+\" \"+author.last_names[0]+\", \"\n", - "\n", - " #citation title\n", - " citation = citation + \"\\\"\" + html_escape(b[\"title\"].replace(\"{\", \"\").replace(\"}\",\"\").replace(\"\\\\\",\"\")) + \".\\\"\"\n", - "\n", - " #add venue logic depending on citation type\n", - " venue = publist[pubsource][\"venue-pretext\"]+b[publist[pubsource][\"venuekey\"]].replace(\"{\", \"\").replace(\"}\",\"\").replace(\"\\\\\",\"\")\n", - "\n", - " citation = citation + \" \" + html_escape(venue)\n", - " citation = citation + \", \" + pub_year + \".\"\n", - "\n", - " \n", - " ## YAML variables\n", - " md = \"---\\ntitle: \\\"\" + html_escape(b[\"title\"].replace(\"{\", \"\").replace(\"}\",\"\").replace(\"\\\\\",\"\")) + '\"\\n'\n", - " \n", - " md += \"\"\"collection: \"\"\" + publist[pubsource][\"collection\"][\"name\"]\n", - "\n", - " md += \"\"\"\\npermalink: \"\"\" + publist[pubsource][\"collection\"][\"permalink\"] + html_filename\n", - " \n", - " note = False\n", - " if \"note\" in b.keys():\n", - " if len(str(b[\"note\"])) > 5:\n", - " md += \"\\nexcerpt: '\" + html_escape(b[\"note\"]) + \"'\"\n", - " note = True\n", - "\n", - " md += \"\\ndate: \" + str(pub_date) \n", - "\n", - " md += \"\\nvenue: '\" + html_escape(venue) + \"'\"\n", - " \n", - " url = False\n", - " if \"url\" in b.keys():\n", - " if len(str(b[\"url\"])) > 5:\n", - " md += \"\\npaperurl: '\" + b[\"url\"] + \"'\"\n", - " url = True\n", - "\n", - " md += \"\\ncitation: '\" + html_escape(citation) + \"'\"\n", - "\n", - " md += \"\\n---\"\n", - "\n", - " \n", - " ## Markdown description for individual page\n", - " if note:\n", - " md += \"\\n\" + html_escape(b[\"note\"]) + \"\\n\"\n", - "\n", - " if url:\n", - " md += \"\\n[Access paper here](\" + b[\"url\"] + \"){:target=\\\"_blank\\\"}\\n\" \n", - " else:\n", - " md += \"\\nUse [Google Scholar](https://scholar.google.com/scholar?q=\"+html.escape(clean_title.replace(\"-\",\"+\"))+\"){:target=\\\"_blank\\\"} for full citation\"\n", - "\n", - " md_filename = os.path.basename(md_filename)\n", - "\n", - " with open(\"../_publications/\" + md_filename, 'w') as f:\n", - " f.write(md)\n", - " print(f'SUCESSFULLY PARSED {bib_id}: \\\"', b[\"title\"][:60],\"...\"*(len(b['title'])>60),\"\\\"\")\n", - " # field may not exist for a reference\n", - " except KeyError as e:\n", - " print(f'WARNING Missing Expected Field {e} from entry {bib_id}: \\\"', b[\"title\"][:30],\"...\"*(len(b['title'])>30),\"\\\"\")\n", - " continue\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.1" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/markdown_generator/publications.ipynb b/markdown_generator/publications.ipynb deleted file mode 100644 index 8657e1009b7b6..0000000000000 --- a/markdown_generator/publications.ipynb +++ /dev/null @@ -1,371 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, - "source": [ - "# Publications markdown generator for academicpages\n", - "\n", - "Takes a TSV of publications with metadata and converts them for use with [academicpages.github.io](academicpages.github.io). This is an interactive Jupyter notebook ([see more info here](http://jupyter-notebook-beginner-guide.readthedocs.io/en/latest/what_is_jupyter.html)). The core python code is also in `publications.py`. Run either from the `markdown_generator` folder after replacing `publications.tsv` with one containing your data.\n", - "\n", - "TODO: Make this work with BibTex and other databases of citations, rather than Stuart's non-standard TSV format and citation style.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Data format\n", - "\n", - "The TSV needs to have the following columns: pub_date, title, venue, excerpt, citation, site_url, and paper_url, with a header at the top. \n", - "\n", - "- `excerpt` and `paper_url` can be blank, but the others must have values. \n", - "- `pub_date` must be formatted as YYYY-MM-DD.\n", - "- `url_slug` will be the descriptive part of the .md file and the permalink URL for the page about the paper. The .md file will be `YYYY-MM-DD-[url_slug].md` and the permalink will be `https://[yourdomain]/publications/YYYY-MM-DD-[url_slug]`\n", - "\n", - "This is how the raw file looks (it doesn't look pretty, use a spreadsheet or other program to edit and create)." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "pub_date\ttitle\tvenue\texcerpt\tcitation\turl_slug\tpaper_url\r\n", - "2009-10-01\tPaper Title Number 1\tJournal 1\tThis paper is about the number 1. The number 2 is left for future work.\tYour Name, You. (2009). \"Paper Title Number 1.\" Journal 1. 1(1).\tpaper-title-number-1\thttp://academicpages.github.io/files/paper1.pdf\r\n", - "2010-10-01\tPaper Title Number 2\tJournal 1\tThis paper is about the number 2. The number 3 is left for future work.\tYour Name, You. (2010). \"Paper Title Number 2.\" Journal 1. 1(2).\tpaper-title-number-2\thttp://academicpages.github.io/files/paper2.pdf\r\n", - "2015-10-01\tPaper Title Number 3\tJournal 1\tThis paper is about the number 3. The number 4 is left for future work.\tYour Name, You. (2015). \"Paper Title Number 3.\" Journal 1. 1(3).\tpaper-title-number-3\thttp://academicpages.github.io/files/paper3.pdf" - ] - } - ], - "source": [ - "!cat publications.tsv" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Import pandas\n", - "\n", - "We are using the very handy pandas library for dataframes." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "collapsed": true, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "import pandas as pd" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Import TSV\n", - "\n", - "Pandas makes this easy with the read_csv function. We are using a TSV, so we specify the separator as a tab, or `\\t`.\n", - "\n", - "I found it important to put this data in a tab-separated values format, because there are a lot of commas in this kind of data and comma-separated values can get messed up. However, you can modify the import statement, as pandas also has read_excel(), read_json(), and others." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " | pub_date | \n", - "title | \n", - "venue | \n", - "excerpt | \n", - "citation | \n", - "url_slug | \n", - "paper_url | \n", - "
---|---|---|---|---|---|---|---|
0 | \n", - "2009-10-01 | \n", - "Paper Title Number 1 | \n", - "Journal 1 | \n", - "This paper is about the number 1. The number 2... | \n", - "Your Name, You. (2009). \"Paper Title Number 1.... | \n", - "paper-title-number-1 | \n", - "http://academicpages.github.io/files/paper1.pdf | \n", - "
1 | \n", - "2010-10-01 | \n", - "Paper Title Number 2 | \n", - "Journal 1 | \n", - "This paper is about the number 2. The number 3... | \n", - "Your Name, You. (2010). \"Paper Title Number 2.... | \n", - "paper-title-number-2 | \n", - "http://academicpages.github.io/files/paper2.pdf | \n", - "
2 | \n", - "2015-10-01 | \n", - "Paper Title Number 3 | \n", - "Journal 1 | \n", - "This paper is about the number 3. The number 4... | \n", - "Your Name, You. (2015). \"Paper Title Number 3.... | \n", - "paper-title-number-3 | \n", - "http://academicpages.github.io/files/paper3.pdf | \n", - "
\n", - " | title | \n", - "type | \n", - "url_slug | \n", - "venue | \n", - "date | \n", - "location | \n", - "talk_url | \n", - "description | \n", - "
---|---|---|---|---|---|---|---|---|
0 | \n", - "Talk 1 on Relevant Topic in Your Field | \n", - "Talk | \n", - "talk-1 | \n", - "UC San Francisco, Department of Testing | \n", - "2012-03-01 | \n", - "San Francisco, California | \n", - "NaN | \n", - "This is a description of your talk, which is a... | \n", - "
1 | \n", - "Tutorial 1 on Relevant Topic in Your Field | \n", - "Tutorial | \n", - "tutorial-1 | \n", - "UC-Berkeley Institute for Testing Science | \n", - "2013-03-01 | \n", - "Berkeley CA, USA | \n", - "http://exampleurl.com | \n", - "This is a description of your tutorial, note t... | \n", - "
2 | \n", - "Talk 2 on Relevant Topic in Your Field | \n", - "Talk | \n", - "talk-2 | \n", - "London School of Testing | \n", - "2014-02-01 | \n", - "London, UK | \n", - "http://example2.com | \n", - "This is a description of your talk, which is a... | \n", - "
3 | \n", - "Conference Proceeding talk 3 on Relevant Topic... | \n", - "Conference proceedings talk | \n", - "talk-3 | \n", - "Testing Institute of America 2014 Annual Confe... | \n", - "2014-03-01 | \n", - "Los Angeles, CA | \n", - "NaN | \n", - "This is a description of your conference proce... | \n", - "