{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import glob\n",
    "import os\n",
    "import subprocess\n",
    "from collections import defaultdict\n",
    "\n",
    "import requests\n",
    "import yaml\n",
    "from toolz.dicttoolz import dissoc"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Rename all BibTeX keys using JabRef"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* Use JabRef, open the bib file -> Quality -> Autogenerate BibTeX keys.\n",
    "* Commit the changes.\n",
    "* `git diff @~1 master > changes`\n",
    "* Use Sublime to get the dict below"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Supercurrent orbitalfield\n",
    "# NOTE: the replacement loop further down substitutes every occurrence of a\n",
    "# dict *value* by the corresponding dict *key*.\n",
    "replace = {\n",
    "    \"Gramacy2004\": \"gramacy2004parameter\",\n",
    "    \"Visvalingam1990\": \"visvalingam1990douglas\",\n",
    "    \"DeRose1998\": \"derose1998subdivision\",\n",
    "    \"Alliez2003\": \"alliez2003anisotropic\",\n",
    "    \"Nijholt2019\": \"Nijholt2019a\",\n",
    "    \"WolframResearch\": \"Mathematica\",\n",
    "    \"Nijholt\": \"adaptive_docs\",\n",
    "    \"Vuik2018\": \"vuik2018reproducing\",\n",
    "    \"Laeven2019\": \"laeven2019enhanced\",\n",
    "    \"Bommer2019\": \"bommer2019spin\",\n",
    "    \"Melo2019\": \"melo2019supercurrent\",\n",
    "    \"Chen2017\": \"chen2017intelligent\",\n",
    "    \"Takhtaganov2018\": \"takhtaganov2018adaptive\",\n",
    "    \"Emery1998\": \"emery1998optimal\",\n",
    "    \"Gonnet2010\": \"gonnet2010increasing\",\n",
    "    \"Galassi1996\": \"galassi1996gnu\",\n",
    "    \"Klein1999\": \"klein1999star\",\n",
    "    \"Berger1989\": \"berger1989local\",\n",
    "    \"Berger1984\": \"berger1984adaptive\",\n",
    "    \"Nijholt2016\": \"nijholt2016orbital\",\n",
    "    \"Dyn1990\": \"dyn1990data\",\n",
    "    \"Clenshaw1960\": \"clenshaw1960method\",\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "md_fname = \"paper.md\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open(md_fname, 'r') as f:\n",
    "    text = f.read()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Each dict value found in the text is replaced by its dict key.\n",
    "for replacement, target in replace.items():\n",
    "    text = text.replace(target, replacement)\n",
    "\n",
    "print(text)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# bibtex to yaml"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def doi2bib(doi, timeout=30):\n",
    "    \"\"\"Return a bibTeX string of metadata for a given DOI.\n",
    "\n",
    "    The DOI resolver is asked for BibTeX through the ``accept`` header.\n",
    "    The HTTP status is deliberately not checked: callers detect a failed\n",
    "    lookup by testing whether the returned text starts with ``@``.\n",
    "\n",
    "    `timeout` is the number of seconds passed to `requests.get`, so an\n",
    "    unresponsive server raises instead of hanging the notebook.\n",
    "    \"\"\"\n",
    "    url = \"https://doi.org/\" + doi\n",
    "    headers = {\"accept\": \"application/x-bibtex\"}\n",
    "    r = requests.get(url, headers=headers, timeout=timeout)\n",
    "    # Without an explicit charset `requests` has to guess the encoding,\n",
    "    # which can garble accented names. The payload is presumably UTF-8\n",
    "    # (TODO confirm against the DOI content-negotiation documentation).\n",
    "    r.encoding = \"utf-8\"\n",
    "    return r.text"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Create the yaml files and `not_on_crossref.bib` files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "bib_files = [f for f in glob.glob('*bib') if 'not_on_crossref' not in f]\n",
    "bib_files"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Go over the above bib files and fix the DOI problems and then create the `yaml`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "bib_fname = 'paper.bib'\n",
    "tmp_yaml = os.path.join(os.path.dirname(bib_fname), \"tmp.yaml\")\n",
    "\n",
    "cmd = [\"pandoc-citeproc\", \"--bib2yaml\", bib_fname]\n",
    "print(\" \".join(cmd), \">\", tmp_yaml)\n",
    "\n",
    "# `check=True` turns a failing conversion into an exception instead of\n",
    "# silently leaving an empty or partial tmp.yaml behind.\n",
    "with open(tmp_yaml, 'w') as f:\n",
    "    subprocess.run(cmd, stdout=f, check=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# A YAML syntax error is allowed to propagate: continuing with a stale or\n",
    "# undefined `bib_data` would only fail later with a less helpful message.\n",
    "with open(tmp_yaml, 'r') as f:\n",
    "    bib_data = yaml.safe_load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# For every reference: fetch the BibTeX entry belonging to its DOI.\n",
    "#   bib is None           -> the reference has no DOI\n",
    "#   bib == \"MANUALLY_ADD\" -> the lookup did not return a BibTeX entry\n",
    "entries = {}\n",
    "for ref in bib_data['references']:\n",
    "    key = ref['id']\n",
    "    doi = ref.get('DOI')\n",
    "    if doi is None:\n",
    "        bib = None\n",
    "        by_hand = True\n",
    "    else:\n",
    "        bib = doi2bib(doi)\n",
    "        if not bib.startswith(\"@\"):\n",
    "            bib = \"MANUALLY_ADD\"\n",
    "            by_hand = True\n",
    "        else:\n",
    "            by_hand = False\n",
    "    entries[key] = dict(doi=doi, bib=bib, by_hand=by_hand)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keys whose DOI lookup did not return a BibTeX entry.\n",
    "for k, d in entries.items():\n",
    "    if d['bib'] == \"MANUALLY_ADD\":\n",
    "        print(k)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keys without any DOI.\n",
    "for k, d in entries.items():\n",
    "    if d['bib'] is None:\n",
    "        print(k)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "entries['Nijholt2016']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def replace_key(key, bib_entry):\n",
    "    \"\"\"Return `bib_entry` with its citation key replaced by `key`.\n",
    "\n",
    "    The entry type is the text before the first ``{`` and the fields are\n",
    "    everything after the first ``,``; whatever sits between them (the\n",
    "    original key) is dropped. A few umlauts are then rewritten as LaTeX\n",
    "    accent macros.\n",
    "    \"\"\"\n",
    "    bib_type, _, _ = bib_entry.partition('{')\n",
    "    _, _, rest = bib_entry.partition(',')\n",
    "    result = bib_type + '{' + key + ',' + rest\n",
    "\n",
    "    # XXX: I am not sure whether these substitutions are needed.\n",
    "    # the problem seemed to be the utf-8 `requests.get` encoding.\n",
    "    to_replace = [(\"ö\", r\"\\\"{o}\"), (\"ü\", r\"\\\"{u}\"), (\"ë\", r\"\\\"{e}\"), (\"ï\", r\"\\\"{i}\")]\n",
    "    for old, new in to_replace:\n",
    "        result = result.replace(old.upper(), new.upper())\n",
    "        result = result.replace(old.lower(), new.lower())\n",
    "\n",
    "    return result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "entries = dict(sorted(entries.items()))\n",
    "for key, d in entries.items():\n",
    "    if d['bib'] is not None and d['bib'].startswith(\"@\"):\n",
    "        print(replace_key(key, d['bib']), \"\\n\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Single-entry example of the key replacement.\n",
    "new_bib = replace_key(\"new\", entries[\"Nijholt2019\"][\"bib\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(new_bib)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Try to fix the above entries with the correct DOI"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the data first so that a failure cannot leave a truncated file.\n",
    "data = {k: dissoc(v, \"bib\") for k, v in entries.items()}\n",
    "\n",
    "with open(os.path.splitext(bib_fname)[0] + \".yaml\", 'w') as f:\n",
    "    yaml.dump(data, f)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Go from yamls to bib files"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Check for double entries and fix them!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "yaml_files = [f for f in glob.glob('*/*yaml') if 'tmp.yaml' not in f]\n",
    "yaml_files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "mapping = {}\n",
    "for yaml_fname in yaml_files:\n",
    "    with open(yaml_fname) as f:\n",
    "        mapping[yaml_fname] = yaml.safe_load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Collect, per key, the entry found in every yaml file. Dicts are turned\n",
    "# into tuples of items so they can be put in a set in the next cell.\n",
    "items = defaultdict(list)\n",
    "for yaml_fname, info in mapping.items():\n",
    "    for k, v in info.items():\n",
    "        if v is not None:\n",
    "            v = tuple(v.items())\n",
    "        items[k].append(v)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# A key with two or more distinct entries is a conflict.\n",
    "for k, v in items.items():\n",
    "    if len(set(v)) >= 2:\n",
    "        print(k, v)\n",
    "        print()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "When there are no more entries above here, go to the next step"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Combine everything into one yaml"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# see create_bib_file.py"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python",
   "pygments_lexer": "ipython3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}