rename_refs.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Rename all BibTeX keys using JabRef"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* Use JabRef, open the bib file -> Quality -> Autogenerate BibTeX keys.\n",
    "* Commit the changes.\n",
    "* `git diff @~1 master > changes`\n",
    "* Use Sublime to get the dict below"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#  Supercurrent orbitalfield\n",
    "# Key substitution table. The substitution loop further down replaces every\n",
    "# occurrence of a dict *value* in the text with the corresponding dict *key*.\n",
    "replace = {\n",
    "    \"Gramacy2004\": \"gramacy2004parameter\",\n",
    "    \"Visvalingam1990\": \"visvalingam1990douglas\",\n",
    "    \"DeRose1998\": \"derose1998subdivision\",\n",
    "    \"Alliez2003\": \"alliez2003anisotropic\",\n",
    "    \"Nijholt2019\": \"Nijholt2019a\",\n",
    "    \"WolframResearch\": \"Mathematica\",\n",
    "    \"Nijholt\": \"adaptive_docs\",\n",
    "    \"Vuik2018\": \"vuik2018reproducing\",\n",
    "    \"Laeven2019\": \"laeven2019enhanced\",\n",
    "    \"Bommer2019\": \"bommer2019spin\",\n",
    "    \"Melo2019\": \"melo2019supercurrent\",\n",
    "    \"Chen2017\": \"chen2017intelligent\",\n",
    "    \"Takhtaganov2018\": \"takhtaganov2018adaptive\",\n",
    "    \"Emery1998\": \"emery1998optimal\",\n",
    "    \"Gonnet2010\": \"gonnet2010increasing\",\n",
    "    \"Galassi1996\": \"galassi1996gnu\",\n",
    "    \"Klein1999\": \"klein1999star\",\n",
    "    \"Berger1989\": \"berger1989local\",\n",
    "    \"Berger1984\": \"berger1984adaptive\",\n",
    "    \"Nijholt2016\": \"nijholt2016orbital\",\n",
    "    \"Dyn1990\": \"dyn1990data\",\n",
    "    \"Clenshaw1960\": \"clenshaw1960method\",\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# File whose text is loaded and searched for the keys listed in `replace`.\n",
    "fname = \"paper.md\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Decode as UTF-8 explicitly: without `encoding`, open() uses the platform's\n",
    "# default, which can mis-decode non-ASCII characters in the manuscript.\n",
    "with open(fname, 'r', encoding='utf-8') as f:\n",
    "    text = f.readlines()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# readlines() produced a list of lines; glue them back into one string.\n",
    "text = ''.join(text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Each dict key supersedes its dict value: wherever the value occurs in the\n",
    "# text, the key is put there instead.\n",
    "for new_key, old_key in replace.items():\n",
    "    text = text.replace(old_key, new_key)\n",
    "\n",
    "print(text)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# BibTeX to YAML"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests\n",
    "\n",
    "def doi2bib(doi, timeout=30):\n",
    "    \"\"\"Return a BibTeX string of metadata for a given DOI.\n",
    "\n",
    "    The DOI resolver is queried with an ``Accept: application/x-bibtex``\n",
    "    header, i.e. the server is asked for a BibTeX record rather than the\n",
    "    article's landing page.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    doi : str\n",
    "        The bare DOI, e.g. ``'10.1000/xyz123'`` (no resolver prefix).\n",
    "    timeout : float, optional\n",
    "        Seconds to wait for the server before giving up.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    str\n",
    "        The response body as text. The HTTP status is not checked, so a\n",
    "        failed lookup returns whatever the server sent; callers can detect\n",
    "        that by testing ``startswith('@')``.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    requests.exceptions.RequestException\n",
    "        On connection problems or when `timeout` expires.\n",
    "    \"\"\"\n",
    "    # https://doi.org/ is the current resolver address; the old\n",
    "    # http://dx.doi.org/ form is unencrypted and only redirects.\n",
    "    url = \"https://doi.org/\" + doi\n",
    "    headers = {\"accept\": \"application/x-bibtex\"}\n",
    "    r = requests.get(url, headers=headers, timeout=timeout)\n",
    "    if r.encoding is None:\n",
    "        # No charset declared by the server: decode as UTF-8 instead of\n",
    "        # letting requests guess, which can garble accented characters.\n",
    "        r.encoding = \"utf-8\"\n",
    "    return r.text"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Create the yaml files and `not_on_crossref.bib` files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import glob\n",
    "\n",
    "# '*.bib' (with the dot) so that only files with the .bib extension match,\n",
    "# not every name that merely ends in the letters \"bib\".\n",
    "bibs = [f for f in glob.glob('*.bib') if 'not_on_crossref' not in f]\n",
    "bibs"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Go over the above bib files and fix the DOI problems and then create the `yaml`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import subprocess\n",
    "\n",
    "fname = 'paper.bib'\n",
    "yamls = []\n",
    "folder, name = os.path.split(fname)\n",
    "new = os.path.join(folder, \"tmp.yaml\")\n",
    "yamls.append(new)\n",
    "# Shell-style echo of the conversion, printed for the reader only.\n",
    "cmd = f\"pandoc-citeproc --bib2yaml {fname} > {new}\"\n",
    "print(cmd)\n",
    "# Run the converter without a shell and write its stdout to the YAML file.\n",
    "# check=True raises CalledProcessError when the conversion fails, instead of\n",
    "# carrying on with an empty or partial tmp.yaml as os.system() would.\n",
    "with open(new, 'w') as out:\n",
    "    subprocess.run([\"pandoc-citeproc\", \"--bib2yaml\", fname], stdout=out, check=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import yaml\n",
    "\n",
    "with open(new, 'r') as f:\n",
    "    try:\n",
    "        bibs = yaml.safe_load(f)\n",
    "    except yaml.YAMLError as exc:\n",
    "        print(exc)\n",
    "        # Re-raise: swallowing the error would leave `bibs` holding whatever\n",
    "        # an earlier cell assigned to it, and the cells that index\n",
    "        # bibs['references'] would then fail with a misleading error.\n",
    "        raise"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "start = '@article{'\n",
    "entries = {}\n",
    "for d in bibs['references']:\n",
    "    key = d['id']\n",
    "    doi = d.get('DOI')\n",
    "    # Without a DOI there is nothing to look up; otherwise fetch the record.\n",
    "    bib = None if doi is None else doi2bib(doi)\n",
    "    # A response that does not start like a BibTeX entry is replaced by a marker.\n",
    "    if bib is not None and not bib.startswith(\"@\"):\n",
    "        bib = \"MANUALLY_ADD\"\n",
    "    # Manual work is needed both when no DOI exists and when the lookup failed.\n",
    "    by_hand = bib is None or bib == \"MANUALLY_ADD\"\n",
    "    entries[key] = dict(doi=doi, bib=bib, by_hand=by_hand)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keys that have a DOI but whose lookup did not return a BibTeX entry.\n",
    "needs_manual_bib = (k for k, d in entries.items() if d['bib'] == \"MANUALLY_ADD\")\n",
    "for k in needs_manual_bib:\n",
    "    print(k)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keys for which no BibTeX text was stored at all.\n",
    "for k in entries:\n",
    "    if entries[k]['bib'] is None:\n",
    "        print(k)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Spot-check a single entry.\n",
    "entries['Nijholt2016']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def replace_key(key, bib_entry):\n",
    "    \"\"\"Return `bib_entry` with its citation key swapped for `key`.\n",
    "\n",
    "    Everything before the first ``{`` (the entry type) and everything after\n",
    "    the first ``,`` (the fields) is kept. The characters ö, ü, ë and ï, in\n",
    "    either case, are rewritten as LaTeX escapes. The result is printed as\n",
    "    well as returned.\n",
    "    \"\"\"\n",
    "    entry_type = bib_entry.partition('{')[0]\n",
    "    fields = bib_entry.partition(',')[2]\n",
    "    result = ''.join([entry_type, '{', key, ',', fields])\n",
    "\n",
    "    # XXX: I am not sure whether these substitutions are needed.\n",
    "    # the problem seemed to be the utf-8 `requests.get` encoding.\n",
    "    latex_escapes = {\"ö\": r\"\\\"{o}\", \"ü\": r\"\\\"{u}\", \"ë\": r\"\\\"{e}\", \"ï\": r\"\\\"{i}\"}\n",
    "    for char, escape in latex_escapes.items():\n",
    "        # Upper-case variant first, then lower-case.\n",
    "        for change_case in (str.upper, str.lower):\n",
    "            result = result.replace(change_case(char), change_case(escape))\n",
    "\n",
    "    print(result, \"\\n\")\n",
    "    return result\n",
    "\n",
    "# Walk the entries in key order and print a re-keyed copy of every record\n",
    "# that holds an actual BibTeX entry.\n",
    "entries = dict(sorted(entries.items()))\n",
    "for key, d in entries.items():\n",
    "    fetched = d['bib']\n",
    "    if fetched is None or not fetched.startswith(\"@\"):\n",
    "        continue\n",
    "    replace_key(key, fetched)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import parse\n",
    "bib = entries[\"Nijholt2019\"][\"bib\"]\n",
    "# Keep the entry type (before the first '{') and the fields (after the\n",
    "# first ','), and put the placeholder key \"new\" in between.\n",
    "bib_type = bib.partition('{')[0]\n",
    "rest = bib.partition(',')[2]\n",
    "new_bib = ''.join([bib_type, '{', \"new\", ',', rest])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Show the re-keyed entry built in the previous cell.\n",
    "print(new_bib)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Try to fix the above entries with the correct DOI"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from toolz.dicttoolz import dissoc\n",
    "\n",
    "# Output path: drop the last three characters of `fname` and append 'yaml'\n",
    "# (e.g. 'paper.bib' -> 'paper.yaml').\n",
    "yaml_path = fname[:-3] + \"yaml\"\n",
    "with open(yaml_path, 'w') as f:\n",
    "    # The fetched BibTeX text is left out; only the other fields are stored.\n",
    "    data = {}\n",
    "    for k, v in entries.items():\n",
    "        data[k] = dissoc(v, \"bib\")\n",
    "    yaml.dump(data, f)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Go from yamls to bib files"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Check for duplicate entries and fix them!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import glob\n",
    "\n",
    "# '*/*.yaml' (with the dot) so that only files with the .yaml extension in\n",
    "# the sub-folders match, not every name that merely ends in \"yaml\".\n",
    "bibs = [f for f in glob.glob('*/*.yaml') if 'tmp.yaml' not in f]\n",
    "bibs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import yaml\n",
    "# Parse every YAML file found above, keyed by its path.\n",
    "mapping = {}\n",
    "for path in bibs:\n",
    "    with open(path) as handle:\n",
    "        mapping[path] = yaml.safe_load(handle)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from collections import defaultdict\n",
    "# Collect, per citation key, the record stored under it in each YAML file.\n",
    "items = defaultdict(list)\n",
    "for fname, info in mapping.items():\n",
    "    # An empty YAML file loads as None; treat it as having no entries.\n",
    "    for k, v in (info or {}).items():\n",
    "        if v is not None:\n",
    "            # Dicts are unhashable, so freeze each record into a tuple. Sort the\n",
    "            # fields first: otherwise two identical records whose fields are\n",
    "            # merely written in a different order would count as different.\n",
    "            v = tuple(sorted(v.items(), key=lambda field: str(field[0])))\n",
    "        items[k].append(v)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# A key is reported when the records collected for it are not all identical.\n",
    "for k, v in items.items():\n",
    "    distinct = set(v)\n",
    "    if len(distinct) < 2:\n",
    "        continue\n",
    "    print(k, v)\n",
    "    print()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "When there are no more entries above here, go to the next step"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Combine everything into one yaml"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# see create_bib_file.py"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python",
   "pygments_lexer": "ipython3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}