diff --git a/figures.ipynb b/figures.ipynb
index a296429f830293cbfcd3653e34f92ebe7d5ce0c3..eee3291227390e2be97cf16175f95786caa4336b 100644
--- a/figures.ipynb
+++ b/figures.ipynb
@@ -51,11 +51,98 @@
     "import matplotlib.tri as mtri"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 1D funcs\n",
+    "def f(x, offset=0.123):\n",
+    "    a = 0.02\n",
+    "    return x + a ** 2 / (a ** 2 + (x - offset) ** 2)\n",
+    "\n",
+    "\n",
+    "def g(x):\n",
+    "    return np.tanh(x * 40)\n",
+    "\n",
+    "\n",
+    "def h(x):\n",
+    "    return np.sin(100 * x) * np.exp(-x ** 2 / 0.1 ** 2)\n",
+    "\n",
+    "\n",
+    "funcs_1D = [\n",
+    "    dict(function=f, bounds=(-1, 1), title=\"peak\"),\n",
+    "    dict(function=g, bounds=(-1, 1), title=\"tanh\"),\n",
+    "    dict(function=h, bounds=(-0.3, 0.3), title=\"wave packet\"),\n",
+    "]\n",
+    "\n",
+    "# 2D funcs\n",
+    "def ring(xy, offset=0.123):\n",
+    "    a = 0.2\n",
+    "    x, y = xy\n",
+    "    return x + np.exp(-(x ** 2 + y ** 2 - 0.75 ** 2) ** 2 / a ** 4)\n",
+    "\n",
+    "\n",
+    "@functools.lru_cache()\n",
+    "def phase_diagram_setup(fname):\n",
+    "    data = adaptive.utils.load(fname)\n",
+    "    points = np.array(list(data.keys()))\n",
+    "    values = np.array(list(data.values()), dtype=float)\n",
+    "    bounds = [\n",
+    "        (points[:, 0].min(), points[:, 0].max()),\n",
+    "        (points[:, 1].min(), points[:, 1].max()),\n",
+    "    ]\n",
+    "    ll, ur = np.reshape(bounds, (2, 2)).T\n",
+    "    inds = np.all(np.logical_and(ll <= points, points <= ur), axis=1)\n",
+    "    points, values = points[inds], values[inds].reshape(-1, 1)\n",
+    "    return interpolate.LinearNDInterpolator(points, values), bounds\n",
+    "\n",
+    "\n",
+    "def phase_diagram(xy, fname):\n",
+    "    ip, _ = phase_diagram_setup(fname)\n",
+    "    return np.round(ip(xy))\n",
+    "\n",
+    "\n",
+    "def density(x, eps=0):\n",
+    "    e = [0.8, 0.2]\n",
+    "    delta = [0.5, 0.5, 0.5]\n",
+    "    c = 3\n",
+    "    omega = [0.02, 0.05]\n",
+    "\n",
+    "    H = np.array(\n",
+    "        [\n",
+    "            [e[0] + 1j * omega[0], delta[0], delta[1]],\n",
+    "            [delta[0], e[1] + c * x + 1j * omega[1], delta[1]],\n",
+    "            [delta[1], delta[2], e[1] - c * x + 1j * omega[1]],\n",
+    "        ]\n",
+    "    )\n",
+    "    H += np.eye(3) * eps\n",
+    "    return np.trace(np.linalg.inv(H)).imag\n",
+    "\n",
+    "\n",
+    "def level_crossing(xy):\n",
+    "    x, y = xy\n",
+    "    return density(x, y) + y\n",
+    "\n",
+    "\n",
+    "funcs_2D = [\n",
+    "    dict(function=ring, bounds=[(-1, 1), (-1, 1)], npoints=33),\n",
+    "    dict(\n",
+    "        function=phase_diagram,\n",
+    "        bounds=phase_diagram_setup(\"phase_diagram.pickle\")[1],\n",
+    "        npoints=100,\n",
+    "        fname=\"phase_diagram.pickle\",\n",
+    "    ),\n",
+    "    dict(function=level_crossing, bounds=[(-1, 1), (-3, 3)], npoints=50),\n",
+    "]"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Fig 1."
+    "# loss_1D"
    ]
   },
   {
@@ -75,7 +162,7 @@
     "fig, ax = plt.subplots(figsize=fig_size)\n",
     "ax.scatter(xs, ys, c=\"k\")\n",
     "ax.plot(xs, ys, c=\"k\")\n",
-    "# ax.scatter()\n",
+    "\n",
     "ax.annotate(\n",
     "    s=r\"$L_{1,2} = \\sqrt{\\Delta x^2 + \\Delta y^2}$\",\n",
     "    xy=(np.mean([xs[0], xs[1]]), np.mean([ys[0], ys[1]])),\n",
@@ -113,7 +200,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Fig 2."
+    "# adaptive_vs_grid"
    ]
   },
   {
@@ -122,24 +209,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "def f(x, offset=0.123):\n",
-    "    a = 0.02\n",
-    "    return x + a ** 2 / (a ** 2 + (x - offset) ** 2)\n",
-    "\n",
-    "\n",
-    "def g(x):\n",
-    "    return np.tanh(x * 40)\n",
-    "\n",
-    "\n",
-    "def h(x):\n",
-    "    return np.sin(100 * x) * np.exp(-x ** 2 / 0.1 ** 2)\n",
-    "\n",
     "\n",
-    "funcs_1D = [\n",
-    "    dict(function=f, bounds=(-1, 1), title=\"peak\"),\n",
-    "    dict(function=g, bounds=(-1, 1), title=\"tanh\"),\n",
-    "    dict(function=h, bounds=(-0.3, 0.3), title=\"wave packet\"),\n",
-    "]\n",
     "fig, axs = plt.subplots(2, len(funcs_1D), figsize=(fig_width, 1.5 * fig_height))\n",
     "n_points = 50\n",
     "for i, ax in enumerate(axs.T.flatten()):\n",
@@ -168,14 +238,14 @@
     "\n",
     "axs[0][0].set_ylabel(r\"$\\textrm{homogeneous}$\")\n",
     "axs[1][0].set_ylabel(r\"$\\textrm{adaptive}$\")\n",
-    "plt.savefig(\"figures/adaptive_vs_grid.pdf\", bbox_inches=\"tight\", transparent=True)"
+    "plt.savefig(\"figures/Learner1D.pdf\", bbox_inches=\"tight\", transparent=True)"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Fig 3. "
+    "# adaptive_2D"
    ]
   },
   {
@@ -184,66 +254,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "\n",
-    "def f(xy, offset=0.123):\n",
-    "    a = 0.2\n",
-    "    x, y = xy\n",
-    "    return x + np.exp(-(x ** 2 + y ** 2 - 0.75 ** 2) ** 2 / a ** 4)\n",
-    "\n",
-    "\n",
-    "@functools.lru_cache()\n",
-    "def g_setup(fname):\n",
-    "    data = adaptive.utils.load(fname)\n",
-    "    points = np.array(list(data.keys()))\n",
-    "    values = np.array(list(data.values()), dtype=float)\n",
-    "    bounds = [\n",
-    "        (points[:, 0].min(), points[:, 0].max()),\n",
-    "        (points[:, 1].min(), points[:, 1].max()),\n",
-    "    ]\n",
-    "    ll, ur = np.reshape(bounds, (2, 2)).T\n",
-    "    inds = np.all(np.logical_and(ll <= points, points <= ur), axis=1)\n",
-    "    points, values = points[inds], values[inds].reshape(-1, 1)\n",
-    "    return interpolate.LinearNDInterpolator(points, values), bounds\n",
-    "\n",
-    "\n",
-    "def g(xy, fname):\n",
-    "    ip, _ = g_setup(fname)\n",
-    "    return np.round(ip(xy))\n",
-    "\n",
-    "\n",
-    "def density(x, eps=0):\n",
-    "    e = [0.8, 0.2]\n",
-    "    delta = [0.5, 0.5, 0.5]\n",
-    "    c = 3\n",
-    "    omega = [0.02, 0.05]\n",
-    "\n",
-    "    H = np.array(\n",
-    "        [\n",
-    "            [e[0] + 1j * omega[0], delta[0], delta[1]],\n",
-    "            [delta[0], e[1] + c * x + 1j * omega[1], delta[1]],\n",
-    "            [delta[1], delta[2], e[1] - c * x + 1j * omega[1]],\n",
-    "        ]\n",
-    "    )\n",
-    "    H += np.eye(3) * eps\n",
-    "    return np.trace(np.linalg.inv(H)).imag\n",
-    "\n",
-    "\n",
-    "def h(xy):\n",
-    "    x, y = xy\n",
-    "    return density(x, y) + y\n",
-    "\n",
-    "\n",
-    "funcs = [\n",
-    "    dict(function=f, bounds=[(-1, 1), (-1, 1)], npoints=33),\n",
-    "    dict(\n",
-    "        function=g,\n",
-    "        bounds=g_setup(\"phase_diagram.pickle\")[1],\n",
-    "        npoints=100,\n",
-    "        fname=\"phase_diagram.pickle\",\n",
-    "    ),\n",
-    "    dict(function=h, bounds=[(-1, 1), (-3, 3)], npoints=50),\n",
-    "]\n",
-    "fig, axs = plt.subplots(2, len(funcs), figsize=(fig_width, 1.5 * fig_height))\n",
+    "fig, axs = plt.subplots(2, len(funcs_2D), figsize=(fig_width, 1.5 * fig_height))\n",
     "\n",
     "plt.subplots_adjust(hspace=-0.1, wspace=0.1)\n",
     "\n",
@@ -262,7 +273,7 @@
     "    ax.xaxis.set_ticks([])\n",
     "    ax.yaxis.set_ticks([])\n",
     "    kind = \"homogeneous\" if i % 2 == 0 else \"adaptive\"\n",
-    "    d = funcs[i // 2] if kind == \"homogeneous\" else funcs[(i - 1) // 2]\n",
+    "    d = funcs_2D[i // 2] if kind == \"homogeneous\" else funcs_2D[(i - 1) // 2]\n",
     "    bounds = d[\"bounds\"]\n",
     "    npoints = d[\"npoints\"]\n",
     "    f = d[\"function\"]\n",
@@ -302,7 +313,7 @@
     "\n",
     "axs[0][0].set_ylabel(r\"$\\textrm{homogeneous}$\")\n",
     "axs[1][0].set_ylabel(r\"$\\textrm{adaptive}$\")\n",
-    "plt.savefig(\"figures/adaptive_2D.pdf\", bbox_inches=\"tight\", transparent=True)"
+    "plt.savefig(\"figures/Learner2D.pdf\", bbox_inches=\"tight\", transparent=True)"
    ]
   },
   {
@@ -467,7 +478,7 @@
     "    ncol=1,\n",
     ")\n",
     "\n",
-    "# On grid\n",
+    "# On grid, uncomment for homogeneous plot\n",
     "# axs.plot(learner.bounds, [-(i + 0.5) * step, -(i + 0.5) * step], c='k', ls='--')\n",
     "# xs_hom = np.linspace(*learner.bounds, i)\n",
     "# ys_hom = f(xs_hom) - (i + 3) * step\n",
@@ -573,27 +584,6 @@
    "source": [
     "import scipy\n",
     "\n",
-    "\n",
-    "def f(x, offset=0.123):\n",
-    "    a = 0.02\n",
-    "    return x + a ** 2 / (a ** 2 + (x - offset) ** 2)\n",
-    "\n",
-    "\n",
-    "def g(x):\n",
-    "    return np.tanh(x * 40)\n",
-    "\n",
-    "\n",
-    "def h(x):\n",
-    "    return np.sin(100 * x) * np.exp(-x ** 2 / 0.1 ** 2)\n",
-    "\n",
-    "\n",
-    "funcs_1D = [\n",
-    "    dict(function=f, bounds=(-1, 1), title=\"peak\"),\n",
-    "    dict(function=g, bounds=(-1, 1), title=\"tanh\"),\n",
-    "    dict(function=h, bounds=(-0.3, 0.3), title=\"wave packet\"),\n",
-    "]\n",
-    "\n",
-    "\n",
     "def err(xs, ys, xs_rand):\n",
     "    ip = scipy.interpolate.interp1d(xs, ys)\n",
     "    abserr = np.abs(ip(xs_rand) - f(xs_rand))\n",
@@ -784,7 +774,7 @@
     "    return x ** 2 + y ** 3\n",
     "\n",
     "bounds = [(-1, 1), (-1, 1)]\n",
-    "npoints = 300\n",
+    "npoints = 17**2\n",
     "\n",
     "def isoline_loss_function(y_iso, sigma, priority=1):\n",
     "    from adaptive.learner.learnerND import default_loss\n",
@@ -833,13 +823,15 @@
     "\n",
     "    if with_tri:\n",
     "        xy = np.array([learner.tri.get_vertices(s)\n",
-    "                           for s in learner.tri.simplices]).reshape(-1, 2)\n",
-    "        triang = mtri.Triangulation(*xy.T)\n",
+    "                           for s in learner.tri.simplices])\n",
+    "        print(f'{len(xy)} triangles')\n",
+    "        triang = mtri.Triangulation(*xy.reshape(-1, 2).T)\n",
     "        ax.triplot(triang, c=\"w\", lw=0.2, alpha=0.8)\n",
     "\n",
     "    # Isolines\n",
     "    vertices, lines = learner._get_iso(level, which='line')\n",
     "    paths = np.array([[vertices[i], vertices[j]] for i, j in lines]).T\n",
+    "    print('{} line segments'.format(len(paths.T)))\n",
     "    ax.plot(*paths, c='k')\n",
     "\n",
     "    values = np.array(list(learner.data.values()))\n",
diff --git a/figures/adaptive_vs_grid.pdf b/figures/Learner1D.pdf
similarity index 100%
rename from figures/adaptive_vs_grid.pdf
rename to figures/Learner1D.pdf
diff --git a/figures/adaptive_2D.pdf b/figures/Learner2D.pdf
similarity index 100%
rename from figures/adaptive_2D.pdf
rename to figures/Learner2D.pdf
diff --git a/paper.md b/paper.md
index e44df2a5dae5126f1beec060fe1413fb75eed837..effba5e26bebcbe5e2b85b5114e45783483e7936 100755
--- a/paper.md
+++ b/paper.md
@@ -61,14 +61,14 @@ The most significant advantage of these *local* algorithms is that they allow fo
 
 ![Comparison of homogeneous sampling (top) with adaptive sampling (bottom) for different one-dimensional functions (red) where the number of points in each column is identical.
 We see that when the function has a distinct feature---such as with the peak and tanh---adaptive sampling performs much better.
-When the features are homogeneously spaced, such as with the wave packet, adaptive sampling is not as effective as in the other cases.](figures/adaptive_vs_grid.pdf){#fig:adaptive_vs_grid}
+When the features are homogeneously spaced, such as with the wave packet, adaptive sampling is not as effective as in the other cases.](figures/Learner1D.pdf){#fig:Learner1D}
 
 ![Comparison of homogeneous sampling (top) with adaptive sampling (bottom) for different two-dimensional functions where the number of points in each column is identical.
 On the left is a circle with a linear background $x + a ^ 2 / (a ^ 2 + (x - x_\textrm{offset}) ^ 2)$.
 In the middle a topological phase diagram from [\onlinecite{nijholt2016orbital}] its function can be -1 or 1, which indicate the presence or abscence of a Majorana bound state.
 On the right we plot level crossings for a two level quantum system.
 In all cases using Adaptive results in a better plot.
-](figures/adaptive_2D.pdf){#fig:adaptive_2D}
+](figures/Learner2D.pdf){#fig:Learner2D}
 
 
 #### We provide a reference implementation, the Adaptive package, and demonstrate its performance.
@@ -133,7 +133,7 @@ The amortized complexity of the point suggestion algorithm is, therefore, $\math
 An example of such a loss function for a one-dimensional function is the interpoint distance.
 This loss will suggest to sample a point in the middle of an interval with the largest Euclidean distance and thereby ensure the continuity of the function.
 A more complex loss function that also takes the first neighbouring intervals into account is one that adds more points where the second derivative (or curvature) is the highest.
-Figure @fig:adaptive_vs_grid shows a comparison between a result using this loss and a function that is sampled on a grid.
+Figure @fig:Learner1D shows a comparison between a result using this loss and a function that is sampled on a grid.
 
 #### With many points, due to the loss being local, parallel sampling incurs no additional cost.
 So far, the description of the general algorithm did not include parallelism.
@@ -218,7 +218,7 @@ $$
 $$
 This error approaches zero as the approximation becomes better.
 
-![The $L^{1}$-norm error as a function of number of points $N$ for the functions in Fig. @fig:adaptive_vs_grid (a,b,c).
+![The $L^{1}$-norm error as a function of number of points $N$ for the functions in Fig. @fig:Learner1D (a,b,c).
 The interrupted lines correspond to homogeneous sampling and the solid line to the sampling with the line loss.
 In all cases adaptive sampling performs better, where the error is a factor 1.6-20 lower for $N=10000$.
 ](figures/line_loss_error.pdf){#fig:line_loss_error}
@@ -232,11 +232,12 @@ Here, we see that for homogeneous sampling to get the same error as sampling wit
 
 ## isoline and isosurface sampling
 We can find isolines or isosurfaces using a loss function that prioritizes intervals that are closer to the function values that we are interested in.
-See @fig:isoline.
+See Fig. @fig:isoline.
 
-![Comparison of isoline sampling of $f(x,y)=x^2 + y^3$ at $f(x,y)=0.1$ using homogeneous sampling (left) and adaptive sampling (right).
+![Comparison of isoline sampling of $f(x,y)=x^2 + y^3$ at $f(x,y)=0.1$ using homogeneous sampling (left) and adaptive sampling (right) with the same amount of points $n=17^2=289$.
 We plot the function interpolated on a grid (color) with the triangulation on top (white) where the function is sampled on the vertices.
 The solid line (black) indicates the isoline at $f(x,y)=0.1$.
+The isoline in the homogeneous case consists of 62 line segments and the adaptive case consists of 147 line segments.
 ](figures/isoline.pdf){#fig:isoline}
 
 # Implementation and benchmarks