author={Bas Nijholt and Joseph Weston and Jorn Hoofwijk and Anton Akhmerov},
title={python-adaptive/adaptive: version 0.7.3},
year={2019},
howpublished={Software on Zenodo},
note={Tools for the adaptive and parallel sampling of mathematical functions, available at http://dx.doi.org/10.5281/zenodo.1182437},
month=jan,
doi={10.5281/zenodo.1182437},
publisher={Zenodo},
@phdthesis{Castro2008,
  author = {Castro, Rui M},
  title  = {Active learning and adaptive sampling for non-parametric inference},
  school = {Rice University},
  year   = {2008},
}
% NOTE(review): truncated duplicate of @Article{Vuik2018} -- the complete entry
% appears later in this file.  The leading '@' has been removed so that BibTeX
% treats the fragment below as inter-entry text and ignores it.
Article{Vuik2018,
author={Vuik, Adriaan and Nijholt, Bas and Akhmerov, AR and Wimmer, Michael},
title={Reproducing topological properties with quasi-Majorana states},
journal={arXiv preprint arXiv:1806.02801},
year={2018},
@incollection{Figueiredo1995,
  author    = {de Figueiredo, Luiz Henrique},
  title     = {Adaptive sampling of parametric curves},
  booktitle = {Graphics Gems V},
  publisher = {Elsevier},
  pages     = {173--178},
  year      = {1995},
}
@article{Galassi1996,
  author        = {Galassi, Mark and Davies, Jim and Theiler, James and Gough, Brian and Jungman, Gerard and Alken, Patrick and Booth, Michael and Rossi, Fabrice},
  title         = {{GNU} scientific library},
  journal       = {No. Release},
  volume        = {2},
  year          = {1996},
  internal-note = {NOTE(review): 'journal = No. Release' looks like an auto-export artifact; this appears to be the GSL reference manual -- consider @book/@manual with the proper publisher. TODO verify.},
}
@Article{Laeven2019,
...
...
@@ -23,18 +29,6 @@
year={2019},
}
@article{Bommer2019,
  author    = {Bommer, Jouri D. S. and Zhang, Hao and G{\"u}l, {\"O}nder and Nijholt, Bas and Wimmer, Michael and Rybakov, Filipp N. and Garaud, Julien and Rodic, Donjan and Babaev, Egor and Troyer, Matthias and others},
  title     = {Spin-orbit protection of induced superconductivity in {Majorana} nanowires},
  journal   = {Physical Review Letters},
  volume    = {122},
  number    = {18},
  pages     = {187702},
  year      = {2019},
  doi       = {10.1103/PhysRevLett.122.187702},
  publisher = {APS},
}
@Article{Melo2019,
author={Melo, André and Rubbert, Sebastian and Akhmerov, Anton R},
title={Supercurrent-induced Majorana bound states in a planar geometry},
...
...
@@ -42,42 +36,11 @@
year={2019},
}
@inproceedings{Gramacy2004,
  author       = {Gramacy, Robert B. and Lee, Herbert K. H. and Macready, William G.},
  title        = {Parameter space exploration with {Gaussian} process trees},
  booktitle    = {Proceedings of the twenty-first international conference on Machine learning},
  year         = {2004},
  organization = {ACM},
  pages        = {45},
  doi          = {10.1145/1015330.1015367},
}
% NOTE(review): exact duplicate of @InCollection{Figueiredo1995} defined earlier
% in this file (would trigger BibTeX's "Repeated entry" error).  Leading '@'
% removed so the duplicate is ignored.
InCollection{Figueiredo1995,
author={de Figueiredo, Luiz Henrique},
title={Adaptive sampling of parametric curves},
booktitle={Graphics Gems V},
year={1995},
publisher={Elsevier},
pages={173--178},
}
% NOTE(review): exact duplicate of @PhdThesis{Castro2008} defined earlier in
% this file (would trigger BibTeX's "Repeated entry" error).  Leading '@'
% removed so the duplicate is ignored.
PhdThesis{Castro2008,
author={Castro, Rui M},
title={Active learning and adaptive sampling for non-parametric inference},
year={2008},
school={Rice University},
}
@article{Chen2017,
  author    = {Chen, Yuhang and Peng, Chaoyang},
  title     = {Intelligent adaptive sampling guided by {Gaussian} process inference},
  journal   = {Measurement Science and Technology},
  year      = {2017},
  volume    = {28},
  number    = {10},
  pages     = {105005},
  doi       = {10.1088/1361-6501/aa7d31},
  publisher = {IOP Publishing},
}
@misc{Nijholt2018,
  author = {Bas Nijholt and Joseph Weston and Anton Akhmerov},
  title  = {{Adaptive} documentation},
  note   = {https://adaptive.readthedocs.io},
  year   = {2018},
}
@Article{Takhtaganov2018,
...
...
@@ -87,6 +50,13 @@
year={2018},
}
@article{Vuik2018,
  author  = {Vuik, Adriaan and Nijholt, Bas and Akhmerov, A. R. and Wimmer, Michael},
  title   = {Reproducing topological properties with quasi-{Majorana} states},
  journal = {arXiv preprint arXiv:1806.02801},
  year    = {2018},
}
@Online{Wolfram2011,
author={Stephen Wolfram},
title={Mathematica: Adaptive Plotting},
...
...
@@ -95,161 +65,214 @@
urldate={2019-09-10},
}
@inproceedings{Visvalingam1990,
  author       = {Visvalingam, Mahes and Whyatt, J Duncan},
  title        = {The {Douglas-Peucker} Algorithm for Line Simplification: Re-evaluation through Visualization},
  booktitle    = {Computer Graphics Forum},
  year         = {1990},
  volume       = {9},
  number       = {3},
  organization = {Wiley Online Library},
  pages        = {213--225},
  doi          = {10.1111/j.1467-8659.1990.tb00398.x},
}
@book{Hu2006,
  author    = {Hu, Feifang and Rosenberger, William F},
  title     = {The theory of response-adaptive randomization in clinical trials},
  volume    = {525},
  publisher = {John Wiley \& Sons},
  year      = {2006},
  doi       = {10.1002/047005588X},
}
@article{Emery1998,
  author    = {Emery, Ashley F and Nenarokomov, Aleksey V},
  title     = {Optimal experiment design},
  journal   = {Measurement Science and Technology},
  volume    = {9},
  number    = {6},
  pages     = {864},
  year      = {1998},
  doi       = {10.1088/0957-0233/9/6/003},
  publisher = {IOP Publishing},
}
@article{Gonnet2010,
  author    = {Gonnet, Pedro},
  title     = {Increasing the reliability of adaptive quadrature using explicit interpolants},
  journal   = {ACM Transactions on Mathematical Software (TOMS)},
  volume    = {37},
  number    = {3},
  pages     = {26},
  year      = {2010},
  doi       = {10.1145/1824801.1824804},
  publisher = {ACM},
}
% NOTE(review): exact duplicate of @Article{Galassi1996} defined earlier in
% this file (would trigger BibTeX's "Repeated entry" error).  Leading '@'
% removed so the duplicate is ignored.
Article{Galassi1996,
author={Galassi, Mark and Davies, Jim and Theiler, James and Gough, Brian and Jungman, Gerard and Alken, Patrick and Booth, Michael and Rossi, Fabrice},
title={GNU scientific library},
journal={No. Release},
year={1996},
volume={2},
}
@misc{WolframResearch,
  author = {{Wolfram Research, Inc.}},
  title  = {Mathematica, Version 12.0},
  year   = {2019},
  note   = {Champaign, IL},
}
@misc{Nijholt,
  author = {Bas Nijholt and Joseph Weston and Anton Akhmerov},
  title  = {Adaptive documentation},
  note   = {https://adaptive.readthedocs.io},
  year   = {2018},
}
% Below has DOI and is taken from the doi.org API
@article{Alliez2003,
  author    = {Pierre Alliez and David Cohen-Steiner and Olivier Devillers and Bruno L{\'{e}}vy and Mathieu Desbrun},
  title     = {Anisotropic polygonal remeshing},
  journal   = {{ACM} Transactions on Graphics},
  volume    = {22},
  number    = {3},
  pages     = {485},
  year      = {2003},
  month     = jul,
  publisher = {Association for Computing Machinery ({ACM})},
  doi       = {10.1145/882262.882296},
  url       = {https://doi.org/10.1145/882262.882296},
}
@Article{Klein1999,
author={Klein, Richard I},
title={Star formation with 3-D adaptive mesh refinement: the collapse and fragmentation of molecular clouds},
journal={Journal of Computational and Applied Mathematics},
author={Jouri D.{\hspace{0.167em}}S. Bommer and Hao Zhang and \"{O}nder G\"{u}l and Bas Nijholt and Michael Wimmer and Filipp N. Rybakov and Julien Garaud and Donjan Rodic and Egor Babaev and Matthias Troyer and Diana Car and S{\'{e}}bastien R. Plissard and Erik P.{\hspace{0.167em}}A.{\hspace{0.167em}}M. Bakkers and Kenji Watanabe and Takashi Taniguchi and Leo P. Kouwenhoven},
title={Spin-Orbit Protection of Induced Superconductivity in Majorana Nanowires},
@@ -30,9 +30,9 @@ Even though it is suboptimal, one usually resorts to sampling $X$ on a homogeneo
#### Choosing new points based on existing data improves the simulation efficiency.
<!-- This should convey the point that it is advantageous to do this. -->
An alternative, that improves the simulation efficiency is to choose new, potentially interesting points in $X$ based on existing data[@Gramacy2004; @Figueiredo1995; @Castro2008; @Chen2017]. <!-- cite i.e., hydrodynamics-->
Bayesian optimization works well for high-cost simulations where one needs to find a minimum (or maximum) [@Takhtaganov2018].
However, if the goal of the simulation is to approximate a continuous function using the fewest points, the continuity of the approximation is achieved by a greedy algorithm that samples mid-points of intervals with the largest distance or curvature[@Wolfram2011].
Such a sampling strategy (i.e., in Fig. @fig:algo) would trivially speed up many simulations.
Here, the complexity arises when parallelizing this algorithm, because this requires a lot of bookkeeping and planning.
...
...
@@ -73,7 +73,7 @@ In all cases using Adaptive results in a higher fidelity plot.
#### We provide a reference implementation, the Adaptive package, and demonstrate its performance.
We provide a reference implementation, the open-source Python package called Adaptive[@Nijholt2019], which has previously been used in several scientific publications[@Vuik2018; @Laeven2019; @Bommer2019; @Melo2019].
It has algorithms for $f \colon \R^N \to \R^M$, where $N, M \in \mathbb{Z}^+$ but which work best when $N$ is small; integration in $\R$; and the averaging of stochastic functions.
Most of our algorithms allow for a customizable loss function with which one can adapt the sampling algorithm to work optimally for different classes of functions.
It integrates with the Jupyter notebook environment as well as popular parallel computation frameworks such as `ipyparallel`, `mpi4py`, and `dask.distributed`.
...
...
@@ -88,9 +88,9 @@ To explain the relation of our approach with prior work, we discuss several exis
This is not a systematic review of all these fields, but rather, we aim to identify the important traits and design considerations.
#### Experiment design uses Bayesian sampling because the computational costs are not a limitation.
Optimal experiment design (OED) is a field of statistics that minimizes the number of experimental runs needed to estimate specific parameters and, thereby, reduces the cost of experimentation[@Emery1998].
It works with many degrees of freedom and can consider constraints, for example when the sample space contains regions that are infeasible for practical reasons.
One form of OED is response-adaptive design[@Hu2006], which concerns the adaptive sampling of designs for statistical experiments.
Here, the acquired data (i.e. the observations) are used to estimate the uncertainties of a certain desired parameter.
It then suggests further experiments that will optimally reduce these uncertainties.
In this step of the calculation Bayesian statistics is frequently used.
...
...
@@ -100,21 +100,21 @@ In a typical non-adaptive experiment decisions on which experiments to perform,
#### Plotting and low dimensional integration uses local sampling.
Plotting a low dimensional function in between bounds requires one to evaluate the function on sufficiently many points such that when we interpolate values in between data points, we get an accurate description of the function values that were not explicitly calculated.
In order to minimize the number of function evaluations, one can use adaptive sampling routines.
For example, for one-dimensional functions, Mathematica [@WolframResearch] implements a `FunctionInterpolation` class that takes the function, $x_\textrm{min}$, and $x_\textrm{max}$, and returns an object that samples the function more densely in regions with high curvature; however, details on the algorithm are not published.
Subsequently, we can query this object for points in between $x_\textrm{min}$ and $x_\textrm{max}$, and get the interpolated value, or we can use it to plot the function without specifying a grid.
Another application for adaptive sampling is numerical integration.
It works by estimating the integration error of each interval and then minimizing the sum of these errors greedily.
For example, the `CQUAD` algorithm[@Gonnet2010] in the GNU Scientific Library[@Galassi1996] implements a more sophisticated strategy and is a doubly-adaptive general-purpose integration routine which can handle most types of singularities.
In general, it requires more function evaluations than the integration routines in `QUADPACK`[@Galassi1996]; however, it works more often for difficult integrands.
It is doubly-adaptive because it can decide to either subdivide intervals into more intervals or refine an interval by adding more points---that do not lie on a regular grid---to each interval.
#### PDE solvers and computer graphics use adaptive meshing.
Hydrodynamics[@Berger1989; @Berger1984] and astrophysics[@Klein1999] use an adaptive refinement of the triangulation mesh on which a partial differential equation is discretized.
By providing smaller mesh elements in regions with a higher variation of the solution, they reduce the amount of data and calculation needed at each step of time propagation.
The remeshing at each time step happens globally, and this is an expensive operation.
Therefore, mesh optimization does not fit our workflow because expensive global updates should be avoided.
Computer graphics uses similar adaptive methods where a smooth surface can represent a surface via a coarser piecewise linear polygon mesh, called a subdivision surface[@DeRose1998].
An example of such a polygonal remeshing method is one where the polygons align with the curvature of the space or field; this is called anisotropic meshing[@Alliez2003].
# Design constraints and the general algorithm
...
...
@@ -201,7 +201,7 @@ By adding the two loss functions, we can combine the 3D area loss to exploit int
## Line simplification loss
#### The line simplification loss is based on an inverse Visvalingam’s algorithm.
Inspired by a method commonly employed in digital cartography for coastline simplification, Visvalingam's algorithm, we construct a loss function that does its reverse[@Visvalingam1990].
Here, at each point (ignoring the boundary points), we compute the effective area associated with its triangle, see Fig. @fig:line_loss(b).
The loss then becomes the average area of two adjacent triangles.
By Taylor expanding $f$ around $x$ it can be shown that the area of the triangles relates to the contributions of the second derivative.
...
...
@@ -232,8 +232,8 @@ Here, we see that for homogeneous sampling to get the same error as sampling wit
## A parallelizable adaptive integration algorithm based on cquad
#### The `cquad` algorithm belongs to a class that is parallelizable.
In Sec. @sec:review we mentioned the doubly-adaptive integration algorithm `CQUAD`[@Gonnet2010].
This algorithm uses Clenshaw-Curtis quadrature rules of increasing degree $d$ in each interval [@Clenshaw1960].
The error estimate is $\sqrt{\int{\left(f_0(x) - f_1(x)\right)^2}}$, where $f_0$ and $f_1$ are two successive interpolations of the integrand.
To reach the desired total error, intervals with the maximum absolute error are improved.
Either (1) the degree of the rule is increased or (2) the interval is split if either the function does not appear to be smooth or a rule of maximum degree ($d=4$) has been reached.
For more details on how to use Adaptive, we recommend reading the tutorial inside the documentation [@Nijholt2018].
# Possible extensions
#### Anisotropic triangulation would improve the algorithm.
The current implementation of choosing the candidate point inside a simplex (triangle in 2D) with the highest loss, for the `LearnerND`, works by either picking a point (1) in the center of the simplex or (2) by picking a point on the longest edge of the simplex.
The choice depends on the shape of the simplex, where the algorithm tries to create regular simplices.
Alternatively, a good strategy is choosing points somewhere on the edge of a triangle such that the simplex aligns with the gradient of the function; creating an anisotropic triangulation[@Dyn1990].
This is a similar approach to the anisotropic meshing techniques mentioned in the literature review.
#### Learning stochastic functions is a promising direction.