Merge branch 'tell_pending' into 'master'

Introduce 'tell_pending', which replaces 'tell(x, None)'. See merge request !107.

Merge branch 'tell_pending' into 'master'
1b9ef594 · Joseph Weston · 6896de47 · 2bdb12b5 · 1b9ef594 · 1b9ef594
Commit 1b9ef594 authored 6 years ago by Joseph Weston
--- a/adaptive/learner/average_learner.py
+++ b/adaptive/learner/average_learner.py
@@ -50,7 +50,7 @@ class AverageLearner(BaseLearner):
    def n_requested(self):
        return len(self.data) + len(self.pending_points)

-    def ask(self, n, add_data=True):
+    def ask(self, n, tell_pending=True):
        points = list(range(self.n_requested, self.n_requested + n))

        if any(p in self.data or p in self.pending_points for p in points):
@@ -60,8 +60,9 @@ class AverageLearner(BaseLearner):
                          - set(self.pending_points))[:n]

        loss_improvements = [self.loss_improvement(n) / n] * n
-        if add_data:
-            self.tell_many(points, itertools.repeat(None))
+        if tell_pending:
+            for p in points:
+                self.tell_pending(p)
        return points, loss_improvements

    def tell(self, n, value):
@@ -69,14 +70,14 @@ class AverageLearner(BaseLearner):
            # The point has already been added before.
            return

-        if value is None:
-            self.pending_points.add(n)
-        else:
-            self.data[n] = value
-            self.pending_points.discard(n)
-            self.sum_f += value
-            self.sum_f_sq += value**2
-            self.npoints += 1
+        self.data[n] = value
+        self.pending_points.discard(n)
+        self.sum_f += value
+        self.sum_f_sq += value**2
+        self.npoints += 1
+
+    def tell_pending(self, n):
+        self.pending_points.add(n)

    @property
    def mean(self):

--- a/adaptive/learner/balancing_learner.py
+++ b/adaptive/learner/balancing_learner.py
@@ -71,7 +71,7 @@ class BalancingLearner(BaseLearner):
            for index, learner in enumerate(self.learners):
                if index not in self._points:
                    self._points[index] = learner.ask(
-                        n=1, add_data=False)
+                        n=1, tell_pending=False)
                point, loss_improvement = self._points[index]
                improvements_per_learner.append(loss_improvement[0])
                pairs.append((index, point[0]))
@@ -79,13 +79,13 @@ class BalancingLearner(BaseLearner):
                       key=itemgetter(1))
            points.append(x)
            loss_improvements.append(l)
-            self.tell(x, None)
+            self.tell_pending(x)

        return points, loss_improvements

-    def ask(self, n, add_data=True):
+    def ask(self, n, tell_pending=True):
        """Chose points for learners."""
-        if not add_data:
+        if not tell_pending:
            with restore(*self.learners):
                return self._ask_and_tell(n)
        else:
@@ -97,6 +97,12 @@ class BalancingLearner(BaseLearner):
        self._loss.pop(index, None)
        self.learners[index].tell(x, y)

+    def tell_pending(self, x):
+        index, x = x
+        self._points.pop(index, None)
+        self._loss.pop(index, None)
+        self.learners[index].tell_pending(x)
+
    def loss(self, real=True):
        losses = []
        for index, learner in enumerate(self.learners):

--- a/adaptive/learner/base_learner.py
+++ b/adaptive/learner/base_learner.py
@@ -44,6 +44,12 @@ class BaseLearner(metaclass=abc.ABCMeta):
        for x, y in zip(xs, ys):
            self.tell(x, y)

+    @abc.abstractmethod
+    def tell_pending(self, x):
+        """Tell the learner that 'x' has been requested such
+        that it's not suggested again."""
+        pass
+
    @abc.abstractmethod
    def remove_unfinished(self):
        """Remove uncomputed data from the learner."""
@@ -62,14 +68,14 @@ class BaseLearner(metaclass=abc.ABCMeta):
        """

    @abc.abstractmethod
-    def ask(self, n, add_data=True):
+    def ask(self, n, tell_pending=True):
        """Choose the next 'n' points to evaluate.

        Parameters
        ----------
        n : int
            The number of points to choose.
-        add_data : bool, default: True
+        tell_pending : bool, default: True
            If True, add the chosen points to this
            learner's 'data' with 'None' for the 'y'
            values. Set this to False if you do not

--- a/adaptive/learner/data_saver.py
+++ b/adaptive/learner/data_saver.py
@@ -32,10 +32,13 @@ class DataSaver:
        return getattr(self.learner, attr)

    def tell(self, x, result):
-        y = self.arg_picker(result) if result is not None else None
+        y = self.arg_picker(result)
        self.extra_data[x] = result
        self.learner.tell(x, y)

+    def tell_pending(self, x):
+        self.learner.tell_pending(x)
+

 def _ds(learner_type, arg_picker, *args, **kwargs):
    args = args[2:]  # functools.partial passes the first 2 arguments in 'args'!

--- a/adaptive/learner/integrator_learner.py
+++ b/adaptive/learner/integrator_learner.py
@@ -15,6 +15,7 @@ from .integrator_coeffs import (b_def, T_left, T_right, ns, hint,
                                ndiv_max, min_sep, eps, xi, V_inv,
                                Vcond, alpha, gamma)
 from ..notebook_integration import ensure_holoviews
+from ..utils import restore


 def _downdate(c, nans, depth):
@@ -393,6 +394,9 @@ class IntegratorLearner(BaseLearner):
                        assert ival in self.ivals
                        self.priority_split.append(ival)

+    def tell_pending(self):
+        pass
+
    def propagate_removed(self, ival):
        def _propagate_removed_down(ival):
            ival.removed = True
@@ -414,10 +418,16 @@ class IntegratorLearner(BaseLearner):
                self._stack.append(x)
        self.ivals.add(ival)

-    def ask(self, n, add_data=True):
-        if not add_data:
-            raise NotImplementedError(
-                "Asking points irreversibly changes the learner's data structure.")
+
+    def ask(self, n, tell_pending=True):
+        """Choose points for learners."""
+        if not tell_pending:
+            with restore(self):
+                return self._ask_and_tell_pending(n)
+        else:
+            return self._ask_and_tell_pending(n)
+
+    def _ask_and_tell_pending(self, n):
        points, loss_improvements = self.pop_from_stack(n)
        n_left = n - len(points)
        while n_left > 0:

--- a/adaptive/learner/learner1D.py
+++ b/adaptive/learner/learner1D.py
@@ -188,7 +188,8 @@ class Learner1D(BaseLearner):
        if (b is not None) and right_loss_is_unknown:
            self.losses_combined[x, b] = float('inf')

-    def find_neighbors(self, x, neighbors):
+    @staticmethod
+    def find_neighbors(x, neighbors):
        if x in neighbors:
            return neighbors[x]
        pos = neighbors.bisect_left(x)
@@ -212,7 +213,7 @@ class Learner1D(BaseLearner):

        When the function returns a vector the learners y-scale is set by
        the level with the the largest peak-to-peak value.
-         """
+        """
        self._bbox[0][0] = min(self._bbox[0][0], x)
        self._bbox[0][1] = max(self._bbox[0][1], x)
        self._scale[0] = self._bbox[0][1] - self._bbox[0][0]
@@ -236,36 +237,29 @@ class Learner1D(BaseLearner):
            # The point is already evaluated before
            return

-        real = y is not None
-        if real:
-            # either it is a float/int, if not, try casting to a np.array
-            if not isinstance(y, (float, int)):
-                y = np.asarray(y, dtype=float)
+        # either it is a float/int, if not, try casting to a np.array
+        if not isinstance(y, (float, int)):
+            y = np.asarray(y, dtype=float)

-            # Add point to the real data dict
-            self.data[x] = y
-            # remove from set of pending points
-            self.pending_points.discard(x)
+        # Add point to the real data dict
+        self.data[x] = y

-            if self._vdim is None:
-                try:
-                    self._vdim = len(np.squeeze(y))
-                except TypeError:
-                    self._vdim = 1
-        else:
-            # The keys of pending_points are the unknown points
-            self.pending_points.add(x)
+        # remove from set of pending points
+        self.pending_points.discard(x)

-        # Update the neighbors
-        self.update_neighbors(x, self.neighbors_combined)
-        if real:
-            self.update_neighbors(x, self.neighbors)
+        if self._vdim is None:
+            try:
+                self._vdim = len(np.squeeze(y))
+            except TypeError:
+                self._vdim = 1

-        # Update the scale
-        self.update_scale(x, y)
+        if not self.bounds[0] <= x <= self.bounds[1]:
+            return

-        # Update the losses
-        self.update_losses(x, real)
+        self.update_neighbors(x, self.neighbors_combined)
+        self.update_neighbors(x, self.neighbors)
+        self.update_scale(x, y)
+        self.update_losses(x, real=True)

        # If the scale has increased enough, recompute all losses.
        if self._scale[1] > self._oldscale[1] * 2:
@@ -275,7 +269,15 @@ class Learner1D(BaseLearner):

            self._oldscale = deepcopy(self._scale)

-    def ask(self, n, add_data=True):
+    def tell_pending(self, x):
+        if x in self.data:
+            # The point is already evaluated before
+            return
+        self.pending_points.add(x)
+        self.update_neighbors(x, self.neighbors_combined)
+        self.update_losses(x, real=False)
+
+    def ask(self, n, tell_pending=True):
        """Return n points that are expected to maximally reduce the loss."""
        # Find out how to divide the n points over the intervals
        # by finding  positive integer n_i that minimize max(L_i / n_i) subject
@@ -326,8 +328,9 @@ class Learner1D(BaseLearner):
                                     itertools.repeat(-quality, n - 1)
                                     for quality, x, n in quals))

-        if add_data:
-            self.tell_many(points, itertools.repeat(None))
+        if tell_pending:
+            for p in points:
+                self.tell_pending(p)

        return points, loss_improvements


--- a/adaptive/learner/learner2D.py
+++ b/adaptive/learner/learner2D.py
@@ -269,50 +269,76 @@ class Learner2D(BaseLearner):
        return not any((p in self.pending_points or p in self._stack)
                       for p in self._bounds_points)

-    def data_combined(self):
-        # Interpolate the unfinished points
-        data_combined = copy(self.data)
+    def _data_in_bounds(self):
+        if self.data:
+            points = np.array(list(self.data.keys()))
+            values = np.array(list(self.data.values()), dtype=float)
+            ll, ur = np.reshape(self.bounds, (2, 2)).T
+            inds = np.all(np.logical_and(ll <= points, points <= ur), axis=1)
+            return points[inds], values[inds].reshape(-1, self.vdim)
+        return np.zeros((0, 2)), np.zeros((0, self.vdim), dtype=float)
+
+    def _data_interp(self):
        if self.pending_points:
-            points_interp = list(self.pending_points)
+            points = list(self.pending_points)
            if self.bounds_are_done:
-                values_interp = self.ip()(self._scale(points_interp))
+                values = self.ip()(self._scale(points))
            else:
                # Without the bounds the interpolation cannot be done properly,
                # so we just set everything to zero.
-                values_interp = np.zeros((len(points_interp), self.vdim))
+                values = np.zeros((len(points), self.vdim))
+            return points, values
+        return np.zeros((0, 2)), np.zeros((0, self.vdim), dtype=float)
+
+    def _data_combined(self):
+        points, values = self._data_in_bounds()
+        if not self.pending_points:
+            return points, values
+        points_interp, values_interp = self._data_interp()
+        points_combined = np.vstack([points, points_interp])
+        values_combined = np.vstack([values, values_interp])
+        return points_combined, values_combined

-            for point, value in zip(points_interp, values_interp):
-                data_combined[point] = value
-
-        return data_combined
+    def data_combined(self):
+        # Interpolate the unfinished points
+        points, values = self._data_combined()
+        return {tuple(k): v for k, v in zip(points, values)}

    def ip(self):
        if self._ip is None:
-            points = self._scale(list(self.data.keys()))
-            values = np.array(list(self.data.values()), dtype=float)
+            points, values = self._data_in_bounds()
+            points = self._scale(points)
            self._ip = interpolate.LinearNDInterpolator(points, values)
        return self._ip

    def ip_combined(self):
        if self._ip_combined is None:
-            data_combined = self.data_combined()
-            points = self._scale(list(data_combined.keys()))
-            values = np.array(list(data_combined.values()), dtype=float)
+            points, values = self._data_combined()
+            points = self._scale(points)
            self._ip_combined = interpolate.LinearNDInterpolator(points,
                                                                 values)
        return self._ip_combined

+    def inside_bounds(self, xy):
+        x, y = xy
+        (xmin, xmax), (ymin, ymax) = self.bounds
+        return xmin <= x <= xmax and ymin <= y <= ymax
+
    def tell(self, point, value):
        point = tuple(point)
+        self.data[point] = value
+        if not self.inside_bounds(point):
+            return
+        self.pending_points.discard(point)
+        self._ip = None
+        self._stack.pop(point, None)

-        if value is None:
-            self.pending_points.add(point)
-            self._ip_combined = None
-        else:
-            self.data[point] = value
-            self.pending_points.discard(point)
-            self._ip = None
-
+    def tell_pending(self, point):
+        point = tuple(point)
+        if not self.inside_bounds(point):
+            return
+        self.pending_points.add(point)
+        self._ip_combined = None
        self._stack.pop(point, None)

    def _fill_stack(self, stack_till=1):
@@ -345,13 +371,14 @@ class Learner2D(BaseLearner):

        return points_new, losses_new

-    def ask(self, n, add_data=True):
-        # Even if add_data is False we add the point such that _fill_stack
+    def ask(self, n, tell_pending=True):
+        # Even if tell_pending is False we add the point such that _fill_stack
        # will return new points, later we remove these points if needed.
        points = list(self._stack.keys())
        loss_improvements = list(self._stack.values())
        n_left = n - len(points)
-        self.tell_many(points[:n], itertools.repeat(None))
+        for p in points[:n]:
+            self.tell_pending(p)

        while n_left > 0:
            # The while loop is needed because `stack_till` could be larger
@@ -359,13 +386,14 @@ class Learner2D(BaseLearner):
            # it could fill up till a length smaller than `stack_till`.
            new_points, new_loss_improvements = self._fill_stack(
                stack_till=max(n_left, self.stack_size))
-            self.tell_many(new_points[:n_left], itertools.repeat(None))
+            for p in points[:n_left]:
+                self.tell_pending(p)
            n_left -= len(new_points)

            points += new_points
            loss_improvements += new_loss_improvements

-        if not add_data:
+        if not tell_pending:
            self._stack = OrderedDict(zip(points[:self.stack_size],
                                          loss_improvements))
            for point in points[:n]:

--- a/adaptive/learner/learnerND.py
+++ b/adaptive/learner/learnerND.py
@@ -8,11 +8,12 @@ import numpy as np
 from scipy import interpolate
 import scipy.spatial

-from ..notebook_integration import ensure_holoviews
 from .base_learner import BaseLearner

-from .triangulation import Triangulation, point_in_simplex, \
-                           circumsphere, simplex_volume_in_embedding
+from ..notebook_integration import ensure_holoviews
+from .triangulation import (Triangulation, point_in_simplex,
+                            circumsphere, simplex_volume_in_embedding)
+from ..utils import restore


 def volume(simplex, ys=None):
@@ -245,7 +246,7 @@ class LearnerND(BaseLearner):
            return  # we already know about the point

        if value is None:
-            return self._tell_pending(point)
+            return self.tell_pending(point)

        self._pending.discard(point)
        tri = self.tri
@@ -263,7 +264,7 @@ class LearnerND(BaseLearner):
        simplex = tuple(sorted(simplex))
        return simplex in self.tri.simplices

-    def _tell_pending(self, point, simplex=None):
+    def tell_pending(self, point, *, simplex=None):
        point = tuple(point)
        self._pending.add(point)

@@ -309,15 +310,23 @@ class LearnerND(BaseLearner):
            heapq.heappush(self._simplex_queue,
                           (-subloss, simplex, subsimplex))

-    def ask(self, n=1):
+    def _ask_and_tell_pending(self, n=1):
        xs, losses = zip(*(self._ask() for _ in range(n)))
        return list(xs), list(losses)

+    def ask(self, n, tell_pending=True):
+        """Choose points for learners."""
+        if not tell_pending:
+            with restore(self):
+                return self._ask_and_tell_pending(n)
+        else:
+            return self._ask_and_tell_pending(n)
+
    def _ask_bound_point(self):
        # get the next bound point that is still available
        new_point = next(p for p in self._bounds_points
                         if p not in self.data and p not in self._pending)
-        self._tell_pending(new_point)
+        self.tell_pending(new_point)
        return new_point, np.inf

    def _ask_point_without_known_simplices(self):
@@ -330,7 +339,7 @@ class LearnerND(BaseLearner):
        p = r * a + b
        p = tuple(p)

-        self._tell_pending(p)
+        self.tell_pending(p)
        return p, np.inf

    def _pop_highest_existing_simplex(self):
@@ -350,8 +359,8 @@ class LearnerND(BaseLearner):
        # Could not find a simplex, this code should never be reached
        assert self.tri is not None
        raise AssertionError(
-            """Could not find a simplex to. Yet there should always be a simplex 
-            available if LearnerND.tri() is not None"""
+            "Could not find a simplex to. Yet there should always be a simplex "
+            "available if LearnerND.tri() is not None"
        )

    def _ask_best_point(self):
@@ -371,7 +380,7 @@ class LearnerND(BaseLearner):
                                                  transform=self._transform))

        self._pending_to_simplex[point_new] = simplex
-        self._tell_pending(point_new, simplex)  # O(??)
+        self.tell_pending(point_new, simplex=simplex)  # O(??)

        return point_new, loss


--- a/adaptive/learner/skopt_learner.py
+++ b/adaptive/learner/skopt_learner.py
@@ -27,11 +27,13 @@ class SKOptLearner(Optimizer, BaseLearner):
        self.function = function
        super().__init__(**kwargs)

-    def tell(self, x, y, fit=True):
-        if y is not None:
-            # 'skopt.Optimizer' takes care of points we
-            # have not got results for.
-            super().tell([x], y, fit)
+    def tell(self, x, y, fit=True):    
+        super().tell([x], y, fit)
+
+    def tell_pending(self, x):
+        # 'skopt.Optimizer' takes care of points we
+        # have not got results for.
+        pass

    def remove_unfinished(self):
        pass
@@ -46,7 +48,10 @@ class SKOptLearner(Optimizer, BaseLearner):
            # estimator of loss, but it is the cheapest.
            return 1 - model.score(self.Xi, self.yi)

-    def ask(self, n, add_data=True):
+    def ask(self, n, tell_pending=True):
+        if not tell_pending:
+            raise NotImplementedError('Asking points is an irreversible '
+                'action, so use `ask(n, tell_pending=True`.')
        points = super().ask(n)
        # TODO: Choose a better estimate for the loss improvement.
        if self.space.n_dims > 1:

--- a/adaptive/tests/test_average_learner.py
+++ b/adaptive/tests/test_average_learner.py
@@ -10,7 +10,7 @@ def test_only_returns_new_points():
    for i in range(5, 10):
        learner.tell(i, 1)

-    learner.tell(0, None)  # This means it shouldn't return 0 anymore
+    learner.tell_pending(0)  # This means it shouldn't return 0 anymore

    assert learner.ask(1)[0][0] == 1
    assert learner.ask(1)[0][0] == 2

--- a/adaptive/tests/test_learners.py
+++ b/adaptive/tests/test_learners.py
@@ -351,7 +351,7 @@ def test_learner_performance_is_invariant_under_scaling(learner_type, f, learner
    assert abs(learner.loss() - control.loss()) / learner.loss() < 1e-11


-# XXX: the LearnerND currently fails because there is no `add_data=False` argument in ask.
+# XXX: The LearnerND shouldn't fail, see https://gitlab.kwant-project.org/qt/adaptive/issues/105
 @run_with(Learner1D, Learner2D, xfail(LearnerND), AverageLearner)
 def test_balancing_learner(learner_type, f, learner_kwargs):
    """Test if the BalancingLearner works with the different types of learners."""
@@ -366,7 +366,7 @@ def test_balancing_learner(learner_type, f, learner_kwargs):
    for i in range(100):
        n = random.randint(1, 10)
        m = random.randint(0, n)
-        xs, _ = learner.ask(n, add_data=False)
+        xs, _ = learner.ask(n, tell_pending=False)

        # Save 'm' random points out of `xs` for later
        random.shuffle(xs)