Skip to content
Snippets Groups Projects
Commit c26ac920 authored by Bas Nijholt's avatar Bas Nijholt
Browse files

Merge branch 'efficient_tell_many' into 'master'

More efficient 'tell_many'

See merge request !96
parents dc57ba19 d09a3c93
No related branches found
No related tags found
1 merge request!96More efficient 'tell_many'
Pipeline #12361 passed
......@@ -67,6 +67,17 @@ def linspace(x_left, x_right, n):
return [x_left + step * i for i in range(1, n)]
def _get_neighbors_from_list(xs):
    """Return a ``SortedDict`` mapping each point to ``[left, right]`` neighbors.

    The points are sorted first; the smallest point gets ``None`` as its
    left neighbor and the largest gets ``None`` as its right neighbor.
    """
    sorted_xs = np.sort(xs)
    # Rolling the sorted array by +1/-1 aligns each point with its
    # predecessor/successor; the wrapped-around entries are overwritten.
    lefts = np.roll(sorted_xs, 1).tolist()
    rights = np.roll(sorted_xs, -1).tolist()
    lefts[0] = None
    rights[-1] = None
    pairs = zip(sorted_xs, lefts, rights)
    return sortedcontainers.SortedDict(
        (x, [x_left, x_right]) for x, x_left, x_right in pairs)
class Learner1D(BaseLearner):
"""Learns and predicts a function 'f:ℝ → ℝ^N'.
......@@ -105,7 +116,7 @@ class Learner1D(BaseLearner):
self.losses = {}
self.losses_combined = {}
self.data = sortedcontainers.SortedDict()
self.data = {}
self.pending_points = set()
# A dict {x_n: [x_{n-1}, x_{n+1}]} for quick checking of local
......@@ -129,7 +140,17 @@ class Learner1D(BaseLearner):
@property
def vdim(self):
return 1 if self._vdim is None else self._vdim
if self._vdim is None:
if self.data:
y = next(iter(self.data.values()))
try:
self._vdim = len(np.squeeze(y))
except TypeError:
# Means we are taking the length of a float
self._vdim = 1
else:
return 1
return self._vdim
@property
def npoints(self):
......@@ -258,12 +279,6 @@ class Learner1D(BaseLearner):
# remove from set of pending points
self.pending_points.discard(x)
if self._vdim is None:
try:
self._vdim = len(np.squeeze(y))
except TypeError:
self._vdim = 1
if not self.bounds[0] <= x <= self.bounds[1]:
return
......@@ -273,7 +288,7 @@ class Learner1D(BaseLearner):
self.update_losses(x, real=True)
# If the scale has increased enough, recompute all losses.
if self._scale[1] > self._oldscale[1] * 2:
if self._scale[1] > 2 * self._oldscale[1]:
for interval in self.losses:
self.update_interpolated_loss_in_interval(*interval)
......@@ -288,6 +303,75 @@ class Learner1D(BaseLearner):
self.update_neighbors(x, self.neighbors_combined)
self.update_losses(x, real=False)
    def tell_many(self, xs, ys, *, force=False):
        """Tell the learner about many (x, y) pairs at once.

        Rather than calling ``tell`` per point, this rebuilds the neighbor
        structures, scale, and losses from scratch, which is cheaper when a
        large batch of points is added.

        Parameters
        ----------
        xs, ys
            The points and their function values.
        force : bool, keyword-only
            If True, always take the batch code path, even for small inputs.
        """
        if not force and not (len(xs) > 0.5 * len(self.data) and len(xs) > 2):
            # Only run this more efficient method if there are
            # at least 2 points and the amount of points added are
            # at least half of the number of points already in 'data'.
            # These "magic numbers" are somewhat arbitrary.
            super().tell_many(xs, ys)
            return

        # Add data points and drop them from the pending set.
        self.data.update(zip(xs, ys))
        self.pending_points.difference_update(xs)

        # Get all data as numpy arrays.
        points = np.array(list(self.data.keys()))
        values = np.array(list(self.data.values()))
        points_pending = np.array(list(self.pending_points))
        points_combined = np.hstack([points_pending, points])

        # Rebuild the neighbor structures from scratch.
        self.neighbors = _get_neighbors_from_list(points)
        self.neighbors_combined = _get_neighbors_from_list(points_combined)

        # Update the bounding box and scale of x and y.
        self._bbox[0] = [points_combined.min(), points_combined.max()]
        self._bbox[1] = [values.min(axis=0), values.max(axis=0)]
        self._scale[0] = self._bbox[0][1] - self._bbox[0][0]
        self._scale[1] = np.max(self._bbox[1][1] - self._bbox[1][0])
        self._oldscale = deepcopy(self._scale)

        # Find the intervals for which the losses should be calculated.
        # The last (x, x_right) pair is dropped because the right-most
        # point has no interval to its right (x_r is None there).
        intervals, intervals_combined = [
            [(x_m, x_r) for x_m, (x_l, x_r) in neighbors.items()][:-1]
            for neighbors in (self.neighbors, self.neighbors_combined)]

        # The losses for the "real" intervals (both endpoints evaluated).
        self.losses = {}
        for x_left, x_right in intervals:
            self.losses[x_left, x_right] = (
                self.loss_per_interval((x_left, x_right), self._scale, self.data)
                if x_right - x_left >= self._dx_eps else 0)

        # List with "real" intervals that have interpolated intervals inside.
        to_interpolate = []

        self.losses_combined = {}
        for ival in intervals_combined:
            # If this interval exists in 'losses' then copy it otherwise
            # calculate it.
            if ival in self.losses:
                self.losses_combined[ival] = self.losses[ival]
            else:
                # Set all losses to inf now, later they might be updated if
                # the interval appears to be inside a real interval.
                self.losses_combined[ival] = np.inf
                x_left, x_right = ival
                a, b = to_interpolate[-1] if to_interpolate else (None, None)
                if b == x_left and (a, b) not in self.losses:
                    # join (a, b) and (x_left, x_right) --> (a, x_right)
                    to_interpolate[-1] = (a, x_right)
                else:
                    to_interpolate.append((x_left, x_right))

        for ival in to_interpolate:
            if ival in self.losses:
                # If this interval does not exist it should already
                # have an inf loss.
                self.update_interpolated_loss_in_interval(*ival)
def ask(self, n, tell_pending=True):
"""Return n points that are expected to maximally reduce the loss."""
points, loss_improvements = self._ask_points_without_adding(n)
......@@ -379,8 +463,7 @@ class Learner1D(BaseLearner):
elif not self.vdim > 1:
p = hv.Scatter(self.data) * hv.Path([])
else:
xs = list(self.data.keys())
ys = list(self.data.values())
xs, ys = zip(*sorted(self.data.items()))
p = hv.Path((xs, ys)) * hv.Scatter([])
# Plot with 5% empty margins such that the boundary points are visible
......
......@@ -235,3 +235,107 @@ def test_ask_does_not_return_known_points_when_returning_bounds():
learner.tell(0, 0)
points, _ = learner.ask(3)
assert 0 not in points
def test_tell_many():
    """Check that 'tell_many' produces the same learner state as repeated 'tell'."""

    def f(x, offset=0.123214):
        a = 0.01
        return (np.sin(x**2) + np.sin(x**5)
                + a**2 / (a**2 + (x - offset)**2)
                + x**2 + 1e-5 * x**3)

    def f_vec(x, offset=0.123214):
        a = 0.01
        y = x + a**2 / (a**2 + (x - offset)**2)
        return [y, 0.5 * y, y**2]

    def assert_equal_dicts(d1, d2):
        # Compare two dicts with float (or vector-of-float) values.
        xs1, ys1 = zip(*sorted(d1.items()))
        xs2, ys2 = zip(*sorted(d2.items()))
        # 'np.float' was deprecated in NumPy 1.20 and removed in 1.24;
        # the builtin 'float' is the documented replacement.
        ys1 = np.array(ys1, dtype=float)
        ys2 = np.array(ys2, dtype=float)
        np.testing.assert_almost_equal(xs1, xs2)
        np.testing.assert_almost_equal(ys1, ys2)

    def test_equal(l1, l2):
        # Assert that two learners have identical internal state.
        assert_equal_dicts(l1.neighbors, l2.neighbors)
        assert_equal_dicts(l1.neighbors_combined, l2.neighbors_combined)
        assert_equal_dicts(l1.data, l2.data)
        assert_equal_dicts(l2.losses, l1.losses)
        assert_equal_dicts(l2.losses_combined, l1.losses_combined)
        np.testing.assert_almost_equal(sorted(l1.pending_points),
                                       sorted(l2.pending_points))
        # Fixed: previously compared l1._bbox[1] with itself, which made
        # this assertion a no-op.
        np.testing.assert_almost_equal(l1._bbox[1], l2._bbox[1])
        assert l1._scale == l2._scale
        assert l1._bbox[0] == l2._bbox[0]

    for function in [f, f_vec]:
        learner = Learner1D(function, bounds=(-1, 1))
        learner2 = Learner1D(function, bounds=(-1, 1))
        simple(learner, goal=lambda l: l.npoints > 200)
        xs, ys = zip(*learner.data.items())

        # Make the scale huge to not get a scale doubling.
        x = 1e-6
        max_value = 1e6 if learner.vdim == 1 else np.array(learner.vdim * [1e6])
        learner.tell(x, max_value)
        learner2.tell(x, max_value)

        for x in xs:
            learner2.tell_pending(x)

        learner2.tell_many(xs, ys)
        test_equal(learner, learner2)

    # Test non-determinism. We keep a list of points that will be
    # evaluated later to emulate parallel execution.
    def _random_run(learner, learner2, scale_doubling=True):
        if not scale_doubling:
            # Make the scale huge to not get a scale doubling.
            x = 1e-6
            max_value = 1e6
            learner.tell(x, max_value)
            learner2.tell(x, max_value)

        stash = []
        for i in range(10):
            xs, _ = learner.ask(10)
            for x in xs:
                learner2.tell_pending(x)

            # Save 5 random points out of `xs` for later.
            random.shuffle(xs)
            for _ in range(5):
                stash.append(xs.pop())

            ys = [learner.function(x) for x in xs]
            learner.tell_many(xs, ys, force=True)
            for x, y in zip(xs, ys):
                learner2.tell(x, y)

            # Evaluate and add N random points from `stash`.
            random.shuffle(stash)
            xs = [stash.pop() for _ in range(random.randint(1, 5))]
            ys = [learner.function(x) for x in xs]
            learner.tell_many(xs, ys, force=True)
            for x, y in zip(xs, ys):
                learner2.tell(x, y)

        if scale_doubling:
            # Double the scale to trigger the loss updates.
            max_value = max(learner.data.values())
            x = 1e-6
            learner.tell(x, max_value * 10)
            learner2.tell(x, max_value * 10)

    learner = Learner1D(f, bounds=(-1, 1))
    learner2 = Learner1D(f, bounds=(-1, 1))
    _random_run(learner, learner2, scale_doubling=False)
    test_equal(learner, learner2)

    learner = Learner1D(f, bounds=(-1, 1))
    learner2 = Learner1D(f, bounds=(-1, 1))
    _random_run(learner, learner2, scale_doubling=True)
    test_equal(learner, learner2)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment