Skip to content
Snippets Groups Projects
Commit c26ac920 authored by Bas Nijholt's avatar Bas Nijholt
Browse files

Merge branch 'efficient_tell_many' into 'master'

More efficient 'tell_many'

See merge request !96
parents dc57ba19 d09a3c93
No related branches found
No related tags found
1 merge request!96More efficient 'tell_many'
Pipeline #12361 passed
......@@ -67,6 +67,17 @@ def linspace(x_left, x_right, n):
return [x_left + step * i for i in range(1, n)]
def _get_neighbors_from_list(xs):
    """Return a ``SortedDict`` mapping each point to ``[left, right]`` neighbors.

    The points are sorted first; the smallest point gets ``None`` as its
    left neighbor and the largest gets ``None`` as its right neighbor.
    """
    sorted_xs = np.sort(xs)
    # Rolling the sorted array by +1/-1 aligns each point with its
    # predecessor/successor; the wrapped-around entries are overwritten.
    lefts = np.roll(sorted_xs, 1).tolist()
    rights = np.roll(sorted_xs, -1).tolist()
    lefts[0] = None
    rights[-1] = None
    pairs = zip(sorted_xs, lefts, rights)
    return sortedcontainers.SortedDict(
        (x, [x_left, x_right]) for x, x_left, x_right in pairs)
class Learner1D(BaseLearner):
"""Learns and predicts a function 'f:ℝ → ℝ^N'.
......@@ -105,7 +116,7 @@ class Learner1D(BaseLearner):
self.losses = {}
self.losses_combined = {}
self.data = sortedcontainers.SortedDict()
self.data = {}
self.pending_points = set()
# A dict {x_n: [x_{n-1}, x_{n+1}]} for quick checking of local
......@@ -129,7 +140,17 @@ class Learner1D(BaseLearner):
@property
def vdim(self):
return 1 if self._vdim is None else self._vdim
if self._vdim is None:
if self.data:
y = next(iter(self.data.values()))
try:
self._vdim = len(np.squeeze(y))
except TypeError:
# Means we are taking the length of a float
self._vdim = 1
else:
return 1
return self._vdim
@property
def npoints(self):
......@@ -258,12 +279,6 @@ class Learner1D(BaseLearner):
# remove from set of pending points
self.pending_points.discard(x)
if self._vdim is None:
try:
self._vdim = len(np.squeeze(y))
except TypeError:
self._vdim = 1
if not self.bounds[0] <= x <= self.bounds[1]:
return
......@@ -273,7 +288,7 @@ class Learner1D(BaseLearner):
self.update_losses(x, real=True)
# If the scale has increased enough, recompute all losses.
if self._scale[1] > self._oldscale[1] * 2:
if self._scale[1] > 2 * self._oldscale[1]:
for interval in self.losses:
self.update_interpolated_loss_in_interval(*interval)
......@@ -288,6 +303,75 @@ class Learner1D(BaseLearner):
self.update_neighbors(x, self.neighbors_combined)
self.update_losses(x, real=False)
    def tell_many(self, xs, ys, *, force=False):
        """Tell the learner about many (x, y) pairs at once.

        Rather than calling ``tell`` per point, this rebuilds the neighbor
        structures, scale, and losses from scratch, which is cheaper when a
        large batch of points is added.

        Parameters
        ----------
        xs, ys
            The points and their function values.
        force : bool, keyword-only
            If True, always take the batch code path, even for small inputs.
        """
        if not force and not (len(xs) > 0.5 * len(self.data) and len(xs) > 2):
            # Only run this more efficient method if there are
            # at least 2 points and the amount of points added are
            # at least half of the number of points already in 'data'.
            # These "magic numbers" are somewhat arbitrary.
            super().tell_many(xs, ys)
            return

        # Add data points and drop them from the pending set.
        self.data.update(zip(xs, ys))
        self.pending_points.difference_update(xs)

        # Get all data as numpy arrays.
        points = np.array(list(self.data.keys()))
        values = np.array(list(self.data.values()))
        points_pending = np.array(list(self.pending_points))
        points_combined = np.hstack([points_pending, points])

        # Rebuild the neighbor structures from scratch.
        self.neighbors = _get_neighbors_from_list(points)
        self.neighbors_combined = _get_neighbors_from_list(points_combined)

        # Update the bounding box and scale of x and y.
        self._bbox[0] = [points_combined.min(), points_combined.max()]
        self._bbox[1] = [values.min(axis=0), values.max(axis=0)]
        self._scale[0] = self._bbox[0][1] - self._bbox[0][0]
        self._scale[1] = np.max(self._bbox[1][1] - self._bbox[1][0])
        self._oldscale = deepcopy(self._scale)

        # Find the intervals for which the losses should be calculated.
        # The last (x, x_right) pair is dropped because the right-most
        # point has no interval to its right (x_r is None there).
        intervals, intervals_combined = [
            [(x_m, x_r) for x_m, (x_l, x_r) in neighbors.items()][:-1]
            for neighbors in (self.neighbors, self.neighbors_combined)]

        # The losses for the "real" intervals (both endpoints evaluated).
        self.losses = {}
        for x_left, x_right in intervals:
            self.losses[x_left, x_right] = (
                self.loss_per_interval((x_left, x_right), self._scale, self.data)
                if x_right - x_left >= self._dx_eps else 0)

        # List with "real" intervals that have interpolated intervals inside.
        to_interpolate = []

        self.losses_combined = {}
        for ival in intervals_combined:
            # If this interval exists in 'losses' then copy it otherwise
            # calculate it.
            if ival in self.losses:
                self.losses_combined[ival] = self.losses[ival]
            else:
                # Set all losses to inf now, later they might be updated if
                # the interval appears to be inside a real interval.
                self.losses_combined[ival] = np.inf
                x_left, x_right = ival
                a, b = to_interpolate[-1] if to_interpolate else (None, None)
                if b == x_left and (a, b) not in self.losses:
                    # join (a, b) and (x_left, x_right) --> (a, x_right)
                    to_interpolate[-1] = (a, x_right)
                else:
                    to_interpolate.append((x_left, x_right))

        for ival in to_interpolate:
            if ival in self.losses:
                # If this interval does not exist it should already
                # have an inf loss.
                self.update_interpolated_loss_in_interval(*ival)
def ask(self, n, tell_pending=True):
"""Return n points that are expected to maximally reduce the loss."""
points, loss_improvements = self._ask_points_without_adding(n)
......@@ -379,8 +463,7 @@ class Learner1D(BaseLearner):
elif not self.vdim > 1:
p = hv.Scatter(self.data) * hv.Path([])
else:
xs = list(self.data.keys())
ys = list(self.data.values())
xs, ys = zip(*sorted(self.data.items()))
p = hv.Path((xs, ys)) * hv.Scatter([])
# Plot with 5% empty margins such that the boundary points are visible
......
......@@ -235,3 +235,107 @@ def test_ask_does_not_return_known_points_when_returning_bounds():
learner.tell(0, 0)
points, _ = learner.ask(3)
assert 0 not in points
def test_tell_many():
    """Check that 'tell_many' produces the same learner state as repeated 'tell'."""

    def f(x, offset=0.123214):
        a = 0.01
        return (np.sin(x**2) + np.sin(x**5)
                + a**2 / (a**2 + (x - offset)**2)
                + x**2 + 1e-5 * x**3)

    def f_vec(x, offset=0.123214):
        a = 0.01
        y = x + a**2 / (a**2 + (x - offset)**2)
        return [y, 0.5 * y, y**2]

    def assert_equal_dicts(d1, d2):
        # Compare two dicts with float (or vector-of-float) values.
        xs1, ys1 = zip(*sorted(d1.items()))
        xs2, ys2 = zip(*sorted(d2.items()))
        # 'np.float' was deprecated in NumPy 1.20 and removed in 1.24;
        # the builtin 'float' is the documented replacement.
        ys1 = np.array(ys1, dtype=float)
        ys2 = np.array(ys2, dtype=float)
        np.testing.assert_almost_equal(xs1, xs2)
        np.testing.assert_almost_equal(ys1, ys2)

    def test_equal(l1, l2):
        # Assert that two learners have identical internal state.
        assert_equal_dicts(l1.neighbors, l2.neighbors)
        assert_equal_dicts(l1.neighbors_combined, l2.neighbors_combined)
        assert_equal_dicts(l1.data, l2.data)
        assert_equal_dicts(l2.losses, l1.losses)
        assert_equal_dicts(l2.losses_combined, l1.losses_combined)
        np.testing.assert_almost_equal(sorted(l1.pending_points),
                                       sorted(l2.pending_points))
        # Fixed: previously compared l1._bbox[1] with itself, which made
        # this assertion a no-op.
        np.testing.assert_almost_equal(l1._bbox[1], l2._bbox[1])
        assert l1._scale == l2._scale
        assert l1._bbox[0] == l2._bbox[0]

    for function in [f, f_vec]:
        learner = Learner1D(function, bounds=(-1, 1))
        learner2 = Learner1D(function, bounds=(-1, 1))
        simple(learner, goal=lambda l: l.npoints > 200)
        xs, ys = zip(*learner.data.items())

        # Make the scale huge to not get a scale doubling.
        x = 1e-6
        max_value = 1e6 if learner.vdim == 1 else np.array(learner.vdim * [1e6])
        learner.tell(x, max_value)
        learner2.tell(x, max_value)

        for x in xs:
            learner2.tell_pending(x)

        learner2.tell_many(xs, ys)
        test_equal(learner, learner2)

    # Test non-determinism. We keep a list of points that will be
    # evaluated later to emulate parallel execution.
    def _random_run(learner, learner2, scale_doubling=True):
        if not scale_doubling:
            # Make the scale huge to not get a scale doubling.
            x = 1e-6
            max_value = 1e6
            learner.tell(x, max_value)
            learner2.tell(x, max_value)

        stash = []
        for i in range(10):
            xs, _ = learner.ask(10)
            for x in xs:
                learner2.tell_pending(x)

            # Save 5 random points out of `xs` for later.
            random.shuffle(xs)
            for _ in range(5):
                stash.append(xs.pop())

            ys = [learner.function(x) for x in xs]
            learner.tell_many(xs, ys, force=True)
            for x, y in zip(xs, ys):
                learner2.tell(x, y)

            # Evaluate and add N random points from `stash`.
            random.shuffle(stash)
            xs = [stash.pop() for _ in range(random.randint(1, 5))]
            ys = [learner.function(x) for x in xs]
            learner.tell_many(xs, ys, force=True)
            for x, y in zip(xs, ys):
                learner2.tell(x, y)

        if scale_doubling:
            # Double the scale to trigger the loss updates.
            max_value = max(learner.data.values())
            x = 1e-6
            learner.tell(x, max_value * 10)
            learner2.tell(x, max_value * 10)

    learner = Learner1D(f, bounds=(-1, 1))
    learner2 = Learner1D(f, bounds=(-1, 1))
    _random_run(learner, learner2, scale_doubling=False)
    test_equal(learner, learner2)

    learner = Learner1D(f, bounds=(-1, 1))
    learner2 = Learner1D(f, bounds=(-1, 1))
    _random_run(learner, learner2, scale_doubling=True)
    test_equal(learner, learner2)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment