Skip to content
Snippets Groups Projects
Commit 1b9ef594 authored by Joseph Weston's avatar Joseph Weston
Browse files

Merge branch 'tell_pending' into 'master'

Introduce 'tell_pending', which replaces 'tell(x, None)'

See merge request !107
parents 6896de47 2bdb12b5
No related branches found
No related tags found
1 merge request: !107 Introduce 'tell_pending' which replaces 'tell(x, None)'
Pipeline #12228 passed
......@@ -50,7 +50,7 @@ class AverageLearner(BaseLearner):
def n_requested(self):
return len(self.data) + len(self.pending_points)
def ask(self, n, add_data=True):
def ask(self, n, tell_pending=True):
points = list(range(self.n_requested, self.n_requested + n))
if any(p in self.data or p in self.pending_points for p in points):
......@@ -60,8 +60,9 @@ class AverageLearner(BaseLearner):
- set(self.pending_points))[:n]
loss_improvements = [self.loss_improvement(n) / n] * n
if add_data:
self.tell_many(points, itertools.repeat(None))
if tell_pending:
for p in points:
self.tell_pending(p)
return points, loss_improvements
def tell(self, n, value):
......@@ -69,14 +70,14 @@ class AverageLearner(BaseLearner):
# The point has already been added before.
return
if value is None:
self.pending_points.add(n)
else:
self.data[n] = value
self.pending_points.discard(n)
self.sum_f += value
self.sum_f_sq += value**2
self.npoints += 1
self.data[n] = value
self.pending_points.discard(n)
self.sum_f += value
self.sum_f_sq += value**2
self.npoints += 1
def tell_pending(self, n):
    """Record point 'n' as requested but not yet evaluated.

    Parameters
    ----------
    n : hashable
        Identifier of the requested point; it is added to
        'self.pending_points'.
    """
    pending = self.pending_points
    pending.add(n)
@property
def mean(self):
......
......@@ -71,7 +71,7 @@ class BalancingLearner(BaseLearner):
for index, learner in enumerate(self.learners):
if index not in self._points:
self._points[index] = learner.ask(
n=1, add_data=False)
n=1, tell_pending=False)
point, loss_improvement = self._points[index]
improvements_per_learner.append(loss_improvement[0])
pairs.append((index, point[0]))
......@@ -79,13 +79,13 @@ class BalancingLearner(BaseLearner):
key=itemgetter(1))
points.append(x)
loss_improvements.append(l)
self.tell(x, None)
self.tell_pending(x)
return points, loss_improvements
def ask(self, n, add_data=True):
def ask(self, n, tell_pending=True):
"""Chose points for learners."""
if not add_data:
if not tell_pending:
with restore(*self.learners):
return self._ask_and_tell(n)
else:
......@@ -97,6 +97,12 @@ class BalancingLearner(BaseLearner):
self._loss.pop(index, None)
self.learners[index].tell(x, y)
def tell_pending(self, x):
    """Forward a pending point to the child learner it belongs to.

    Parameters
    ----------
    x : tuple
        A pair '(index, point)': 'index' selects the entry of
        'self.learners' and 'point' is passed on to that learner's
        'tell_pending'.
    """
    learner_index, point = x
    # Drop whatever is cached for this learner in '_points' and '_loss';
    # a missing entry is not an error.
    for cache in (self._points, self._loss):
        cache.pop(learner_index, None)
    self.learners[learner_index].tell_pending(point)
def loss(self, real=True):
losses = []
for index, learner in enumerate(self.learners):
......
......@@ -44,6 +44,12 @@ class BaseLearner(metaclass=abc.ABCMeta):
for x, y in zip(xs, ys):
self.tell(x, y)
@abc.abstractmethod
def tell_pending(self, x):
    """Mark 'x' as requested so that the learner does not suggest it again.

    Abstract hook: concrete learners provide the actual bookkeeping.

    Parameters
    ----------
    x : point
        The point that has been requested but has no result yet.
    """
@abc.abstractmethod
def remove_unfinished(self):
"""Remove uncomputed data from the learner."""
......@@ -62,14 +68,14 @@ class BaseLearner(metaclass=abc.ABCMeta):
"""
@abc.abstractmethod
def ask(self, n, add_data=True):
def ask(self, n, tell_pending=True):
"""Choose the next 'n' points to evaluate.
Parameters
----------
n : int
The number of points to choose.
add_data : bool, default: True
tell_pending : bool, default: True
If True, add the chosen points to this
learner's 'data' with 'None' for the 'y'
values. Set this to False if you do not
......
......@@ -32,10 +32,13 @@ class DataSaver:
return getattr(self.learner, attr)
def tell(self, x, result):
y = self.arg_picker(result) if result is not None else None
y = self.arg_picker(result)
self.extra_data[x] = result
self.learner.tell(x, y)
def tell_pending(self, x):
    """Pass the pending point 'x' straight through to the wrapped learner."""
    wrapped = self.learner
    wrapped.tell_pending(x)
def _ds(learner_type, arg_picker, *args, **kwargs):
args = args[2:] # functools.partial passes the first 2 arguments in 'args'!
......
......@@ -15,6 +15,7 @@ from .integrator_coeffs import (b_def, T_left, T_right, ns, hint,
ndiv_max, min_sep, eps, xi, V_inv,
Vcond, alpha, gamma)
from ..notebook_integration import ensure_holoviews
from ..utils import restore
def _downdate(c, nans, depth):
......@@ -393,6 +394,9 @@ class IntegratorLearner(BaseLearner):
assert ival in self.ivals
self.priority_split.append(ival)
def tell_pending(self, x=None):
    """Accept a pending point and do nothing with it.

    The abstract 'BaseLearner.tell_pending(self, x)' takes the requested
    point as an argument, and callers such as the BalancingLearner invoke
    'learner.tell_pending(x)'. The previous signature took no point and
    therefore raised 'TypeError' on such calls.

    Parameters
    ----------
    x : point, optional
        The requested point. It is ignored; the default keeps calls
        without an argument working.
    """
    pass
def propagate_removed(self, ival):
def _propagate_removed_down(ival):
ival.removed = True
......@@ -414,10 +418,16 @@ class IntegratorLearner(BaseLearner):
self._stack.append(x)
self.ivals.add(ival)
def ask(self, n, add_data=True):
if not add_data:
raise NotImplementedError(
"Asking points irreversibly changes the learner's data structure.")
def ask(self, n, tell_pending=True):
    """Choose the next 'n' points.

    Parameters
    ----------
    n : int
        The number of points to choose.
    tell_pending : bool, default: True
        If False, the points are chosen inside a 'restore(self)'
        context (presumably so the learner's state is put back
        afterwards -- confirm against '..utils.restore').

    Returns
    -------
    Whatever 'self._ask_and_tell_pending(n)' returns.
    """
    if tell_pending:
        return self._ask_and_tell_pending(n)
    with restore(self):
        return self._ask_and_tell_pending(n)
def _ask_and_tell_pending(self, n):
points, loss_improvements = self.pop_from_stack(n)
n_left = n - len(points)
while n_left > 0:
......
......@@ -188,7 +188,8 @@ class Learner1D(BaseLearner):
if (b is not None) and right_loss_is_unknown:
self.losses_combined[x, b] = float('inf')
def find_neighbors(self, x, neighbors):
@staticmethod
def find_neighbors(x, neighbors):
if x in neighbors:
return neighbors[x]
pos = neighbors.bisect_left(x)
......@@ -212,7 +213,7 @@ class Learner1D(BaseLearner):
When the function returns a vector the learners y-scale is set by
the level with the largest peak-to-peak value.
"""
"""
self._bbox[0][0] = min(self._bbox[0][0], x)
self._bbox[0][1] = max(self._bbox[0][1], x)
self._scale[0] = self._bbox[0][1] - self._bbox[0][0]
......@@ -236,36 +237,29 @@ class Learner1D(BaseLearner):
# The point is already evaluated before
return
real = y is not None
if real:
# either it is a float/int, if not, try casting to a np.array
if not isinstance(y, (float, int)):
y = np.asarray(y, dtype=float)
# either it is a float/int, if not, try casting to a np.array
if not isinstance(y, (float, int)):
y = np.asarray(y, dtype=float)
# Add point to the real data dict
self.data[x] = y
# remove from set of pending points
self.pending_points.discard(x)
# Add point to the real data dict
self.data[x] = y
if self._vdim is None:
try:
self._vdim = len(np.squeeze(y))
except TypeError:
self._vdim = 1
else:
# The keys of pending_points are the unknown points
self.pending_points.add(x)
# remove from set of pending points
self.pending_points.discard(x)
# Update the neighbors
self.update_neighbors(x, self.neighbors_combined)
if real:
self.update_neighbors(x, self.neighbors)
if self._vdim is None:
try:
self._vdim = len(np.squeeze(y))
except TypeError:
self._vdim = 1
# Update the scale
self.update_scale(x, y)
if not self.bounds[0] <= x <= self.bounds[1]:
return
# Update the losses
self.update_losses(x, real)
self.update_neighbors(x, self.neighbors_combined)
self.update_neighbors(x, self.neighbors)
self.update_scale(x, y)
self.update_losses(x, real=True)
# If the scale has increased enough, recompute all losses.
if self._scale[1] > self._oldscale[1] * 2:
......@@ -275,7 +269,15 @@ class Learner1D(BaseLearner):
self._oldscale = deepcopy(self._scale)
def ask(self, n, add_data=True):
def tell_pending(self, x):
    """Register 'x' as a requested point without a result yet.

    A point that already has a value in 'self.data' is ignored.
    Otherwise it is added to 'self.pending_points', inserted into the
    combined neighbors structure, and the losses are updated with
    'real=False'.

    Parameters
    ----------
    x : float
        The requested point.
    """
    already_evaluated = x in self.data
    if not already_evaluated:
        self.pending_points.add(x)
        self.update_neighbors(x, self.neighbors_combined)
        self.update_losses(x, real=False)
def ask(self, n, tell_pending=True):
"""Return n points that are expected to maximally reduce the loss."""
# Find out how to divide the n points over the intervals
# by finding positive integer n_i that minimize max(L_i / n_i) subject
......@@ -326,8 +328,9 @@ class Learner1D(BaseLearner):
itertools.repeat(-quality, n - 1)
for quality, x, n in quals))
if add_data:
self.tell_many(points, itertools.repeat(None))
if tell_pending:
for p in points:
self.tell_pending(p)
return points, loss_improvements
......
......@@ -269,50 +269,76 @@ class Learner2D(BaseLearner):
return not any((p in self.pending_points or p in self._stack)
for p in self._bounds_points)
def data_combined(self):
# Interpolate the unfinished points
data_combined = copy(self.data)
def _data_in_bounds(self):
    """Return the evaluated points that lie inside 'self.bounds'.

    Returns
    -------
    points : numpy array
        The keys of 'self.data' that fall inside the bounds (edges
        included); shape (0, 2) when there is no data.
    values : numpy array
        The matching values as floats, reshaped to (-1, self.vdim).
    """
    if not self.data:
        return np.zeros((0, 2)), np.zeros((0, self.vdim), dtype=float)

    points = np.array(list(self.data))
    values = np.array(list(self.data.values()), dtype=float)
    # 'bounds' is ((xmin, xmax), (ymin, ymax)); transposing yields the
    # lower-left and upper-right corners.
    lower_left, upper_right = np.reshape(self.bounds, (2, 2)).T
    inside = ((lower_left <= points) & (points <= upper_right)).all(axis=1)
    return points[inside], values[inside].reshape(-1, self.vdim)
def _data_interp(self):
if self.pending_points:
points_interp = list(self.pending_points)
points = list(self.pending_points)
if self.bounds_are_done:
values_interp = self.ip()(self._scale(points_interp))
values = self.ip()(self._scale(points))
else:
# Without the bounds the interpolation cannot be done properly,
# so we just set everything to zero.
values_interp = np.zeros((len(points_interp), self.vdim))
values = np.zeros((len(points), self.vdim))
return points, values
return np.zeros((0, 2)), np.zeros((0, self.vdim), dtype=float)
def _data_combined(self):
    """Return the in-bounds data stacked with the pending-point data.

    Returns
    -------
    points, values : numpy arrays
        The result of 'self._data_in_bounds()' when there are no pending
        points; otherwise that result with the output of
        'self._data_interp()' stacked underneath.
    """
    known_points, known_values = self._data_in_bounds()
    if self.pending_points:
        pending_points, pending_values = self._data_interp()
        return (np.vstack([known_points, pending_points]),
                np.vstack([known_values, pending_values]))
    return known_points, known_values
for point, value in zip(points_interp, values_interp):
data_combined[point] = value
return data_combined
def data_combined(self):
    """Return the combined data as a dict mapping point tuples to values.

    The points and values come from 'self._data_combined()'; each point
    row is converted to a tuple so that it can serve as a dict key.
    """
    points, values = self._data_combined()
    return dict(zip(map(tuple, points), values))
def ip(self):
if self._ip is None:
points = self._scale(list(self.data.keys()))
values = np.array(list(self.data.values()), dtype=float)
points, values = self._data_in_bounds()
points = self._scale(points)
self._ip = interpolate.LinearNDInterpolator(points, values)
return self._ip
def ip_combined(self):
if self._ip_combined is None:
data_combined = self.data_combined()
points = self._scale(list(data_combined.keys()))
values = np.array(list(data_combined.values()), dtype=float)
points, values = self._data_combined()
points = self._scale(points)
self._ip_combined = interpolate.LinearNDInterpolator(points,
values)
return self._ip_combined
def inside_bounds(self, xy):
    """Return whether the point 'xy' lies within 'self.bounds'.

    Parameters
    ----------
    xy : sequence of two numbers
        The '(x, y)' coordinates to check.

    Returns
    -------
    Truthy when both coordinates are inside their '(min, max)' range,
    edges included.
    """
    x_coord, y_coord = xy
    (x_low, x_high), (y_low, y_high) = self.bounds
    within_x = x_low <= x_coord <= x_high
    return within_x and y_low <= y_coord <= y_high
def tell(self, point, value):
point = tuple(point)
self.data[point] = value
if not self.inside_bounds(point):
return
self.pending_points.discard(point)
self._ip = None
self._stack.pop(point, None)
if value is None:
self.pending_points.add(point)
self._ip_combined = None
else:
self.data[point] = value
self.pending_points.discard(point)
self._ip = None
def tell_pending(self, point):
    """Register 'point' as requested but not yet evaluated.

    Points outside the bounds (per 'self.inside_bounds') are ignored.
    For an accepted point the cached combined interpolator is reset to
    None and the point is removed from 'self._stack' if present.

    Parameters
    ----------
    point : sequence
        The coordinates of the point; converted to a tuple.
    """
    key = tuple(point)
    if self.inside_bounds(key):
        self.pending_points.add(key)
        self._ip_combined = None
        self._stack.pop(key, None)
def _fill_stack(self, stack_till=1):
......@@ -345,13 +371,14 @@ class Learner2D(BaseLearner):
return points_new, losses_new
def ask(self, n, add_data=True):
# Even if add_data is False we add the point such that _fill_stack
def ask(self, n, tell_pending=True):
# Even if tell_pending is False we add the point such that _fill_stack
# will return new points, later we remove these points if needed.
points = list(self._stack.keys())
loss_improvements = list(self._stack.values())
n_left = n - len(points)
self.tell_many(points[:n], itertools.repeat(None))
for p in points[:n]:
self.tell_pending(p)
while n_left > 0:
# The while loop is needed because `stack_till` could be larger
......@@ -359,13 +386,14 @@ class Learner2D(BaseLearner):
# it could fill up till a length smaller than `stack_till`.
new_points, new_loss_improvements = self._fill_stack(
stack_till=max(n_left, self.stack_size))
self.tell_many(new_points[:n_left], itertools.repeat(None))
for p in points[:n_left]:
self.tell_pending(p)
n_left -= len(new_points)
points += new_points
loss_improvements += new_loss_improvements
if not add_data:
if not tell_pending:
self._stack = OrderedDict(zip(points[:self.stack_size],
loss_improvements))
for point in points[:n]:
......
......@@ -8,11 +8,12 @@ import numpy as np
from scipy import interpolate
import scipy.spatial
from ..notebook_integration import ensure_holoviews
from .base_learner import BaseLearner
from .triangulation import Triangulation, point_in_simplex, \
circumsphere, simplex_volume_in_embedding
from ..notebook_integration import ensure_holoviews
from .triangulation import (Triangulation, point_in_simplex,
circumsphere, simplex_volume_in_embedding)
from ..utils import restore
def volume(simplex, ys=None):
......@@ -245,7 +246,7 @@ class LearnerND(BaseLearner):
return # we already know about the point
if value is None:
return self._tell_pending(point)
return self.tell_pending(point)
self._pending.discard(point)
tri = self.tri
......@@ -263,7 +264,7 @@ class LearnerND(BaseLearner):
simplex = tuple(sorted(simplex))
return simplex in self.tri.simplices
def _tell_pending(self, point, simplex=None):
def tell_pending(self, point, *, simplex=None):
point = tuple(point)
self._pending.add(point)
......@@ -309,15 +310,23 @@ class LearnerND(BaseLearner):
heapq.heappush(self._simplex_queue,
(-subloss, simplex, subsimplex))
def ask(self, n=1):
def _ask_and_tell_pending(self, n=1):
    """Ask for 'n' points one at a time and collect the results.

    Parameters
    ----------
    n : int, default: 1
        The number of points to request through 'self._ask()'.

    Returns
    -------
    xs : list
        The chosen points, in the order they were returned.
    losses : list
        The loss reported by 'self._ask()' for each point.

    Notes
    -----
    With 'n == 0' two empty lists are returned. The previous
    'zip(*...)' unpacking raised 'ValueError' in that case.
    """
    xs, losses = [], []
    for _ in range(n):
        x, loss = self._ask()
        xs.append(x)
        losses.append(loss)
    return xs, losses
def ask(self, n, tell_pending=True):
    """Choose the next 'n' points.

    Parameters
    ----------
    n : int
        The number of points to choose.
    tell_pending : bool, default: True
        If False, the points are chosen inside a 'restore(self)'
        context (presumably so the learner's state is put back
        afterwards -- confirm against '..utils.restore').

    Returns
    -------
    Whatever 'self._ask_and_tell_pending(n)' returns.
    """
    if tell_pending:
        return self._ask_and_tell_pending(n)
    with restore(self):
        return self._ask_and_tell_pending(n)
def _ask_bound_point(self):
# get the next bound point that is still available
new_point = next(p for p in self._bounds_points
if p not in self.data and p not in self._pending)
self._tell_pending(new_point)
self.tell_pending(new_point)
return new_point, np.inf
def _ask_point_without_known_simplices(self):
......@@ -330,7 +339,7 @@ class LearnerND(BaseLearner):
p = r * a + b
p = tuple(p)
self._tell_pending(p)
self.tell_pending(p)
return p, np.inf
def _pop_highest_existing_simplex(self):
......@@ -350,8 +359,8 @@ class LearnerND(BaseLearner):
# Could not find a simplex, this code should never be reached
assert self.tri is not None
raise AssertionError(
"""Could not find a simplex to. Yet there should always be a simplex
available if LearnerND.tri() is not None"""
"Could not find a simplex to. Yet there should always be a simplex "
"available if LearnerND.tri() is not None"
)
def _ask_best_point(self):
......@@ -371,7 +380,7 @@ class LearnerND(BaseLearner):
transform=self._transform))
self._pending_to_simplex[point_new] = simplex
self._tell_pending(point_new, simplex) # O(??)
self.tell_pending(point_new, simplex=simplex) # O(??)
return point_new, loss
......
......@@ -27,11 +27,13 @@ class SKOptLearner(Optimizer, BaseLearner):
self.function = function
super().__init__(**kwargs)
def tell(self, x, y, fit=True):
if y is not None:
# 'skopt.Optimizer' takes care of points we
# have not got results for.
super().tell([x], y, fit)
def tell(self, x, y, fit=True):
super().tell([x], y, fit)
def tell_pending(self, x):
    """Accept a pending point without recording anything.

    Nothing is stored here because 'skopt.Optimizer' takes care of
    points we have not got results for.
    """
    return None
def remove_unfinished(self):
    """No-op implementation of the 'remove_unfinished' hook."""
    return None
......@@ -46,7 +48,10 @@ class SKOptLearner(Optimizer, BaseLearner):
# estimator of loss, but it is the cheapest.
return 1 - model.score(self.Xi, self.yi)
def ask(self, n, add_data=True):
def ask(self, n, tell_pending=True):
if not tell_pending:
raise NotImplementedError('Asking points is an irreversible '
'action, so use `ask(n, tell_pending=True`.')
points = super().ask(n)
# TODO: Choose a better estimate for the loss improvement.
if self.space.n_dims > 1:
......
......@@ -10,7 +10,7 @@ def test_only_returns_new_points():
for i in range(5, 10):
learner.tell(i, 1)
learner.tell(0, None) # This means it shouldn't return 0 anymore
learner.tell_pending(0) # This means it shouldn't return 0 anymore
assert learner.ask(1)[0][0] == 1
assert learner.ask(1)[0][0] == 2
......
......@@ -351,7 +351,7 @@ def test_learner_performance_is_invariant_under_scaling(learner_type, f, learner
assert abs(learner.loss() - control.loss()) / learner.loss() < 1e-11
# XXX: the LearnerND currently fails because there is no `add_data=False` argument in ask.
# XXX: The LearnerND shouldn't fail, see https://gitlab.kwant-project.org/qt/adaptive/issues/105
@run_with(Learner1D, Learner2D, xfail(LearnerND), AverageLearner)
def test_balancing_learner(learner_type, f, learner_kwargs):
"""Test if the BalancingLearner works with the different types of learners."""
......@@ -366,7 +366,7 @@ def test_balancing_learner(learner_type, f, learner_kwargs):
for i in range(100):
n = random.randint(1, 10)
m = random.randint(0, n)
xs, _ = learner.ask(n, add_data=False)
xs, _ = learner.ask(n, tell_pending=False)
# Save 'm' random points out of `xs` for later
random.shuffle(xs)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment