Skip to content
Snippets Groups Projects
Commit 92a66bf4 authored by Bas Nijholt's avatar Bas Nijholt
Browse files

use interpolation as default and impplement parallel interface

parent db967eb4
No related branches found
No related tags found
No related merge requests found
......@@ -22,7 +22,7 @@ class Learner1D(object):
"""
def __init__(self, xdata=None, ydata=None):
def __init__(self, xdata=None, ydata=None, client=None):
"""Initialize the learner.
Parameters
......@@ -35,14 +35,14 @@ class Learner1D(object):
# Set internal variables
# A dict storing the loss function for each interval x_n.
self._losses = {}
self.losses = {}
# A dict {x_n: [x_{n-1}, x_{n+1}]} for quick checking of local
# properties.
self._neighbors = {}
self.neighbors = {}
# A dict {x_n: y_n} for quick checking of local
# properties.
self._ydata = {}
self.data = {}
# Bounding box [[minx, maxx], [miny, maxy]].
self._bbox = [[np.inf, -np.inf], [np.inf, -np.inf]]
......@@ -57,27 +57,18 @@ class Learner1D(object):
if xdata is not None:
self.add_data(xdata, ydata)
self.client = client
def loss(self, x_left, x_right, interpolate=False):
def loss(self, x_left, x_right):
"""Calculate loss in the interval x_left, x_right.
Currently returns the rescaled length of the interval. If one of the
y-values is missing, returns 0 (so the intervals with missing data are
never touched. This behavior should be improved later.
"""
if interpolate:
ydata = self.interp_ydata
assert ydata.keys() == self._ydata.keys()
else:
ydata = self._ydata
assert x_left < x_right and self._neighbors[x_left][1] == x_right
try:
y_right, y_left = ydata[x_right], ydata[x_left]
return sqrt(((x_right - x_left) / self._scale[0])**2 +
((y_right - y_left) / self._scale[1])**2)
except TypeError: # One of y-values is None.
return 0
y_right, y_left = self.interp_data[x_right], self.interp_data[x_left]
return sqrt(((x_right - x_left) / self._scale[0])**2 +
((y_right - y_left) / self._scale[1])**2)
def add_data(self, xvalues, yvalues):
"""Add data to the intervals.
......@@ -98,21 +89,7 @@ class Learner1D(object):
def add_point(self, x, y):
"""Update the data."""
self._ydata[x] = y
# Update the neighbors.
if x not in self._neighbors: # The point is new
xvals = sorted(self._neighbors)
pos = np.searchsorted(xvals, x) # This could be done for multiple vals at once
self._neighbors[None] = [None, None] # To reduce the number of condititons.
x_left = xvals[pos-1] if pos != 0 else None
x_right = xvals[pos] if pos != len(xvals) else None
self._neighbors[x] = [x_left, x_right]
self._neighbors[x_left][1] = x
self._neighbors[x_right][0] = x
del self._neighbors[None]
self.data[x] = y
# Update the scale.
self._bbox[0][0] = min(self._bbox[0][0], x)
......@@ -123,23 +100,7 @@ class Learner1D(object):
self._scale = [self._bbox[0][1] - self._bbox[0][0],
self._bbox[1][1] - self._bbox[1][0]]
# Update the losses.
x_left, x_right = self._neighbors[x]
if x_left is not None:
self._losses[x_left, x] = self.loss(x_left, x)
if x_right is not None:
self._losses[x, x_right] = self.loss(x, x_right)
try:
del self._losses[x_left, x_right]
except KeyError:
pass
# If the scale has doubled, recompute all losses.
if self._scale > self._oldscale * 2:
self._losses = {key: self.loss(*key) for key in self._losses}
self._oldscale = self._scale
def choose_points(self, n=10, add_to_data=False, interpolate=False):
def choose_points(self, n=10, add_to_data=True):
"""Return n points that are expected to maximally reduce the loss."""
# Find out how to divide the n points over the intervals
# by finding positive integer n_i that minimize max(L_i / n_i) subject
......@@ -148,19 +109,17 @@ class Learner1D(object):
# Return equally spaced points within each interval to which points
# will be added.
if interpolate:
self.interpolate()
losses = self.interp_losses.items()
else:
losses = self._losses.items()
self.get_results() # Insert finished results into self.data
self.interpolate() # Apply new interpolation step if new results
def points(x, n):
return list(np.linspace(x[0], x[1], n, endpoint=False)[1:])
# Calculate how many points belong to each interval.
quals = [(-loss, x_range, 1) for (x_range, loss) in
losses]
self.losses.items()]
heapq.heapify(quals)
for point_number in range(n):
quality, x, n = quals[0]
heapq.heapreplace(quals, (quality * n / (n+1), x, n + 1))
......@@ -174,14 +133,6 @@ class Learner1D(object):
return xs
def get_status(self):
"""Report current status.
So far just returns some internal variables [losses, intervals and
data]
"""
return self._losses, self._neighbors, self._ydata
def get_results(self):
"""Work with distributed.client.Future objects."""
done = [(x, y.result()) for x, y in self.unfinished.items() if y.done()]
......@@ -198,51 +149,52 @@ class Learner1D(object):
self.unfinished[xs] = ys
def interpolate(self):
"""Estimates the approximate positions of unknown y-values by
interpolating and assuming the unknown point lies on a line between
its nearest known neighbors.
Upon running this function it adds:
self.interp_ydata
self.interp_losses
self.real_neighbors
"""
ydata = sorted([x for x, y in self._ydata.items() if y is not None])
self.real_neighbors = {}
for i, y in enumerate(ydata):
if i == 0:
self.real_neighbors[y] = [None, ydata[1]]
elif i == len(ydata) - 1:
self.real_neighbors[y] = [ydata[i-1], None]
xdata = []
ydata = []
xdata_unfinished = []
self.interp_data = {}
for x in sorted(self.data):
y = self.data[x]
if y is None:
xdata_unfinished.append(x)
else:
self.real_neighbors[y] = [ydata[i-1], ydata[i+1]]
xdata.append(x)
ydata.append(y)
self.interp_data[x] = y
ydata_unfinished = [x for x, y in self._ydata.items() if y is None]
indices = np.searchsorted(ydata, ydata_unfinished)
if len(ydata) == 0:
ydata_unfinished = (0, ) * len(xdata_unfinished)
else:
ydata_unfinished = np.interp(xdata_unfinished, xdata, ydata)
for i, y in zip(indices, ydata_unfinished):
x_left, x_right = self.real_neighbors[ydata[i]]
self.real_neighbors[y] = [x_left, ydata[i]]
for x, y in zip(xdata_unfinished, ydata_unfinished):
self.interp_data[x] = y
self.interp_ydata = {}
for x, (x_left, x_right) in self.real_neighbors.items():
y = self._ydata[x]
if y is None:
y_left = self._ydata[x_left]
y_right = self._ydata[x_right]
y = np.interp(x, [x_left, x_right], [y_left, y_right])
self.interp_ydata[x] = y
self.neighbors = {}
xdata_sorted = sorted(self.interp_data)
for i, x in enumerate(xdata_sorted):
if i == 0:
self.neighbors[x] = [None, xdata_sorted[1]]
elif i == len(xdata_sorted) - 1:
self.neighbors[x] = [xdata_sorted[i-1], None]
else:
self.neighbors[x] = [xdata_sorted[i-1], xdata_sorted[i+1]]
self.interp_losses = {}
for x, (x_left, x_right) in self.real_neighbors.items():
self.losses = {}
for x, (x_left, x_right) in self.neighbors.items():
if x_left is not None:
self.interp_losses[(x_left, x)] = self.loss(
x_left, x, interpolate=True)
self.losses[(x_left, x)] = self.loss(x_left, x)
if x_right is not None:
self.interp_losses[x, x_right] = self.loss(
x, x_right, interpolate=True)
self.losses[x, x_right] = self.loss(x, x_right)
try:
del self.interp_losses[x_left, x_right]
del self.losses[x_left, x_right]
except KeyError:
pass
def map(self, func, xs):
ys = self.client.map(func, xs)
self.add_futures(xs, ys)
def initialize(self, func, xmin, xmax):
self.map(func, [xmin, xmax])
self.add_data([xmin, xmax], [None, None])
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment