Commit 1b160c95 authored by Christoph Groth's avatar Christoph Groth
Browse files

dtype can be int, float or complex; lots of improvements

parent 6909b700
* 0-d arrays should not be sequences. Currently there are problems with using
0-d arrays with PySequence_Fast.
* Consider reducing the amount of templated code by having more dtype-flexible
code.
......@@ -5,11 +5,11 @@ from time import time
################################################################
# Mock a numpy like "module" using Python tuples.
def tuple_array(seq, dtype):
def tuple_array(seq):
return tuple(seq)
def tuple_zeros(shape, dtype):
return (0,) * shape
return (dtype(0),) * shape
def tuple_dot(a, b):
return a[0] * b[0] + a[1] * b[1]
......@@ -25,41 +25,48 @@ tuples.dot = tuple_dot
################################################################
def zeros(module, n=100000):
def zeros(module, dtype, n=100000):
zeros = module.zeros
return list(zeros(2, int) for i in xrange(n))
return list(zeros(2, dtype) for i in xrange(n))
def make_from_list(module, n=100000):
def make_from_list(module, dtype, n=100000):
array = module.array
l = range(3)
return list(array(l, int) for i in xrange(n))
l = [dtype(e) for e in range(3)]
return list(array(l) for i in xrange(n))
def dot(module, n=1000000):
def dot(module, dtype, n=1000000):
dot = module.dot
a = module.zeros(2, int)
b = module.zeros(2, int)
a = module.zeros(2, dtype)
b = module.zeros(2, dtype)
for i in xrange(n):
c = dot(a, b)
def dot_tuple(module, n=100000):
def dot_tuple(module, dtype, n=100000):
dot = module.dot
a = module.zeros(2, int)
b = (0, 0)
a = module.zeros(2, dtype)
b = (dtype(0),) * 2
for i in xrange(n):
c = dot(a, b)
def compare(function, modules):
print '****', function.__name__, '****'
print '{0}:'.format(function.__name__)
for module in modules:
t = time()
try:
function(module)
except:
print "{0:15} failed".format(module.__name__)
else:
print '{0:15} {1:.4f} s'.format(module.__name__, time() - t)
# Execute the function once to make the following timings more
# accurate.
function(module, int)
print " {0:15}".format(module.__name__),
for dtype in (int, float, complex):
t = time()
try:
function(module, dtype)
except:
print " failed ",
else:
print ' {0:.4f} s'.format(time() - t),
print
def main():
print ' int float complex'
modules = [tuples, tinyarray, numpy]
compare(zeros, modules)
compare(make_from_list, modules)
......
......@@ -5,8 +5,8 @@ from distutils.core import setup, Extension
module = Extension('tinyarray',
language = 'c++',
extra_compile_args = ['--std=c++0x'],
sources = ['src/main.cc', 'src/array.cc',
'src/functions.cc'])
sources = ['src/array.cc', 'src/functions.cc',
'src/arithmetic.cc'])
setup (name = 'tinyarray',
version = '0.0',
......
#include <Python.h>
#include <algorithm>
#include <complex>
#include "array.hh"
#include "arithmetic.hh"
#include "conversion.hh"
template <typename T>
PyObject *array_scalar_product(PyObject *a_, PyObject *b_)
{
assert(Array<T>::check_exact(a_)); Array<T> *a = (Array<T>*)a_;
assert(Array<T>::check_exact(b_)); Array<T> *b = (Array<T>*)b_;
int ndim_a, ndim_b;
size_t *shape_a, *shape_b;
a->ndim_shape(&ndim_a, &shape_a);
b->ndim_shape(&ndim_b, &shape_b);
assert(ndim_a == 1);
assert(ndim_b == 1);
size_t n = shape_a[0];
if (n != shape_b[0]) {
PyErr_SetString(PyExc_ValueError,
"Both arguments must have same length.");
return 0;
}
T *data_a = a->data(), *data_b = b->data();
// It's important not to start with result = 0. This leads to wrong
// results with regard to the sign of zero as 0.0 + -0.0 is 0.0.
assert(n > 0);
T result = data_a[0] * data_b[0];
for (size_t i = 1; i < n; ++i) {
result += data_a[i] * data_b[i];
}
return pyobject_from_number(result);
}
// This routine is not heavily optimized. It's performance has been measured
// to be adequate, given that it will be called from Python. The actual
// calculation of the matrix product typically uses less than half of the
// execution time of tinyarray.dot for two 3 by 3 matrices.
template <typename T>
PyObject *array_matrix_product(PyObject *a_, PyObject *b_)
{
assert(Array<T>::check_exact(a_)); Array<T> *a = (Array<T>*)a_;
assert(Array<T>::check_exact(b_)); Array<T> *b = (Array<T>*)b_;
int ndim_a, ndim_b;
size_t *shape_a, *shape_b;
a->ndim_shape(&ndim_a, &shape_a);
b->ndim_shape(&ndim_b, &shape_b);
assert(ndim_a > 0);
assert(ndim_b > 0);
int ndim = ndim_a + ndim_b - 2;
assert(ndim > 0);
if (ndim > max_ndim) {
PyErr_SetString(PyExc_ValueError,
"Result would have too many dimensions.");
return 0;
}
const size_t n = shape_a[ndim_a - 1];
size_t shape[ndim];
size_t d = 0, a0 = 1;
for (int id = 0, e = ndim_a - 1; id < e; ++id)
a0 *= shape[d++] = shape_a[id];
size_t b0 = 1;
for (int id = 0, e = ndim_b - 2; id < e; ++id)
b0 *= shape[d++] = shape_b[id];
size_t b1, n2;
if (ndim_b == 1) {
n2 = shape_b[0];
b1 = 1;
} else {
n2 = shape_b[ndim_b - 2];
b1 = shape[d++] = shape_b[ndim_b - 1];
}
if (n2 != n) {
PyErr_SetString(PyExc_ValueError, "Matrices are not aligned.");
return 0;
}
Array<T> *result = Array<T>::make(ndim, shape);
if (!result) return 0;
const T *data_a = a->data(), *data_b = b->data();
T *dest = result->data();
const T *src_a = data_a;
for (size_t i = 0; i < a0; ++i, src_a += n) {
const T *src_b = data_b;
for (size_t j = 0; j < b0; ++j, src_b += (n - 1) * b1) {
for (size_t k = 0; k < b1; ++k, ++src_b) {
// It's important not to start with sum = 0. This leads to
// wrong results with regard to the sign of zero as 0.0 + -0.0
// is 0.0.
assert(n > 0);
T sum = src_a[0] * src_b[0];
for (size_t l = 1; l < n; ++l)
sum += src_a[l] * src_b[l * b1];
*dest++ = sum;
}
}
}
return (PyObject*)result;
}
PyNumberMethods as_number = {
(binaryfunc)0/*add*/, // nb_add
(binaryfunc)0, // nb_subtract
(binaryfunc)0, // nb_multiply
(binaryfunc)0, // nb_divide
(binaryfunc)0, // nb_remainder
(binaryfunc)0, // nb_divmod
(ternaryfunc)0, // nb_power
(unaryfunc)0, // nb_negative
(unaryfunc)0, // nb_positive
(unaryfunc)0, // nb_absolute
(inquiry)0, // nb_nonzero
(unaryfunc)0, // nb_invert
(binaryfunc)0, // nb_lshift
(binaryfunc)0, // nb_rshift
(binaryfunc)0, // nb_and
(binaryfunc)0, // nb_xor
(binaryfunc)0, // nb_or
(coercion)0, // nb_coerce
(unaryfunc)0, // nb_int
(unaryfunc)0, // nb_long
(unaryfunc)0, // nb_float
(unaryfunc)0, // nb_oct
(unaryfunc)0, // nb_hex
(binaryfunc)0, // nb_inplace_add
(binaryfunc)0, // nb_inplace_subtract
(binaryfunc)0, // nb_inplace_multiply
(binaryfunc)0, // nb_inplace_divide
(binaryfunc)0, // nb_inplace_remainder
(ternaryfunc)0, // nb_inplace_power
(binaryfunc)0, // nb_inplace_lshift
(binaryfunc)0, // nb_inplace_rshift
(binaryfunc)0, // nb_inplace_and
(binaryfunc)0, // nb_inplace_xor
(binaryfunc)0, // nb_inplace_or
(binaryfunc)0, // nb_floor_divide
(binaryfunc)0, // nb_true_divide
(binaryfunc)0, // nb_inplace_floor_divide
(binaryfunc)0, // nb_inplace_true_divide
(unaryfunc)0 // nb_index
};
// Explicit instantiations.
template
PyObject *array_scalar_product<long>(PyObject*, PyObject*);
template
PyObject *array_scalar_product<double>(PyObject*, PyObject*);
template
PyObject *array_scalar_product<Complex>(PyObject*, PyObject*);
template
PyObject *array_matrix_product<long>(PyObject*, PyObject*);
template
PyObject *array_matrix_product<double>(PyObject*, PyObject*);
template
PyObject *array_matrix_product<Complex>(PyObject*, PyObject*);
#ifndef ARITHMETIC_HH
#define ARITHMETIC_HH
template <typename T>
PyObject *array_scalar_product(PyObject *a, PyObject *b);
template <typename T>
PyObject *array_matrix_product(PyObject *a, PyObject *b);
extern PyNumberMethods as_number;
#endif // !ARITHMETIC_HH
This diff is collapsed.
#ifndef ARRAY_HH
#define ARRAY_HH
#include <climits>
#include <complex>
typedef std::complex<double> Complex;
enum class Dtype : char {DOUBLE = 0, COMPLEX, LONG, NONE};
const int max_ndim = 16;
enum class Dtype : char {LONG = 0, DOUBLE, COMPLEX, NONE};
const Dtype default_dtype = Dtype::DOUBLE;
int dtype_converter(const PyObject *ob, Dtype *dtype);
const int max_ndim = 3;
typedef unsigned short Shape_t;
// Set NO_OVERFLOW_DANGER if the linear index of an array entry calculated for
// a valid shape can never overflow.
// KEEP THE FOLLOWING CONSISTENT WITH THE PREVIOUS DEFINITIONS.
#if ((((size_t)-1)>>1) >= 3 * USHRT_MAX)
#ifdef NDEBUG
#define NO_OVERFLOW_DANGER
#endif
#endif
struct Array {
PyObject_VAR_HEAD
Py_ssize_t *buffer_internal; // Can be removed for Python 3.3, see array.cc
Dtype dtype;
unsigned char ndim;
Shape_t shape[max_ndim];
long ob_item[1];
#define DTYPE_DISPATCH(func) {func<long>, func<double>, func<Complex>}
// We use the ob_size field in a clever way to encode either the length of a
// 1-d array, or the number of dimensions for multi-dimensional arrays. The
// following code codifies the conventions.
class Array_base {
public:
void ndim_shape(int *ndim, size_t **shape) {
const Py_ssize_t ob_size = ob_base.ob_size;
if (ob_size >= 0) {
if (ndim) *ndim = 1;
if (shape) *shape = (size_t*)&ob_base.ob_size;
} else if (ob_size < -1) {
if (ndim) *ndim = -ob_size;
if (shape) *shape = (size_t*)((char*)this + sizeof(Array_base));
} else {
if (ndim) *ndim = 0;
if (shape) *shape = 0;
}
}
protected:
PyVarObject ob_base;
};
PyAPI_DATA(PyTypeObject) Array_type;
extern "C" void inittinyarray();
template <typename T>
class Array : public Array_base {
public:
T *data() {
if (ob_base.ob_size >= -1) {
// ndim == 0 or 1
return ob_item;
} else {
// ndim > 1
return ob_item + (-ob_base.ob_size * sizeof(size_t) +
sizeof(T) - 1) / sizeof(T);
}
}
static bool check_exact(PyObject *candidate) {
return (Py_TYPE(candidate) == &pytype);
}
static Array<T> *make(int ndim, size_t size);
static Array<T> *make(int ndim, const size_t *shape, size_t *size = 0);
static const char *pyname, *pyformat;
private:
T ob_item[1];
static PySequenceMethods as_sequence;
static PyMappingMethods as_mapping;
static PyBufferProcs as_buffer;
static PyTypeObject pytype;
friend Dtype get_dtype(PyObject *obj);
friend void inittinyarray();
};
Py_ssize_t load_seq_as_long(PyObject *obj, long *out, Py_ssize_t maxlen);
Py_ssize_t load_seq_as_ulong(PyObject *obj, unsigned long *uout,
Py_ssize_t maxlen, const char *errmsg = 0);
inline size_t calc_size(int ndim, const size_t *shape)
{
if (ndim == 0) return 1;
size_t result = shape[0];
for (int d = 1; d < ndim; ++d) result *= shape[d];
return result;
}
#define array_check_exact(op) (Py_TYPE(op) == &Array_type)
inline Dtype get_dtype(PyObject *obj)
{
PyTypeObject *pytype = Py_TYPE(obj);
if (pytype == &Array<long>::pytype) return Dtype::LONG;
if (pytype == &Array<double>::pytype) return Dtype::DOUBLE;
if (pytype == &Array<Complex>::pytype) return Dtype::COMPLEX;
return Dtype::NONE;
}
Array *array_make(PyObject *shape_obj, Dtype dtype);
PyObject *array_from_arraylike(PyObject *src, Dtype *dtype);
#endif // !ARRAY_HH
#ifndef CONVERSION_HH
#define CONVERSION_HH
#include <complex>
typedef std::complex<double> Complex;
inline PyObject *pyobject_from_number(long x)
{
return PyInt_FromLong(x);
}
inline PyObject *pyobject_from_number(double x)
{
return PyFloat_FromDouble(x);
}
inline PyObject *pyobject_from_number(Complex x)
{
Py_complex *p = (Py_complex*)&x;
return PyComplex_FromCComplex(*p);
}
template <typename T>
T number_from_pyobject(PyObject *obj);
template <>
inline double number_from_pyobject(PyObject *obj)
{
return PyFloat_AsDouble(obj);
}
template <>
inline long number_from_pyobject(PyObject *obj)
{
return PyInt_AsLong(obj);
}
template <>
inline Complex number_from_pyobject(PyObject *obj)
{
Py_complex temp = PyComplex_AsCComplex(obj);
return Complex(temp.real, temp.imag);
}
template <typename T>
T number_from_pyobject_exact(PyObject *obj);
template <>
inline double number_from_pyobject_exact(PyObject *obj)
{
return PyFloat_AsDouble(obj);
}
template <>
inline long number_from_pyobject_exact(PyObject *obj)
{
// This function will fail when the conversion to long is not exact.
return PyNumber_AsSsize_t(obj, PyExc_TypeError);
}
template <>
inline Complex number_from_pyobject_exact(PyObject *obj)
{
Py_complex temp = PyComplex_AsCComplex(obj);
return Complex(temp.real, temp.imag);
}
#endif // !CONVERSION_HH
#include <Python.h>
#include <limits>
#include "array.hh"
#include "arithmetic.hh"
#include "functions.hh"
using namespace std;
namespace {
static PyObject *empty(PyObject *module, PyObject *args)
int dtype_converter(const PyObject *ob, Dtype *dtype)
{
PyObject *shape;
Dtype dtype = default_dtype;
if (!PyArg_ParseTuple(args, "O|O&", &shape, dtype_converter, &dtype))
if (ob == Py_None) {
*dtype = default_dtype;
} else if (ob == (PyObject *)(&PyInt_Type) ||
ob == (PyObject *)(&PyLong_Type)) {
*dtype = Dtype::LONG;
} else if (ob == (PyObject *)(&PyFloat_Type)) {
*dtype = Dtype::DOUBLE;
} else if (ob == (PyObject *)(&PyComplex_Type)) {
*dtype = Dtype::COMPLEX;
} else {
PyErr_SetString(PyExc_TypeError, "Invalid dtype.");
return 0;
return (PyObject *)array_make(shape, dtype);
}
return 1;
}
static PyObject *zeros(PyObject *module, PyObject *args)
template <typename T>
PyObject *filled(size_t ndim, const size_t *shape, int value)
{
Array *a = (Array*)empty(module, args);
if (a)
for (long *p = a->ob_item, *e = p + a->ob_size; p < e; ++p)
*p = 0;
return (PyObject *)a;
size_t size;
Array<T> *result = Array<T>::make(ndim, shape, &size);
if (!result) return 0;
T *data = result->data();
for (size_t i = 0; i < size; ++i) data[i] = value;
return (PyObject*)result;
}
static PyObject *ones(PyObject *module, PyObject *args)
{
Array *a = (Array*)empty(module, args);
if (a)
for (long *p = a->ob_item, *e = p + a->ob_size; p < e; ++p)
*p = 1;
return (PyObject *)a;
}
PyObject *(*filled_dtable[])(size_t, const size_t*, int) =
DTYPE_DISPATCH(filled);
static PyObject *identity(PyObject *module, PyObject *args)
PyObject *filled_pyargs(PyObject *args, int value)
{
long n;
PyObject *pyshape;
Dtype dtype = default_dtype;
if (!PyArg_ParseTuple(args, "l|O&", &n, dtype_converter, &dtype))
if (!PyArg_ParseTuple(args, "O|O&", &pyshape, dtype_converter, &dtype))
return 0;
if (n < 0) {
PyErr_SetString(PyExc_ValueError,
"Negative dimensions are not allowed.");
return 0;
}
if (n > std::numeric_limits<Shape_t>::max()) {
PyErr_SetString(PyExc_ValueError,
"Requested size is too big.");
return 0;
}
Array *a = PyObject_NewVar(Array, &Array_type, n * n);
if (!a) return 0;
a->buffer_internal = 0;
a->ndim = 2;
a->shape[0] = a->shape[1] = n;
size_t shape[max_ndim];
Py_ssize_t ndim = load_seq_as_ulong(
pyshape, shape, max_ndim, "Negative dimensions are not allowed.");
if (ndim == -1) return 0;
long *p = a->ob_item;
for (int i = 1; i < n; ++i) {
*p++ = 1;
for (long *e = p + n; p < e; ++p)
*p = 0;
}
if (n) *p = 1;
return (PyObject *)a;
return filled_dtable[int(dtype)](ndim, shape, value);
}
static Array *array_from_sequence(PyObject *top_level_seq,
Dtype dtype=Dtype::NONE)
PyObject *zeros(PyObject *, PyObject *args)
{
assert(PySequence_Check(top_level_seq));
int d = 0, ndim = 0;
long shape[max_ndim];
return filled_pyargs(args, 0);
}
// seqs is the stack of sequences being processed, all returned by
// PySequence_Fast. ps[d] and es[d] are the begin and end of the elements
// of seqs[d - 1].
PyObject *seqs[max_ndim], **ps[max_ndim], **es[max_ndim];
es[0] = 1 + (ps[0] = &top_level_seq);
PyObject *ones(PyObject *, PyObject *args)
{
return filled_pyargs(args, 1);
}
Py_ssize_t ob_size = 1;
long *dest = 0; // Init. only needed to suppress a warning.
Array *result = 0;
while (true) {
// See http://projects.scipy.org/numpy/ticket/2199.
const char *msg = "A sequence does not support sequence protocol - "
"this is probably due to a bug in numpy for 0-d arrays.";
seqs[d] = PySequence_Fast(*ps[d]++, msg);
if (!seqs[d]) {--d; goto fail_gracefully;}
Py_ssize_t len = PySequence_Fast_GET_SIZE(seqs[d]);
if (result) {
// Check that the length of current sequence agrees with the shape.
if (len != shape[d]) {
PyErr_SetString(PyExc_ValueError,
"Input has irregular shape.");
goto fail_gracefully;
}