Commit 1b160c95 by Christoph Groth

### dtype can be int, float or complex; lots of improvements

parent 6909b700
TODO.txt 0 → 100644
 * 0-d arrays should not be sequences. Currently there are problems with using 0-d arrays with PySequence_Fast. * Consider reducing the amount of templated code by having more dtype-flexible code.
 ... ... @@ -5,11 +5,11 @@ from time import time ################################################################ # Mock a numpy like "module" using Python tuples. def tuple_array(seq, dtype): def tuple_array(seq): return tuple(seq) def tuple_zeros(shape, dtype): return (0,) * shape return (dtype(0),) * shape def tuple_dot(a, b): return a[0] * b[0] + a[1] * b[1] ... ... @@ -25,41 +25,48 @@ tuples.dot = tuple_dot ################################################################ def zeros(module, n=100000): def zeros(module, dtype, n=100000): zeros = module.zeros return list(zeros(2, int) for i in xrange(n)) return list(zeros(2, dtype) for i in xrange(n)) def make_from_list(module, n=100000): def make_from_list(module, dtype, n=100000): array = module.array l = range(3) return list(array(l, int) for i in xrange(n)) l = [dtype(e) for e in range(3)] return list(array(l) for i in xrange(n)) def dot(module, n=1000000): def dot(module, dtype, n=1000000): dot = module.dot a = module.zeros(2, int) b = module.zeros(2, int) a = module.zeros(2, dtype) b = module.zeros(2, dtype) for i in xrange(n): c = dot(a, b) def dot_tuple(module, n=100000): def dot_tuple(module, dtype, n=100000): dot = module.dot a = module.zeros(2, int) b = (0, 0) a = module.zeros(2, dtype) b = (dtype(0),) * 2 for i in xrange(n): c = dot(a, b) def compare(function, modules): print '****', function.__name__, '****' print '{0}:'.format(function.__name__) for module in modules: t = time() try: function(module) except: print "{0:15} failed".format(module.__name__) else: print '{0:15} {1:.4f} s'.format(module.__name__, time() - t) # Execute the function once to make the following timings more # accurate. function(module, int) print " {0:15}".format(module.__name__), for dtype in (int, float, complex): t = time() try: function(module, dtype) except: print " failed ", else: print ' {0:.4f} s'.format(time() - t), print def main(): print ' int float complex' modules = [tuples, tinyarray, numpy] compare(zeros, modules) compare(make_from_list, modules) ... ...
 ... ... @@ -5,8 +5,8 @@ from distutils.core import setup, Extension module = Extension('tinyarray', language = 'c++', extra_compile_args = ['--std=c++0x'], sources = ['src/main.cc', 'src/array.cc', 'src/functions.cc']) sources = ['src/array.cc', 'src/functions.cc', 'src/arithmetic.cc']) setup (name = 'tinyarray', version = '0.0', ... ...
src/arithmetic.cc 0 → 100644
 #include #include #include #include "array.hh" #include "arithmetic.hh" #include "conversion.hh" template PyObject *array_scalar_product(PyObject *a_, PyObject *b_) { assert(Array::check_exact(a_)); Array *a = (Array*)a_; assert(Array::check_exact(b_)); Array *b = (Array*)b_; int ndim_a, ndim_b; size_t *shape_a, *shape_b; a->ndim_shape(&ndim_a, &shape_a); b->ndim_shape(&ndim_b, &shape_b); assert(ndim_a == 1); assert(ndim_b == 1); size_t n = shape_a[0]; if (n != shape_b[0]) { PyErr_SetString(PyExc_ValueError, "Both arguments must have same length."); return 0; } T *data_a = a->data(), *data_b = b->data(); // It's important not to start with result = 0. This leads to wrong // results with regard to the sign of zero as 0.0 + -0.0 is 0.0. assert(n > 0); T result = data_a[0] * data_b[0]; for (size_t i = 1; i < n; ++i) { result += data_a[i] * data_b[i]; } return pyobject_from_number(result); } // This routine is not heavily optimized. It's performance has been measured // to be adequate, given that it will be called from Python. The actual // calculation of the matrix product typically uses less than half of the // execution time of tinyarray.dot for two 3 by 3 matrices. template PyObject *array_matrix_product(PyObject *a_, PyObject *b_) { assert(Array::check_exact(a_)); Array *a = (Array*)a_; assert(Array::check_exact(b_)); Array *b = (Array*)b_; int ndim_a, ndim_b; size_t *shape_a, *shape_b; a->ndim_shape(&ndim_a, &shape_a); b->ndim_shape(&ndim_b, &shape_b); assert(ndim_a > 0); assert(ndim_b > 0); int ndim = ndim_a + ndim_b - 2; assert(ndim > 0); if (ndim > max_ndim) { PyErr_SetString(PyExc_ValueError, "Result would have too many dimensions."); return 0; } const size_t n = shape_a[ndim_a - 1]; size_t shape[ndim]; size_t d = 0, a0 = 1; for (int id = 0, e = ndim_a - 1; id < e; ++id) a0 *= shape[d++] = shape_a[id]; size_t b0 = 1; for (int id = 0, e = ndim_b - 2; id < e; ++id) b0 *= shape[d++] = shape_b[id]; size_t b1, n2; if (ndim_b == 1) { n2 = shape_b[0]; b1 = 1; } else { n2 = shape_b[ndim_b - 2]; b1 = shape[d++] = shape_b[ndim_b - 1]; } if (n2 != n) { PyErr_SetString(PyExc_ValueError, "Matrices are not aligned."); return 0; } Array *result = Array::make(ndim, shape); if (!result) return 0; const T *data_a = a->data(), *data_b = b->data(); T *dest = result->data(); const T *src_a = data_a; for (size_t i = 0; i < a0; ++i, src_a += n) { const T *src_b = data_b; for (size_t j = 0; j < b0; ++j, src_b += (n - 1) * b1) { for (size_t k = 0; k < b1; ++k, ++src_b) { // It's important not to start with sum = 0. This leads to // wrong results with regard to the sign of zero as 0.0 + -0.0 // is 0.0. assert(n > 0); T sum = src_a[0] * src_b[0]; for (size_t l = 1; l < n; ++l) sum += src_a[l] * src_b[l * b1]; *dest++ = sum; } } } return (PyObject*)result; } PyNumberMethods as_number = { (binaryfunc)0/*add*/, // nb_add (binaryfunc)0, // nb_subtract (binaryfunc)0, // nb_multiply (binaryfunc)0, // nb_divide (binaryfunc)0, // nb_remainder (binaryfunc)0, // nb_divmod (ternaryfunc)0, // nb_power (unaryfunc)0, // nb_negative (unaryfunc)0, // nb_positive (unaryfunc)0, // nb_absolute (inquiry)0, // nb_nonzero (unaryfunc)0, // nb_invert (binaryfunc)0, // nb_lshift (binaryfunc)0, // nb_rshift (binaryfunc)0, // nb_and (binaryfunc)0, // nb_xor (binaryfunc)0, // nb_or (coercion)0, // nb_coerce (unaryfunc)0, // nb_int (unaryfunc)0, // nb_long (unaryfunc)0, // nb_float (unaryfunc)0, // nb_oct (unaryfunc)0, // nb_hex (binaryfunc)0, // nb_inplace_add (binaryfunc)0, // nb_inplace_subtract (binaryfunc)0, // nb_inplace_multiply (binaryfunc)0, // nb_inplace_divide (binaryfunc)0, // nb_inplace_remainder (ternaryfunc)0, // nb_inplace_power (binaryfunc)0, // nb_inplace_lshift (binaryfunc)0, // nb_inplace_rshift (binaryfunc)0, // nb_inplace_and (binaryfunc)0, // nb_inplace_xor (binaryfunc)0, // nb_inplace_or (binaryfunc)0, // nb_floor_divide (binaryfunc)0, // nb_true_divide (binaryfunc)0, // nb_inplace_floor_divide (binaryfunc)0, // nb_inplace_true_divide (unaryfunc)0 // nb_index }; // Explicit instantiations. template PyObject *array_scalar_product(PyObject*, PyObject*); template PyObject *array_scalar_product(PyObject*, PyObject*); template PyObject *array_scalar_product(PyObject*, PyObject*); template PyObject *array_matrix_product(PyObject*, PyObject*); template PyObject *array_matrix_product(PyObject*, PyObject*); template PyObject *array_matrix_product(PyObject*, PyObject*);
src/arithmetic.hh 0 → 100644
 #ifndef ARITHMETIC_HH #define ARITHMETIC_HH template PyObject *array_scalar_product(PyObject *a, PyObject *b); template PyObject *array_matrix_product(PyObject *a, PyObject *b); extern PyNumberMethods as_number; #endif // !ARITHMETIC_HH
This diff is collapsed.
 #ifndef ARRAY_HH #define ARRAY_HH #include #include typedef std::complex Complex; enum class Dtype : char {DOUBLE = 0, COMPLEX, LONG, NONE}; const int max_ndim = 16; enum class Dtype : char {LONG = 0, DOUBLE, COMPLEX, NONE}; const Dtype default_dtype = Dtype::DOUBLE; int dtype_converter(const PyObject *ob, Dtype *dtype); const int max_ndim = 3; typedef unsigned short Shape_t; // Set NO_OVERFLOW_DANGER if the linear index of an array entry calculated for // a valid shape can never overflow. // KEEP THE FOLLOWING CONSISTENT WITH THE PREVIOUS DEFINITIONS. #if ((((size_t)-1)>>1) >= 3 * USHRT_MAX) #ifdef NDEBUG #define NO_OVERFLOW_DANGER #endif #endif struct Array { PyObject_VAR_HEAD Py_ssize_t *buffer_internal; // Can be removed for Python 3.3, see array.cc Dtype dtype; unsigned char ndim; Shape_t shape[max_ndim]; long ob_item[1]; #define DTYPE_DISPATCH(func) {func, func, func} // We use the ob_size field in a clever way to encode either the length of a // 1-d array, or the number of dimensions for multi-dimensional arrays. The // following code codifies the conventions. class Array_base { public: void ndim_shape(int *ndim, size_t **shape) { const Py_ssize_t ob_size = ob_base.ob_size; if (ob_size >= 0) { if (ndim) *ndim = 1; if (shape) *shape = (size_t*)&ob_base.ob_size; } else if (ob_size < -1) { if (ndim) *ndim = -ob_size; if (shape) *shape = (size_t*)((char*)this + sizeof(Array_base)); } else { if (ndim) *ndim = 0; if (shape) *shape = 0; } } protected: PyVarObject ob_base; }; PyAPI_DATA(PyTypeObject) Array_type; extern "C" void inittinyarray(); template class Array : public Array_base { public: T *data() { if (ob_base.ob_size >= -1) { // ndim == 0 or 1 return ob_item; } else { // ndim > 1 return ob_item + (-ob_base.ob_size * sizeof(size_t) + sizeof(T) - 1) / sizeof(T); } } static bool check_exact(PyObject *candidate) { return (Py_TYPE(candidate) == &pytype); } static Array *make(int ndim, size_t size); static Array *make(int ndim, const size_t *shape, size_t *size = 0); static const char *pyname, *pyformat; private: T ob_item[1]; static PySequenceMethods as_sequence; static PyMappingMethods as_mapping; static PyBufferProcs as_buffer; static PyTypeObject pytype; friend Dtype get_dtype(PyObject *obj); friend void inittinyarray(); }; Py_ssize_t load_seq_as_long(PyObject *obj, long *out, Py_ssize_t maxlen); Py_ssize_t load_seq_as_ulong(PyObject *obj, unsigned long *uout, Py_ssize_t maxlen, const char *errmsg = 0); inline size_t calc_size(int ndim, const size_t *shape) { if (ndim == 0) return 1; size_t result = shape[0]; for (int d = 1; d < ndim; ++d) result *= shape[d]; return result; } #define array_check_exact(op) (Py_TYPE(op) == &Array_type) inline Dtype get_dtype(PyObject *obj) { PyTypeObject *pytype = Py_TYPE(obj); if (pytype == &Array::pytype) return Dtype::LONG; if (pytype == &Array::pytype) return Dtype::DOUBLE; if (pytype == &Array::pytype) return Dtype::COMPLEX; return Dtype::NONE; } Array *array_make(PyObject *shape_obj, Dtype dtype); PyObject *array_from_arraylike(PyObject *src, Dtype *dtype); #endif // !ARRAY_HH
src/conversion.hh 0 → 100644
 #ifndef CONVERSION_HH #define CONVERSION_HH #include typedef std::complex Complex; inline PyObject *pyobject_from_number(long x) { return PyInt_FromLong(x); } inline PyObject *pyobject_from_number(double x) { return PyFloat_FromDouble(x); } inline PyObject *pyobject_from_number(Complex x) { Py_complex *p = (Py_complex*)&x; return PyComplex_FromCComplex(*p); } template T number_from_pyobject(PyObject *obj); template <> inline double number_from_pyobject(PyObject *obj) { return PyFloat_AsDouble(obj); } template <> inline long number_from_pyobject(PyObject *obj) { return PyInt_AsLong(obj); } template <> inline Complex number_from_pyobject(PyObject *obj) { Py_complex temp = PyComplex_AsCComplex(obj); return Complex(temp.real, temp.imag); } template T number_from_pyobject_exact(PyObject *obj); template <> inline double number_from_pyobject_exact(PyObject *obj) { return PyFloat_AsDouble(obj); } template <> inline long number_from_pyobject_exact(PyObject *obj) { // This function will fail when the conversion to long is not exact. return PyNumber_AsSsize_t(obj, PyExc_TypeError); } template <> inline Complex number_from_pyobject_exact(PyObject *obj) { Py_complex temp = PyComplex_AsCComplex(obj); return Complex(temp.real, temp.imag); } #endif // !CONVERSION_HH
 #include #include #include "array.hh" #include "arithmetic.hh" #include "functions.hh" using namespace std; namespace { static PyObject *empty(PyObject *module, PyObject *args) int dtype_converter(const PyObject *ob, Dtype *dtype) { PyObject *shape; Dtype dtype = default_dtype; if (!PyArg_ParseTuple(args, "O|O&", &shape, dtype_converter, &dtype)) if (ob == Py_None) { *dtype = default_dtype; } else if (ob == (PyObject *)(&PyInt_Type) || ob == (PyObject *)(&PyLong_Type)) { *dtype = Dtype::LONG; } else if (ob == (PyObject *)(&PyFloat_Type)) { *dtype = Dtype::DOUBLE; } else if (ob == (PyObject *)(&PyComplex_Type)) { *dtype = Dtype::COMPLEX; } else { PyErr_SetString(PyExc_TypeError, "Invalid dtype."); return 0; return (PyObject *)array_make(shape, dtype); } return 1; } static PyObject *zeros(PyObject *module, PyObject *args) template PyObject *filled(size_t ndim, const size_t *shape, int value) { Array *a = (Array*)empty(module, args); if (a) for (long *p = a->ob_item, *e = p + a->ob_size; p < e; ++p) *p = 0; return (PyObject *)a; size_t size; Array *result = Array::make(ndim, shape, &size); if (!result) return 0; T *data = result->data(); for (size_t i = 0; i < size; ++i) data[i] = value; return (PyObject*)result; } static PyObject *ones(PyObject *module, PyObject *args) { Array *a = (Array*)empty(module, args); if (a) for (long *p = a->ob_item, *e = p + a->ob_size; p < e; ++p) *p = 1; return (PyObject *)a; } PyObject *(*filled_dtable[])(size_t, const size_t*, int) = DTYPE_DISPATCH(filled); static PyObject *identity(PyObject *module, PyObject *args) PyObject *filled_pyargs(PyObject *args, int value) { long n; PyObject *pyshape; Dtype dtype = default_dtype; if (!PyArg_ParseTuple(args, "l|O&", &n, dtype_converter, &dtype)) if (!PyArg_ParseTuple(args, "O|O&", &pyshape, dtype_converter, &dtype)) return 0; if (n < 0) { PyErr_SetString(PyExc_ValueError, "Negative dimensions are not allowed."); return 0; } if (n > std::numeric_limits::max()) { PyErr_SetString(PyExc_ValueError, "Requested size is too big."); return 0; } Array *a = PyObject_NewVar(Array, &Array_type, n * n); if (!a) return 0; a->buffer_internal = 0; a->ndim = 2; a->shape[0] = a->shape[1] = n; size_t shape[max_ndim]; Py_ssize_t ndim = load_seq_as_ulong( pyshape, shape, max_ndim, "Negative dimensions are not allowed."); if (ndim == -1) return 0; long *p = a->ob_item; for (int i = 1; i < n; ++i) { *p++ = 1; for (long *e = p + n; p < e; ++p) *p = 0; } if (n) *p = 1; return (PyObject *)a; return filled_dtable[int(dtype)](ndim, shape, value); } static Array *array_from_sequence(PyObject *top_level_seq, Dtype dtype=Dtype::NONE) PyObject *zeros(PyObject *, PyObject *args) { assert(PySequence_Check(top_level_seq)); int d = 0, ndim = 0; long shape[max_ndim]; return filled_pyargs(args, 0); } // seqs is the stack of sequences being processed, all returned by // PySequence_Fast. ps[d] and es[d] are the begin and end of the elements // of seqs[d - 1]. PyObject *seqs[max_ndim], **ps[max_ndim], **es[max_ndim]; es[0] = 1 + (ps[0] = &top_level_seq); PyObject *ones(PyObject *, PyObject *args) { return filled_pyargs(args, 1); } Py_ssize_t ob_size = 1; long *dest = 0; // Init. only needed to suppress a warning. Array *result = 0; while (true) { // See http://projects.scipy.org/numpy/ticket/2199. const char *msg = "A sequence does not support sequence protocol - " "this is probably due to a bug in numpy for 0-d arrays."; seqs[d] = PySequence_Fast(*ps[d]++, msg); if (!seqs[d]) {--d; goto fail_gracefully;} Py_ssize_t len = PySequence_Fast_GET_SIZE(seqs[d]); if (result) { // Check that the length of current sequence agrees with the shape. if (len != shape[d]) { PyErr_SetString(PyExc_ValueError, "Input has irregular shape."); goto fail_gracefully; }