Commit 632dc6dc authored by Christoph Groth's avatar Christoph Groth
Browse files

code by Christoph Groth

parent b4d1da05
*~
*.pyc
*.so
/build
/dist
/MANIFEST
/src/version.hh
=================
Tinyarray License
=================
Copyright 2012-2013 CEA <http://cea.fr/> and others. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# This file specifies the files to be included in the source distribution
# in addition to the default ones.
include MANIFEST.in
include LICENSE
include TODO
include README_WINDOWS.txt
include test_tinyarray.py
include benchmark.py
recursive-include src *.hh
recursive-include debian *
recursive-exclude debian *~
=========
Tinyarray
=========
Tinyarray is a numerical array module for Python. The multi-dimensional arrays
it provides are best thought of as (possibly nested) tuples of numbers that,
unlike Python's built-in tuples, support mathematical operations. Like tuples,
tinyarrays are hashable and immutable and thus can be used as dictionary keys.
The module's interface is a subset of that of NumPy and hence should be familiar
to many Python programmers. Tinyarray has been heavily optimized for small
arrays: For example, common operations on 1-d arrays of length 3 run up to 35
times faster than with NumPy. When storing many small arrays, memory
consumption is reduced by a factor of 3. In summary, Tinyarray is a more
efficient alternative to NumPy when many separate small numerical arrays are to
be used.
License
-------
Tinyarray is licensed under the "simplified BSD License". The license is
included in the file LICENSE in this directory.
Authors
-------
The principal developer of Tinyarray is Christoph Groth (SPSMS-INAC-CEA
Grenoble). His contributions are part of his work at `CEA <http://cea.fr/>`_,
the French Commissariat à l'énergie atomique et aux énergies alternatives.
The author can be reached at christoph.groth@cea.fr.
Other people that have contributed to Tinyarray include
* Michael Wimmer (Leiden University)
* Joseph Weston (SPSMS-INAC-CEA Grenoble)
To find out who exactly wrote a certain part of Tinyarray, please use the
"blame" feature of `git <http://git-scm.com/>`_, the version control system.
Installation
------------
Tinyarray requires Python 2.6 or 2.7.
To install, run::
python setup.py build
python setup.py install
Usage
-----
At this stage, tinyarray does not provide documentation beyond this file. See
the module docstring for a list of all the available functions and methods. All
of them are simplified versions of their NumPy counterparts.
For example, arrays can be created with::
tinyarray.array(arraylike, [dtype])
where `arraylike` can be a nested sequence of numbers or an object supporting
the buffer protocol. The `dtype` parameter is optional and can only take the
values `int`, 'float`, and `complex`. Note that `dtype` is a positional
argument and cannot be used as a keyword argument. Arrays can be also created
with the functions `identity`, `zeros`, and `ones`.
Tinyarrays support iteration and indexing (currently without slicing), as well
as vectorized elementwise arithmetics. A small number of operations like `dot`,
`floor`, and `transpose` are provided. Printing works as well as pickling and
unpickling. Whenever an operation is missing from Tinyarray, NumPy can be used
directly, e.g.: `numpy.linalg.det(my_tinyarray)`.
A note for users of Microsoft Windows
-------------------------------------
To read the text files in this directory (README, LICENSE, etc.), right-click,
choose "Open", and open with "WordPad". ("Notepad" will not display the files
properly.)
* Fix float hash on 32 bit machines.
* Implement missing arithmetic operations.
* Implement missing comparisons.
* 0-d arrays should not be sequences. Currently there are problems with using
0-d arrays with PySequence_Fast.
* Consider reducing the amount of templated code by having more dtype-flexible
code.
* Merge array_from_arraylike and matrix_from_arraylike
* Optimize readin_arraylike for elements that are tinyarrays.
* Implement concatenate.
import tinyarray
import numpy
from time import time
################################################################
# Mock a numpy like "module" using Python tuples.
def tuple_array(seq):
return tuple(seq)
def tuple_zeros(shape, dtype):
return (dtype(0),) * shape
def tuple_dot(a, b):
return a[0] * b[0] + a[1] * b[1]
class Empty(object):
pass
tuples = Empty()
tuples.__name__ = 'tuples'
tuples.array = tuple_array
tuples.zeros = tuple_zeros
tuples.dot = tuple_dot
################################################################
def zeros(module, dtype, n=100000):
zeros = module.zeros
return list(zeros(2, dtype) for i in xrange(n))
def make_from_list(module, dtype, n=100000):
array = module.array
l = [dtype(e) for e in range(3)]
return list(array(l) for i in xrange(n))
def dot(module, dtype, n=1000000):
dot = module.dot
a = module.zeros(2, dtype)
b = module.zeros(2, dtype)
for i in xrange(n):
c = dot(a, b)
def dot_tuple(module, dtype, n=100000):
dot = module.dot
a = module.zeros(2, dtype)
b = (dtype(0),) * 2
for i in xrange(n):
c = dot(a, b)
def compare(function, modules):
print '{0}:'.format(function.__name__)
for module in modules:
# Execute the function once to make the following timings more
# accurate.
function(module, int)
print " {0:15}".format(module.__name__),
for dtype in (int, float, complex):
t = time()
try:
function(module, dtype)
except:
print " failed ",
else:
print ' {0:.4f} s'.format(time() - t),
print
def main():
print ' int float complex'
modules = [tuples, tinyarray, numpy]
compare(zeros, modules)
compare(make_from_list, modules)
compare(dot, modules)
compare(dot_tuple, modules)
if __name__ == '__main__':
main()
python-tinyarray (1.0) unstable; urgency=low
* Initial release.
-- Christoph Groth <christoph.groth@cea.fr> Thu, 01 Aug 2013 14:53:39 +0200
Source: python-tinyarray
Maintainer: Christoph Groth <christoph.groth@cea.fr>
Section: python
Priority: extra
Build-Depends: python-all-dev (>= 2.6.6-3), debhelper (>= 7)
Standards-Version: 3.9.1
Vcs-Git: http://git.kwant-project.org/tinyarray/
Homepage: http://kwant-project.org/tinyarray/
Package: python-tinyarray
Architecture: any
Depends: ${misc:Depends}, ${python:Depends}, ${shlibs:Depends}
Description: Arrays of numbers for Python, optimized for small sizes
Tinyarray is a numerical array module for Python. The multi-dimensional arrays
it provides are best thought of as (possibly nested) tuples of numbers that,
unlike Python's built-in tuples, support mathematical operations. Like tuples,
tinyarrays are hashable and immutable and thus can be used as dictionary keys.
The module's interface is a subset of that of NumPy and hence should be
familiar to many Python programmers. Tinyarray has been heavily optimized for
small arrays: For example, common operations on 1-d arrays of length 3 run up
to 35 times faster than with NumPy. When storing many small arrays, memory
consumption is reduced by a factor of 3. In summary, Tinyarray is a more
efficient alternative to NumPy when many separate small numerical arrays are to
be used.
Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
Upstream-Name: tinyarray
Upstream-Contact: Christoph Groth <christoph.groth@cea.fr>
Source: http://www.kwant-project.org/tinyarray/
Files: *
Copyright: 2012-2013 CEA <http://cea.fr/> and others
License: BSD-2-Clause
#!/usr/bin/make -f
# This file was automatically generated by stdeb 0.6.0+git at
# Thu, 01 Aug 2013 14:53:39 +0200
%:
dh $@ --with python2 --buildsystem=python_distutils
#!/usr/bin/env python
# Copyright 2012-2013 Tinyarray authors.
#
# This file is part of Tinyarray. It is subject to the license terms in the
# LICENSE file found in the top-level directory of this distribution and at
# http://kwant-project.org/tinyarray/license. A list of Tinyarray authors can
# be found in the README file at the top-level directory of this distribution
# and at http://kwant-project.org/tinyarray.
import subprocess
import os
from distutils.core import setup, Extension
README_FILE = 'README'
STATIC_VERSION_FILE = 'src/version.hh'
tinyarray_dir = os.path.dirname(os.path.abspath(__file__))
def get_version_from_git():
try:
p = subprocess.Popen(['git', 'describe'], cwd=tinyarray_dir,
stdout=subprocess.PIPE, stderr=subprocess.PIPE)
except OSError:
return
if p.wait() != 0:
return
version = p.communicate()[0].strip()
if version[0] == 'v':
version = version[1:]
try:
p = subprocess.Popen(['git', 'diff', '--quiet'], cwd=tinyarray_dir)
except OSError:
version += '-confused' # This should never happen.
else:
if p.wait() == 1:
version += '-dirty'
return version
def get_static_version():
"""Return the version as recorded inside a file."""
try:
with open(STATIC_VERSION_FILE) as f:
contents = f.read()
assert contents[:17] == '#define VERSION "'
assert contents[-2:] == '"\n'
return contents[17:-2]
except:
return None
def version():
"""Determine the version of Tinyarray. Return it and save it in a file."""
git_version = get_version_from_git()
static_version = get_static_version()
if git_version is not None:
version = git_version
if static_version != git_version:
with open(STATIC_VERSION_FILE, 'w') as f:
f.write('#define VERSION "{}"\n'.format(version))
elif static_version is not None:
version = static_version
else:
version = 'unknown'
return version
def long_description():
text = []
skip = True
try:
with open(README_FILE) as f:
for line in f:
if line == "\n":
if skip:
skip = False
continue
else:
break
if not skip:
text.append(line.rstrip())
except:
return ''
return '\n'.join(text)
module = Extension('tinyarray',
language='c++',
sources=['src/arithmetic.cc', 'src/array.cc',
'src/functions.cc'],
depends=['src/arithmetic.hh', 'src/array.hh',
'src/conversion.hh', 'src/functions.hh'])
def main():
setup(name='tinyarray',
version=version(),
author='Christoph Groth and others',
author_email='christoph.groth@cea.fr',
description="Arrays of numbers for Python, optimized for small sizes",
long_description=long_description(),
url="http://kwant-project.org/tinyarray/",
license="BSD",
platforms=["Unix", "Linux", "Mac OS-X", "Windows"],
ext_modules=[module])
if __name__ == '__main__':
main()
// Copyright 2012-2013 Tinyarray authors.
//
// This file is part of Tinyarray. It is subject to the license terms in the
// LICENSE file found in the top-level directory of this distribution and at
// http://kwant-project.org/tinyarray/license. A list of Tinyarray authors can
// be found in the README file at the top-level directory of this distribution
// and at http://kwant-project.org/tinyarray.
#include <Python.h>
#include <limits>
#include <cmath>
#include <sstream>
#include <functional>
#include <algorithm>
#include "array.hh"
#include "arithmetic.hh"
#include "conversion.hh"
// This module assumes C99 behavior of division:
// int(-3) / int(2) == -1
namespace {
template <typename T>
PyObject *array_scalar_product(PyObject *a_, PyObject *b_)
{
assert(Array<T>::check_exact(a_)); Array<T> *a = (Array<T>*)a_;
assert(Array<T>::check_exact(b_)); Array<T> *b = (Array<T>*)b_;
int ndim_a, ndim_b;
size_t *shape_a, *shape_b;
a->ndim_shape(&ndim_a, &shape_a);
b->ndim_shape(&ndim_b, &shape_b);
assert(ndim_a == 1);
assert(ndim_b == 1);
size_t n = shape_a[0];
if (n != shape_b[0]) {
PyErr_SetString(PyExc_ValueError,
"Both arguments must have same length.");
return 0;
}
T *data_a = a->data(), *data_b = b->data();
// It's important not to start with result = 0. This leads to wrong
// results with regard to the sign of zero as 0.0 + -0.0 is 0.0.
if (n == 0) return pyobject_from_number(T(0));
assert(n > 0);
T result = data_a[0] * data_b[0];
for (size_t i = 1; i < n; ++i) {
result += data_a[i] * data_b[i];
}
return pyobject_from_number(result);
}
PyObject *(*array_scalar_product_dtable[])(PyObject*, PyObject*) =
DTYPE_DISPATCH(array_scalar_product);
// This routine is not heavily optimized. It's performance has been measured
// to be adequate, given that it will be called from Python. The actual
// calculation of the matrix product typically uses less than half of the
// execution time of tinyarray.dot for two 3 by 3 matrices.
template <typename T>
PyObject *array_matrix_product(PyObject *a_, PyObject *b_)
{
assert(Array<T>::check_exact(a_)); Array<T> *a = (Array<T>*)a_;
assert(Array<T>::check_exact(b_)); Array<T> *b = (Array<T>*)b_;
int ndim_a, ndim_b;
size_t *shape_a, *shape_b;
a->ndim_shape(&ndim_a, &shape_a);
b->ndim_shape(&ndim_b, &shape_b);
assert(ndim_a > 0);
assert(ndim_b > 0);
int ndim = ndim_a + ndim_b - 2;
assert(ndim > 0);
if (ndim > max_ndim) {
PyErr_SetString(PyExc_ValueError,
"Result would have too many dimensions.");
return 0;
}
const size_t n = shape_a[ndim_a - 1];
size_t shape[max_ndim];
size_t d = 0, a0 = 1;
for (int id = 0, e = ndim_a - 1; id < e; ++id)
a0 *= shape[d++] = shape_a[id];
size_t b0 = 1;
for (int id = 0, e = ndim_b - 2; id < e; ++id)
b0 *= shape[d++] = shape_b[id];
size_t b1, n2;
if (ndim_b == 1) {
n2 = shape_b[0];
b1 = 1;
} else {
n2 = shape_b[ndim_b - 2];
b1 = shape[d++] = shape_b[ndim_b - 1];
}
if (n2 != n) {
PyErr_SetString(PyExc_ValueError, "Matrices are not aligned.");
return 0;
}
size_t size;
Array<T> *result = Array<T>::make(ndim, shape, &size);
if (!result) return 0;
T *dest = result->data();
if (n == 0) {
for (size_t i = 0; i < size; ++i) dest[i] = 0;
} else {
assert(n > 0);
const T *data_a = a->data(), *data_b = b->data();
const T *src_a = data_a;
for (size_t i = 0; i < a0; ++i, src_a += n) {
const T *src_b = data_b;
for (size_t j = 0; j < b0; ++j, src_b += (n - 1) * b1) {
for (size_t k = 0; k < b1; ++k, ++src_b) {
// It's important not to start with sum = 0. This leads to
// wrong results with regard to the sign of zero as 0.0 +
// -0.0 is 0.0.
T sum = src_a[0] * src_b[0];
for (size_t l = 1; l < n; ++l)
sum += src_a[l] * src_b[l * b1];
*dest++ = sum;
}
}
}
}
return (PyObject*)result;
}
PyObject *(*array_matrix_product_dtable[])(PyObject*, PyObject*) =
DTYPE_DISPATCH(array_matrix_product);
PyObject *apply_binary_ufunc(Binary_ufunc **ufunc_dtable,
PyObject *a, PyObject *b)
{
Dtype dtype;
if (coerce_to_arrays(&a, &b, &dtype) < 0) return 0;
int ndim_a, ndim_b;
size_t *shape_a, *shape_b;
reinterpret_cast<Array_base*>(a)->ndim_shape(&ndim_a, &shape_a);
reinterpret_cast<Array_base*>(b)->ndim_shape(&ndim_b, &shape_b);
PyObject *result = 0;
int ndim = std::max(ndim_a, ndim_b);
size_t stride_a = 1, stride_b = 1, shape[max_ndim];;
ptrdiff_t hops_a[max_ndim], hops_b[max_ndim];
for (int d = ndim - 1, d_a = ndim_a - 1, d_b = ndim_b - 1;
d >= 0; --d, --d_a, --d_b) {
size_t ext_a = d_a >= 0 ? shape_a[d_a] : 1;
size_t ext_b = d_b >= 0 ? shape_b[d_b] : 1;
if (ext_a == ext_b) {
hops_a[d] = stride_a;
hops_b[d] = stride_b;
shape[d] = ext_a;
stride_a *= ext_a;
stride_b *= ext_b;
} else if (ext_a == 1) {
hops_a[d] = 0;
hops_b[d] = stride_b;
stride_b *= shape[d] = ext_b;
} else if (ext_b == 1) {
hops_a[d] = stride_a;
hops_b[d] = 0;
stride_a *= shape[d] = ext_a;
} else {
std::ostringstream s;
s << "Operands could not be broadcast together with shapes (";
for (int d = 0; d < ndim_a; ++d) {
s << shape_a[d];
if (d + 1 < ndim_a) s << ", ";
}
s << ") and (";
for (int d = 0; d < ndim_b; ++d) {
s << shape_b[d];
if (d + 1 < ndim_b) s << ", ";
}
s << ").";
PyErr_SetString(PyExc_ValueError, s.str().c_str());
goto end;
}
}
for (int d = 1; d < ndim; ++d)
{
hops_a[d - 1] -= hops_a[d] * shape[d];
hops_b[d - 1] -= hops_b[d] * shape[d];
}
result = ufunc_dtable[int(dtype)](ndim, shape, a, hops_a, b, hops_b);
end:
Py_DECREF(a);
Py_DECREF(b);
return result;
}
} // Anonymous namespace
template <template <typename> class Op>
template <typename T>
PyObject *Binary_op<Op>::ufunc(int ndim, const size_t *shape,
PyObject *a_, const ptrdiff_t *hops_a,
PyObject *b_, const ptrdiff_t *hops_b)
{
Op<T> operation;
assert(Array<T>::check_exact(a_)); Array<T> *a = (Array<T>*)a_;
assert(Array<T>::check_exact(b_)); Array<T> *b = (Array<T>*)b_;
T *src_a = a->data(), *src_b = b->data();
if (ndim == 0) {
T result;
if (operation(result, *src_a, *src_b)) return 0;
return (PyObject*)pyobject_from_number(result);
}
Array<T> *result = Array<T>::make(ndim, shape);
if (result == 0) return 0;
T *dest = result->data();