array.cc 54.1 KB
Newer Older
Christoph Groth's avatar
Christoph Groth committed
1
2
3
4
// Copyright 2012-2013 Tinyarray authors.
//
// This file is part of Tinyarray.  It is subject to the license terms in the
// LICENSE file found in the top-level directory of this distribution and at
Christoph Groth's avatar
Christoph Groth committed
5
6
7
8
// http://git.kwant-project.org/tinyarray/about/LICENSE.  A list of Tinyarray
// authors can be found in the README file at the top-level directory of this
// distribution and at http://git.kwant-project.org/tinyarray/about/.

Christoph Groth's avatar
Christoph Groth committed
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86

#include <Python.h>
#include <cstddef>
#include <sstream>
#include <limits>
#include "array.hh"
#include "arithmetic.hh"
#include "functions.hh"
#include "conversion.hh"
#include "version.hh"

template <>
const char *Array<long>::pyformat = "l";
template <>
const char *Array<double>::pyformat = "d";
template <>
const char *Array<Complex>::pyformat = "Zd";

const char *dtype_names[] = {"int", "float", "complex"};

const char *format_names[] = {
    "int32 little endian", "int32 big endian",
    "int64 little endian", "int64 big endian",
    "float64 little endian", "float64 big endian",
    "complex128 little endian", "complex128 big endian",
    "unknown"};

Format format_by_dtype[NONE];

namespace {

bool is_big_endian()
{
    union {
        unsigned int i;
        char c[4];
    } bint = {0x0a0b0c0d};

    if (bint.c[0] == 0x0a) {
        assert(bint.c[1] == 0x0b);
        assert(bint.c[2] == 0x0c);
        assert(bint.c[3] == 0x0d);
        return true;
    } else {
        assert(bint.c[0] == 0x0d);
        assert(bint.c[1] == 0x0c);
        assert(bint.c[2] == 0x0b);
        assert(bint.c[3] == 0x0a);
        return false;
    }
}

PyObject *int_str, *long_str, *float_str, *complex_str, *index_str,
    *reconstruct;

Dtype dtype_of_scalar(PyObject *obj)
{
    if (PyComplex_Check(obj)) return COMPLEX;
    if (PyFloat_Check(obj)) return DOUBLE;
    if (PyInt_Check(obj)) return LONG;
    // TODO: The following line should be removed for Python 3.
    if (PyLong_Check(obj)) return LONG;
    if (PyObject_HasAttr(obj, index_str)) return LONG;

    // I'm not sure about this paragraph.  Does the existence of a __complex__
    // method already signify that the number is to be interpreted as a complex
    // number?  What about __float__?  Perhaps the following code is useless.
    // In practice (with built-in and numpy numerical types) it never plays a
    // role anyway.
    if (PyObject_HasAttr(obj, complex_str)) return COMPLEX;
    if (PyObject_HasAttr(obj, float_str)) return DOUBLE;
    if (PyObject_HasAttr(obj, int_str)) return LONG;
    // TODO: The following line should be removed for Python 3.
    if (PyObject_HasAttr(obj, long_str)) return LONG;

    return NONE;
}

Michael Wimmer's avatar
Michael Wimmer committed
87
88
89
Dtype dtype_of_buffer(Py_buffer *view)
{
    char *fmt = view->format;
Christoph Groth's avatar
Christoph Groth committed
90
    Dtype dtype = NONE;
Michael Wimmer's avatar
Michael Wimmer committed
91

Christoph Groth's avatar
Christoph Groth committed
92
93
    // Currently, we only understand native endianness and alignment.
    if (*fmt == '@') fmt++;
Michael Wimmer's avatar
Michael Wimmer committed
94

Christoph Groth's avatar
Christoph Groth committed
95
96
    if (strchr("cbB?hHiIlL", *fmt)) {
        dtype = LONG;
Michael Wimmer's avatar
Michael Wimmer committed
97
        fmt++;
Christoph Groth's avatar
Christoph Groth committed
98
99
    } else if (strchr("fdg", *fmt)) {
        dtype = DOUBLE;
Michael Wimmer's avatar
Michael Wimmer committed
100
        fmt++;
Christoph Groth's avatar
Christoph Groth committed
101
    } else if (*fmt == 'Z') {
Michael Wimmer's avatar
Michael Wimmer committed
102
        fmt++;
Christoph Groth's avatar
Christoph Groth committed
103
104
        if (strchr("fdg", *fmt)) {
            dtype = COMPLEX;
Michael Wimmer's avatar
Michael Wimmer committed
105
106
107
108
        }
        fmt++;
    }

Christoph Groth's avatar
Christoph Groth committed
109
110
111
    // Right now, no composite data structures are supported; if we found a
    // single supported data type, we should be a the end of the string.
    if (*fmt != '\0') return NONE;
Michael Wimmer's avatar
Michael Wimmer committed
112
113
114
115

    return dtype;
}

Christoph Groth's avatar
Christoph Groth committed
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
template<typename O, typename I>
PyObject *convert_array(PyObject *in_, int ndim, size_t *shape)
{
    assert(Array<I>::check_exact(in_)); Array<I> *in = (Array<I>*)in_;
    size_t size;
    if (ndim == -1) {
        assert(shape == 0);
        in->ndim_shape(&ndim, &shape);
    } else {
#ifndef NDEBUG
        int in_ndim;
        size_t *in_shape;
        in->ndim_shape(&in_ndim, &in_shape);
        assert(shape);
        assert(calc_size(ndim, shape) == calc_size(in_ndim, in_shape));
#endif
    }
    Array<O> *out = Array<O>::make(ndim, shape, &size);
    I *src = in->data();
    O *dest = out->data();
    for (size_t i = 0; i < size; ++i) dest[i] = src[i];
    return (PyObject*)out;
}

typedef PyObject *Convert_array(PyObject*, int, size_t*);

Convert_array *convert_array_dtable[][3] = {
    {convert_array<long, long>,
     convert_array<long, double>,
     0},
    {convert_array<double, long>,
     convert_array<double, double>,
     0},
    {convert_array<Complex, long>,
     convert_array<Complex, double>,
     convert_array<Complex, Complex>}
};

PyObject *convert_array(Dtype dtype_out, PyObject *in, Dtype dtype_in,
                        int ndim = -1, size_t *shape = 0)
{
    if (dtype_in == NONE)
        dtype_in = get_dtype(in);
    assert(get_dtype(in) == get_dtype(in));
    Convert_array *func = convert_array_dtable[int(dtype_out)][int(dtype_in)];
    if (!func) {
        PyErr_Format(PyExc_TypeError, "Cannot convert %s to %s.",
                     dtype_names[int(dtype_in)], dtype_names[int(dtype_out)]);
        return 0;
    }
    return func(in, ndim, shape);
}

const char *seq_err_msg =
    "A sequence does not support sequence protocol - "
    "this is probably due to a bug in numpy for 0-d arrays.";

// This function determines the shape of an array like sequence (or sequence of
// sequences or number) given to it as first parameter.  `dtype_guess' is the
// dtype of the first element of the sequence.
//
// All four arguments after the first one are written to. `shape' and `seqs'
// must have space for at least `max_ndim' elements.
//
// After successful execution `seqs' will contain `ndim' new references
// returned by PySequence_Fast.
int examine_arraylike(PyObject *arraylike, int *ndim, size_t *shape,
                      PyObject **seqs, Dtype *dtype_guess)
{
    PyObject *p = arraylike;
    int d = -1;
    while (true) {
        if (PySequence_Check(p)) {
            ++d;
            if (d == ptrdiff_t(max_ndim)) {
                // Strings are, in a way, infinitely nested sequences because
                // the first element of a string is a again a string.
                if (PyString_Check(p))
                    PyErr_SetString(PyExc_TypeError, "Expecting a number.");
                else
                    PyErr_SetString(PyExc_ValueError, "Too many dimensions.");
                --d;
                goto fail;
            }
        } else {
            // We are in the innermost sequence.  Determine the dtype if
            // requested.
            if (dtype_guess) {
                *dtype_guess = dtype_of_scalar(p);
                if (*dtype_guess == NONE) {
                    PyErr_SetString(PyExc_TypeError, "Expecting a number.");
                    goto fail;
                }
            }
            break;
        }

        // See http://projects.scipy.org/numpy/ticket/2199.
        seqs[d] = PySequence_Fast(p, seq_err_msg);
        if (!seqs[d]) {--d; goto fail;}

        if ((shape[d] = PySequence_Fast_GET_SIZE(seqs[d]))) {
            p = *PySequence_Fast_ITEMS(seqs[d]);
        } else {
            // We are in the innermost sequence which is empty.
            if (dtype_guess) *dtype_guess = NONE;
            break;
        }
    }
    *ndim = d + 1;
    return 0;

fail:
    for (; d >= 0; --d) Py_DECREF(seqs[d]);
    return -1;
}

// This function is designed to be run after examine_arraylike.  It takes care
// of releasing the references passed to it in seqs.
template <typename T>
int readin_arraylike(T *dest, int ndim, const size_t *shape,
                     PyObject *arraylike, PyObject **seqs, bool exact)
{
    if (ndim == 0) {
        T value;
        if (exact)
            value = number_from_pyobject_exact<T>(arraylike);
        else
            value = number_from_pyobject<T>(arraylike);
        if (value == T(-1) && PyErr_Occurred()) return -1;
        *dest++ = value;
        return 0;
    }

    // seqs is the stack of sequences being processed, all returned by
    // PySequence_Fast.  ps[d] and es[d] are the begin and end of the elements
    // of seqs[d - 1].
    PyObject **ps[max_ndim], **es[max_ndim];
    es[0] = ps[0] = 0;

    for (int d = 1; d < ndim; ++d) {
        PyObject **p = PySequence_Fast_ITEMS(seqs[d - 1]);
        ps[d] = p + 1;
        es[d] = p + shape[d - 1];
    }

    int d = ndim - 1;
    size_t len = shape[d];
    PyObject **p = PySequence_Fast_ITEMS(seqs[d]), **e = p + len;
    while (true) {
        if (len && PySequence_Check(p[0])) {
            if (d + 1 == ndim) {
                PyErr_SetString(PyExc_ValueError,
                                "Input has irregular nesting depth.");
                goto fail;
            }
            ++d;
            ps[d] = p;
            es[d] = e;
        } else {
            // Read-in a leaf sequence.
            while (p < e) {
                T value;
                if (exact)
                    value = number_from_pyobject_exact<T>(*p++);
                else
                    value = number_from_pyobject<T>(*p++);
                if (value == T(-1) && PyErr_Occurred()) goto fail;
                *dest++ = value;
            }
            Py_DECREF(seqs[d]);

            while (ps[d] == es[d]) {
                if (d == 0) {
                    // Success!
                    return 0;
                }
                --d;
                Py_DECREF(seqs[d]);
            }
            if (!PySequence_Check(*ps[d])) {
                --d;
                PyErr_SetString(PyExc_ValueError,
                                "Input has irregular nesting depth.");
                goto fail;
            }
        }

        // See http://projects.scipy.org/numpy/ticket/2199.
        seqs[d] = PySequence_Fast(*ps[d]++, seq_err_msg);
        if (!seqs[d]) {--d; goto fail;}
        len = PySequence_Fast_GET_SIZE(seqs[d]);

        // Verify that the length of the current sequence agrees with the
        // shape.
        if (len != shape[d]) {
            PyErr_SetString(PyExc_ValueError,
                            "Input has irregular shape.");
            goto fail;
        }

        p = PySequence_Fast_ITEMS(seqs[d]);
        e = p + len;
    }

fail:
    while (true) {
        Py_DECREF(seqs[d]);
        if (d == 0) break;
        --d;
    }
    return -1;
}

template <typename T>
PyObject *make_and_readin_array(PyObject *in, int ndim_in, int ndim_out,
                                const size_t *shape_out, PyObject **seqs,
                                bool exact)
{
    Array<T> *result = Array<T>::make(ndim_out, shape_out);
    assert(ndim_out >= ndim_in);
#ifndef NDEBUG
    for (int d = 0, e = ndim_out - ndim_in; d < e; ++d)
        assert(shape_out[d] == 1);
#endif
    if (result == 0) return 0;
    if (readin_arraylike<T>(result->data(), ndim_in,
                            shape_out + ndim_out - ndim_in, in, seqs, exact)
        == -1) {
        Py_DECREF(result);
        return 0;
    }
    return (PyObject*)result;
}

PyObject *(*make_and_readin_array_dtable[])(
    PyObject*, int, int, const size_t*, PyObject**, bool) =
    DTYPE_DISPATCH(make_and_readin_array);

Michael Wimmer's avatar
Michael Wimmer committed
355
356
int examine_buffer(PyObject *in, Py_buffer *view, Dtype *dtype)
{
Christoph Groth's avatar
Christoph Groth committed
357
    Dtype dt = NONE;
Michael Wimmer's avatar
Michael Wimmer committed
358
    memset(view, 0, sizeof(Py_buffer));
Christoph Groth's avatar
Christoph Groth committed
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
    if (!PyObject_CheckBuffer(in)) return -1;

    // I don't know if the following makes much sense: I try to get the buffer
    // using less and less demanding flags. NumPy does the same.
    if (PyObject_GetBuffer(in, view, PyBUF_ND | PyBUF_FORMAT) == 0)
        dt = dtype_of_buffer(view);
    else if (PyObject_GetBuffer(in, view, PyBUF_STRIDES | PyBUF_FORMAT) == 0)
        dt = dtype_of_buffer(view);
    else if (PyObject_GetBuffer(in, view, PyBUF_FULL_RO) == 0)
        dt = dtype_of_buffer(view);
    PyErr_Clear();

    // Check if the buffer can actually be converted into one of our
    // formats.
    if (dt == NONE) return -1;

    if (dtype) *dtype = dt;
    return 0;
Michael Wimmer's avatar
Michael Wimmer committed
377
378
379
380
381
382
}

template<typename T>
T (*get_buffer_converter_complex(const char *fmt))(const void *);

template<>
Christoph Groth's avatar
Christoph Groth committed
383
long (*get_buffer_converter_complex(const char *))(const void *)
Michael Wimmer's avatar
Michael Wimmer committed
384
{
Christoph Groth's avatar
Christoph Groth committed
385
    // Complex can only be cast to complex.
Michael Wimmer's avatar
Michael Wimmer committed
386
387
388
389
390
391
    PyErr_Format(PyExc_TypeError, "Complex cannot be cast to int.");

    return 0;
}

template<>
Christoph Groth's avatar
Christoph Groth committed
392
double (*get_buffer_converter_complex(const char *))(const void *)
Michael Wimmer's avatar
Michael Wimmer committed
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
{
    // complex can only be cast to complex
    PyErr_Format(PyExc_TypeError, "Complex cannot be cast to float.");

    return 0;
}

template<>
Complex (*get_buffer_converter_complex(const char *fmt))(const void *)
{
    switch(*(fmt + 1)){
    case 'f':
        return number_from_ptr<Complex,  std::complex<float> >;
    case 'd':
        return number_from_ptr<Complex, std::complex<double> >;
    case 'g':
        return number_from_ptr<Complex, std::complex<long double> >;
    }

    return 0;
}

template<typename T>
T (*get_buffer_converter(Py_buffer *view))(const void *)
{
    // currently, we only understand native endianness and alignment
    char *fmt = view->format;

Christoph Groth's avatar
Christoph Groth committed
421
    if (*fmt == '@') {
Michael Wimmer's avatar
Michael Wimmer committed
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
        fmt++;
    }

    switch(*fmt) {
    case 'c':
        return number_from_ptr<T, char>;
    case 'b':
        return number_from_ptr<T, signed char>;
    case 'B':
        return number_from_ptr<T, unsigned char>;
    case '?':
        return number_from_ptr<T, bool>;
    case 'h':
        return number_from_ptr<T, short>;
    case 'H':
        return number_from_ptr<T, unsigned short>;
    case 'i':
        return number_from_ptr<T, int>;
    case 'I':
        return number_from_ptr<T, unsigned int>;
    case 'l':
        return number_from_ptr<T, long>;
    case 'L':
        return number_from_ptr<T, unsigned long>;
    case 'q':
        return number_from_ptr<T, long long>;
    case 'Q':
        return number_from_ptr<T, unsigned long long>;
    case 'f':
        return number_from_ptr<T, float>;
    case 'd':
        return number_from_ptr<T, double>;
    case 'g':
        return number_from_ptr<T, long double>;
    case 'Z':
        return get_buffer_converter_complex<T>(fmt);
    }

    return 0;
}

template<typename T>
Christoph Groth's avatar
Christoph Groth committed
464
int readin_buffer(T *dest, Py_buffer *view)
Michael Wimmer's avatar
Michael Wimmer committed
465
466
{
    T (*number_from_ptr)(const void *) = get_buffer_converter<T>(view);
Christoph Groth's avatar
Christoph Groth committed
467
    if (!number_from_ptr) return -1;
Michael Wimmer's avatar
Michael Wimmer committed
468

Christoph Groth's avatar
Christoph Groth committed
469
    if (view->ndim == 0) {
Michael Wimmer's avatar
Michael Wimmer committed
470
        *dest = (*number_from_ptr)(view->buf);
Christoph Groth's avatar
Christoph Groth committed
471
        if (PyErr_Occurred()) return -1;
Michael Wimmer's avatar
Michael Wimmer committed
472
473
474
        else return 0;
    }

Christoph Groth's avatar
Christoph Groth committed
475
476
    Py_ssize_t indices[max_ndim];
    for (int i = 0; i < view->ndim; i++) {
Michael Wimmer's avatar
Michael Wimmer committed
477
478
479
        indices[i] = 0;
    }

Christoph Groth's avatar
Christoph Groth committed
480
    if (view->suboffsets) {
Michael Wimmer's avatar
Michael Wimmer committed
481
482
483
484
485
486
487
488
489
490
        while(indices[0] < view->shape[0]) {
            char *pointer = (char*)view->buf;
            for (int i = 0; i < view->ndim; i++) {
                pointer += view->strides[i] * indices[i];
                if (view->suboffsets[i] >=0 ) {
                    pointer = *((char**)pointer) + view->suboffsets[i];
                }
            }

            *dest++ = (*number_from_ptr)(pointer);
Christoph Groth's avatar
Christoph Groth committed
491
            if (PyErr_Occurred()) return -1;
Michael Wimmer's avatar
Michael Wimmer committed
492
493
494

            indices[view->ndim-1]++;

Christoph Groth's avatar
Christoph Groth committed
495
496
            for (int i = view->ndim - 1; i > 0; i--) {
                if (indices[i] < view->shape[i]) break;
Michael Wimmer's avatar
Michael Wimmer committed
497
498
499
500
                indices[i-1]++;
                indices[i] = 0;
            }
        }
Christoph Groth's avatar
Christoph Groth committed
501
    } else if (view->strides) {
Michael Wimmer's avatar
Michael Wimmer committed
502
503
504
505
        char *ptr = (char *)view->buf;

        while(indices[0] < view->shape[0]) {
            *dest++ = (*number_from_ptr)(ptr);
Christoph Groth's avatar
Christoph Groth committed
506
            if (PyErr_Occurred()) return -1;
Michael Wimmer's avatar
Michael Wimmer committed
507
508
509
510

            indices[view->ndim-1] ++;
            ptr += view->strides[view->ndim-1];

Christoph Groth's avatar
Christoph Groth committed
511
512
            for (int i = view->ndim - 1; i > 0; i--) {
                if (indices[i] < view->shape[i]) break;
Michael Wimmer's avatar
Michael Wimmer committed
513
514
515
516
517
518
                indices[i-1]++;
                ptr += view->strides[i-1];
                indices[i] = 0;
                ptr -= view->strides[i] * view->shape[i];
            }
        }
Christoph Groth's avatar
Christoph Groth committed
519
520
    } else {
        // Must be C-contiguous.
Michael Wimmer's avatar
Michael Wimmer committed
521
522
523
524
        char *end = (char *)view->buf + view->len;
        char *p = (char *)view->buf;
        while(p < end) {
            *dest++ = (*number_from_ptr)(p);
Christoph Groth's avatar
Christoph Groth committed
525
            if (PyErr_Occurred()) return -1;
Michael Wimmer's avatar
Michael Wimmer committed
526
527
528
529
530
531
532
533
534
535

            p += view->itemsize;
        }
    }

    return 0;
}

template <typename T>
PyObject *make_and_readin_buffer(Py_buffer *view, int ndim_out,
Christoph Groth's avatar
Christoph Groth committed
536
                                 const size_t *shape_out)
Michael Wimmer's avatar
Michael Wimmer committed
537
538
{
    Array<T> *result = Array<T>::make(ndim_out, shape_out);
Christoph Groth's avatar
Christoph Groth committed
539
    assert(ndim_out >= view->ndim);
Michael Wimmer's avatar
Michael Wimmer committed
540
541
542
543
544
#ifndef NDEBUG
    for (int d = 0, e = ndim_out - view->ndim; d < e; ++d)
        assert(shape_out[d] == 1);
#endif
    if (result == 0) return 0;
Christoph Groth's avatar
Christoph Groth committed
545
    if (readin_buffer<T>(result->data(), view) == -1) {
Michael Wimmer's avatar
Michael Wimmer committed
546
547
548
549
550
551
552
        Py_DECREF(result);
        return 0;
    }
    return (PyObject*)result;
}

PyObject *(*make_and_readin_buffer_dtable[])(
Christoph Groth's avatar
Christoph Groth committed
553
    Py_buffer *, int, const size_t*) = DTYPE_DISPATCH(make_and_readin_buffer);
Michael Wimmer's avatar
Michael Wimmer committed
554

Christoph Groth's avatar
Christoph Groth committed
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
template <typename T>
PyObject *to_pystring(Array<T> *self, PyObject* to_str(PyObject *),
                      const char *header, const char *trailer,
                      const char *indent, const char *separator)
{
    int ndim;
    size_t *shape;
    self->ndim_shape(&ndim, &shape);

    std::ostringstream o;
    o << header;

    const T *p = self->data();
    if (ndim > 0) {
        int d = 0;
        size_t i[max_ndim];
        i[0] = shape[0];

        o << '[';
        while (true) {
            if (i[d]) {
                --i[d];
                if (d < ndim - 1) {
                    o << '[';
                    ++d;
                    i[d] = shape[d];
                } else {
                    PyObject *num = pyobject_from_number(*p++);
                    PyObject *str = to_str(num);
                    o << PyString_AsString(str);
                    Py_DECREF(str);
                    Py_DECREF(num);
                    if (i[d] > 0) o << separator << ' ';
                }
            } else {
                o << ']';
                if (d == 0) break;
                --d;
                if (i[d]) {
                    o << separator << "\n " << indent;
                    for (int i = 0; i < d; ++i) o << ' ';
                }
            }
        }
    } else {
        PyObject *num = pyobject_from_number(*p);
        PyObject *str = to_str(num);
        o << PyString_AsString(str);
        Py_DECREF(str);
        Py_DECREF(num);
    }
    o << trailer;

    return PyString_FromString(o.str().c_str());
}

template <typename T>
Michael Wimmer's avatar
Michael Wimmer committed
612
PyObject *repr(PyObject *obj)
Christoph Groth's avatar
Christoph Groth committed
613
{
Michael Wimmer's avatar
Michael Wimmer committed
614
    Array<T> *self = reinterpret_cast<Array<T> *>(obj);
Christoph Groth's avatar
Christoph Groth committed
615
616
617
618
    return to_pystring(self, PyObject_Repr, "array(", ")", "      ", ",");
}

template <typename T>
Michael Wimmer's avatar
Michael Wimmer committed
619
PyObject *str(PyObject *obj)
Christoph Groth's avatar
Christoph Groth committed
620
{
Michael Wimmer's avatar
Michael Wimmer committed
621
    Array<T> *self = reinterpret_cast<Array<T> *>(obj);
Christoph Groth's avatar
Christoph Groth committed
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
    return to_pystring(self, PyObject_Str, "", "", "", "");
}

Py_ssize_t len(Array_base *self)
{
    int ndim;
    size_t *shape;
    self->ndim_shape(&ndim, &shape);
    if (ndim == 0) {
        PyErr_SetString(PyExc_TypeError, "len() of unsized object.");
        return -1;
    }
    return shape[0];
}

Py_ssize_t index_from_key(int ndim, const size_t *shape, PyObject *key)
{
    long indices[max_ndim];
Michael Wimmer's avatar
Michael Wimmer committed
640
    int res = load_index_seq_as_long(key, indices, max_ndim);
Christoph Groth's avatar
Christoph Groth committed
641
642
643
644
    if (res == -1) {
        PyErr_SetString(PyExc_IndexError, "Invalid index.");
        return -1;
    }
Michael Wimmer's avatar
Michael Wimmer committed
645
    if (res != ndim) {
Christoph Groth's avatar
Christoph Groth committed
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
        PyErr_SetString(PyExc_IndexError, "Number of indices "
                        "must be equal to number of dimensions.");
        return -1;
    }

    int d = 0;
    Py_ssize_t s = shape[0];
    Py_ssize_t index = indices[0];
    if (index < 0) index += s;
    if (index < 0 || index >= s) goto out_of_range;
    for (d = 1; d < ndim; ++d) {
        s = shape[d];
        Py_ssize_t i = indices[d];
        if (i < 0) i += s;
        if (i < 0 || i >= s) goto out_of_range;
        index *= s;
        index += i;
    }
    return index;

out_of_range:
    PyErr_Format(PyExc_IndexError, "Index %ld out of range "
                 "(-%lu <= index < %lu) in dimension %d.",
                 indices[d], (unsigned long)s, (unsigned long)s, d);
    return -1;
}

template <typename T>
Michael Wimmer's avatar
Michael Wimmer committed
674
PyObject *getitem(PyObject *obj, PyObject *key)
Christoph Groth's avatar
Christoph Groth committed
675
{
Michael Wimmer's avatar
Michael Wimmer committed
676
677
    Array<T> *self = reinterpret_cast<Array<T> *>(obj);

Christoph Groth's avatar
Christoph Groth committed
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
    if (PySlice_Check(key)) {
        PyErr_SetString(PyExc_NotImplementedError,
                        "Slices are not implemented.");
        return 0;
    } else {
        int ndim;
        size_t *shape;
        self->ndim_shape(&ndim, &shape);
        T *data = self->data();
        Py_ssize_t index = index_from_key(ndim, shape, key);
        if (index == -1) return 0;
        return pyobject_from_number(data[index]);
    }
}

template <typename T>
Michael Wimmer's avatar
Michael Wimmer committed
694
PyObject *seq_getitem(PyObject *obj, Py_ssize_t index)
Christoph Groth's avatar
Christoph Groth committed
695
696
697
{
    int ndim;
    size_t *shape;
Michael Wimmer's avatar
Michael Wimmer committed
698
    Array<T> *self = reinterpret_cast<Array<T> *>(obj);
Christoph Groth's avatar
Christoph Groth committed
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
    self->ndim_shape(&ndim, &shape);
    assert(ndim != 0);

    if (index < 0) index += shape[0];
    if (size_t(index) >= shape[0]) {
        PyErr_SetString(PyExc_IndexError, "Invalid index.");
        return 0;
    }

    T *src = self->data();
    if (ndim == 1) {
        assert(index >= 0);
        assert(size_t(index) < shape[0]);
        return pyobject_from_number(src[index]);
    }

    assert(ndim > 1);
    size_t item_size;
    Array<T> *result = Array<T>::make(ndim - 1, shape + 1, &item_size);
    if (!result) return 0;
    src += index * item_size;
    T *dest = result->data();
    for (size_t i = 0; i < item_size; ++i) dest[i] = src[i];
    return (PyObject*)result;
}

template <typename T>
Michael Wimmer's avatar
Michael Wimmer committed
726
int getbuffer(PyObject *obj, Py_buffer *view, int flags)
Christoph Groth's avatar
Christoph Groth committed
727
728
729
{
    int ndim;
    size_t *shape, size;
Michael Wimmer's avatar
Michael Wimmer committed
730
    Array<T> *self = reinterpret_cast<Array<T> *>(obj);
Christoph Groth's avatar
Christoph Groth committed
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800

    assert(view);
    if ((flags & PyBUF_F_CONTIGUOUS) == PyBUF_F_CONTIGUOUS) {
        PyErr_SetString(PyExc_BufferError,
                        "Tinyarrays are not Fortran contiguous.");
        goto fail;
    }
    // The following has been commented out as a workaround for Cython's
    // inability to deal with read-only buffers.
    //
    // if ((flags & PyBUF_WRITEABLE) == PyBUF_WRITEABLE) {
    //     PyErr_SetString(PyExc_BufferError, "Tinyarrays are not writeable");
    //     goto fail;
    // }

    self->ndim_shape(&ndim, &shape);
    size = calc_size(ndim, shape);

    view->buf = self->data();
    view->itemsize = sizeof(T);
    view->len = size * view->itemsize;
    view->readonly = 1;
    if ((flags & PyBUF_FORMAT) == PyBUF_FORMAT)
        view->format = (char*)Array<T>::pyformat;
    else
        view->format = 0;
    if ((flags & PyBUF_ND) == PyBUF_ND) {
        view->ndim = ndim;
        view->shape = (Py_ssize_t*)shape;
        // From the documentation it's not clear whether it is allowed not to
        // set strides to NULL (for C continuous arrays), but it works, so we
        // do it.  However, there is a bug in current numpy
        // (http://projects.scipy.org/numpy/ticket/2197) which requires strides
        // if view->len == 0.  Because we don't have proper strides, we just
        // set strides to shape.  This dirty trick seems to work well -- no one
        // looks at strides when len == 0.
        if (size != 0)
            view->strides = 0;
        else
            view->strides = (Py_ssize_t*)shape;
    } else {
        view->ndim = 0;
        view->shape = 0;
        view->strides = 0;
    }
    view->internal = 0;
    view->suboffsets = 0;

    // Success.
    Py_INCREF(self);
    view->obj = (PyObject*)self;

    return 0;

fail:
    view->obj = 0;
    return -1;
}

// Given the same input, these hash functions return the same result as those
// in Python.  As tinyarrays compare equal to equivalent tuples it is important
// for the hashes to agree.  If not, there will be problems with dictionaries.

long hash(long x)
{
    return x != -1 ? x : -2;
}

long hash(double x)
{
Christoph Groth's avatar
Christoph Groth committed
801
    const double two_to_31st = 2147483648.0;
Christoph Groth's avatar
Christoph Groth committed
802
    double intpart, fractpart;
Christoph Groth's avatar
Christoph Groth committed
803

804
805
806
807
808
809
    if (x == std::numeric_limits<double>::infinity())
        return 314159;
    else if (x == -std::numeric_limits<double>::infinity())
        return -271828;
    else if (x != x)
        return 0;               // NaN
Christoph Groth's avatar
Christoph Groth committed
810

Christoph Groth's avatar
Christoph Groth committed
811
    fractpart = modf(x, &intpart);
Christoph Groth's avatar
Christoph Groth committed
812
813
814
815
816
817
818
819
820
821
822
823
    if (fractpart == 0) {
        // This must return the same hash as an equal int or long.

        if (intpart > std::numeric_limits<long>::max() ||
            intpart < std::numeric_limits<long>::min()) {
            // Convert to Python long and use its hash.
            PyObject *plong = PyLong_FromDouble(x);
            if (plong == NULL) return -1;
            long result = PyObject_Hash(plong);
            Py_DECREF(plong);
            return result;
        }
Christoph Groth's avatar
Christoph Groth committed
824
825
826
827
828
        return hash(long(intpart));
    }

    int expo;
    x = frexp(x, &expo) * two_to_31st;
Christoph Groth's avatar
Christoph Groth committed
829
830
831
    long hipart = x;                        // Take the top 32 bits.
    x = (x - double(hipart)) * two_to_31st; // Get the next 32 bits.
    return hash(hipart + long(x) + (expo << 15));
Christoph Groth's avatar
Christoph Groth committed
832
833
834
835
836
837
838
839
840
841
842
}

long hash(Complex x)
{
    // x.imag == 0  =>  hash(x.imag) == 0  =>  hash(x) == hash(x.real)
    return hash(x.real()) + 1000003L * hash(x.imag());
}

// This routine calculates the hash of a multi-dimensional array.  The hash is
// equal to that of an arrangement of nested tuples equivalent to the array.
template <typename T>
Michael Wimmer's avatar
Michael Wimmer committed
843
long hash(PyObject *obj)
Christoph Groth's avatar
Christoph Groth committed
844
845
846
{
    int ndim;
    size_t *shape;
Michael Wimmer's avatar
Michael Wimmer committed
847
    Array<T> *self = reinterpret_cast<Array<T> *>(obj);
Christoph Groth's avatar
Christoph Groth committed
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
    self->ndim_shape(&ndim, &shape);
    T *p = self->data();
    if (ndim == 0) return hash(*p);

    const long mult_init = 1000003, r_init = 0x345678;
    const long mul_addend = 82520, r_addend = 97531;
    Py_ssize_t i[max_ndim];
    long mult[max_ndim], r[max_ndim];
    --ndim;                     // For convenience.
    int d = 0;
    i[0] = shape[0];
    mult[0] = mult_init;
    r[0] = r_init;
    while (true) {
        if (i[d]) {
            --i[d];
            if (d == ndim) {
                r[d] = (r[d] ^ hash(*p++)) * mult[d];
                mult[d] += mul_addend + 2 * i[d];
            } else {
                ++d;
                i[d] = shape[d];
                mult[d] = mult_init;
                r[d] = r_init;
            }
        } else {
            if (d == 0) return hash(r[0] + r_addend);
            --d;
            r[d] = (r[d] ^ hash(r[d+1] + r_addend)) * mult[d];
            mult[d] += mul_addend + 2 * i[d];
        }
    }
}

template <typename T>
bool is_equal_data(PyObject *a_, PyObject *b_, size_t size)
{
    assert(Array<T>::check_exact(a_)); Array<T> *a = (Array<T>*)a_;
    assert(Array<T>::check_exact(b_)); Array<T> *b = (Array<T>*)b_;
    T *data_a = a->data();
    T *data_b = b->data();
    for (size_t i = 0; i < size; ++i)
        if (data_a[i] != data_b[i]) return false;
    return true;
}

bool (*is_equal_data_dtable[])(PyObject*, PyObject*, size_t) =
    DTYPE_DISPATCH(is_equal_data);

PyObject *richcompare(PyObject *a, PyObject *b, int op)
{
    if (op != Py_EQ && op != Py_NE) {
        Py_INCREF(Py_NotImplemented);
        return Py_NotImplemented;
    }

    bool equal = (a == b);
    if (equal) goto done;

    Dtype dtype;
    if (coerce_to_arrays(&a, &b, &dtype) < 0) return 0;

    int ndim_a, ndim_b;
    size_t *shape_a, *shape_b;
    reinterpret_cast<Array_base*>(a)->ndim_shape(&ndim_a, &shape_a);
    reinterpret_cast<Array_base*>(b)->ndim_shape(&ndim_b, &shape_b);
    if (ndim_a != ndim_b) goto decref_then_done;
    for (int d = 0; d < ndim_a; ++d)
        if (shape_a[d] != shape_b[d]) goto decref_then_done;
    equal = is_equal_data_dtable[int(dtype)](a, b, calc_size(ndim_a, shape_a));

decref_then_done:
    Py_DECREF(a);
    Py_DECREF(b);

done:
    PyObject *result = ((op == Py_EQ) == equal) ? Py_True : Py_False;
    Py_INCREF(result);
    return result;
}

PyObject *get_dtype_py(PyObject *self, void *)
{
    static PyObject *dtypes[] = {
        (PyObject*)&PyInt_Type,
        (PyObject*)&PyFloat_Type,
        (PyObject*)&PyComplex_Type
    };
    int dtype = int(get_dtype(self));
    assert(dtype < int(NONE));
    return dtypes[dtype];
}

PyObject *get_ndim(Array_base *self, void *)
{
    int ndim;
    self->ndim_shape(&ndim, 0);
    return PyLong_FromLong(ndim);
}

PyObject *get_size(Array_base *self, void *)
{
    int ndim;
    size_t *shape;
    self->ndim_shape(&ndim, &shape);
    return PyLong_FromSize_t(calc_size(ndim, shape));
}

PyObject *get_shape(Array_base *self, void *)
{
    int ndim;
    size_t *shape;
    self->ndim_shape(&ndim, &shape);
    size_t result_shape = ndim;
    Array<long> *result = Array<long>::make(1, &result_shape);
    if (!result) return 0;
    long *data = result->data();
    for (int d = 0; d < ndim; ++d) data[d] = shape[d];
    return (PyObject*)result;
}

PyGetSetDef getset[] = {
    {(char*)"dtype", get_dtype_py, 0, 0, 0},
    {(char*)"ndim", (getter)get_ndim, 0, 0, 0},
    {(char*)"size", (getter)get_size, 0, 0, 0},
    {(char*)"shape", (getter)get_shape, 0, 0, 0},
    {0, 0, 0, 0, 0}               // Sentinel
};

// **************** Iterator ****************

template <typename T>
class Array_iter {
public:
    static Array_iter *make(Array<T> *array);
    static void dealloc(Array_iter<T> *self);
    static PyObject *next(Array_iter<T> *self);
    static PyObject *len(Array_iter<T> *self);
private:
    static PyMethodDef methods[];
    static PyTypeObject pytype;
    static const char *pyname;
    PyObject ob_base;
    size_t index;
    Array<T> *array;            // Set to 0 when iterator is exhausted.
};

template <>
const char *Array_iter<long>::pyname = "tinyarray.ndarrayiter_int";
template <>
const char *Array_iter<double>::pyname = "tinyarray.ndarrayiter_float";
template <>
const char *Array_iter<Complex>::pyname = "tinyarray.ndarrayiter_complex";
For faster browsing, not all history is shown. View entire blame