Commit e937ddbc authored by Christoph Groth's avatar Christoph Groth
Browse files

restore compatibility with tuple hash from Python 3.8

parent efbbc9c3
...@@ -850,8 +850,19 @@ Py_hash_t hash(Complex x) ...@@ -850,8 +850,19 @@ Py_hash_t hash(Complex x)
return hash(x.real()) + HASH_IMAG * hash(x.imag()); return hash(x.real()) + HASH_IMAG * hash(x.imag());
} }
// This routine calculates the hash of a multi-dimensional array. The hash is // The following routine calculates the hash of a multi-dimensional array. The
// equal to that of an arrangement of nested tuples equivalent to the array. // hash is equal to that of an arrangement of nested tuples equivalent to the
// array.
//
// It exists in two versions because Python's tuplehash has changed in Python
// 3.8 with the following motivation: "The hash function for tuples is now
// based on xxHash which gives better collision results on (formerly)
// pathological cases. Additionally, on 64-bit systems it improves tuple hashes
// in general."
#if (PY_MAJOR_VERSION < 3 || PY_MINOR_VERSION < 8) && PY_MAJOR_VERSION < 4
// Version for Python < 3.8
template <typename T> template <typename T>
Py_hash_t hash(PyObject *obj) Py_hash_t hash(PyObject *obj)
{ {
...@@ -875,16 +886,20 @@ Py_hash_t hash(PyObject *obj) ...@@ -875,16 +886,20 @@ Py_hash_t hash(PyObject *obj)
if (i[d]) { if (i[d]) {
--i[d]; --i[d];
if (d == ndim) { if (d == ndim) {
// Innermost loop body.
r[d] = (r[d] ^ hash(*p++)) * mult[d]; r[d] = (r[d] ^ hash(*p++)) * mult[d];
mult[d] += mul_addend + 2 * i[d]; mult[d] += mul_addend + 2 * i[d];
} else { } else {
// Entering a loop.
++d; ++d;
i[d] = shape[d]; i[d] = shape[d];
mult[d] = mult_init; mult[d] = mult_init;
r[d] = r_init; r[d] = r_init;
} }
} else { } else {
// Exiting a loop.
if (d == 0) { if (d == 0) {
// Exiting the outermost loop.
Py_uhash_t r_next = r[0] + r_addend; Py_uhash_t r_next = r[0] + r_addend;
return r_next == Py_uhash_t(-1) ? -2 : r_next; return r_next == Py_uhash_t(-1) ? -2 : r_next;
} }
...@@ -897,6 +912,77 @@ Py_hash_t hash(PyObject *obj) ...@@ -897,6 +912,77 @@ Py_hash_t hash(PyObject *obj)
} }
} }
#else
// Building blocks of the xxHash-based tuple hash used by Python >= 3.8.
// Two variants are provided, selected by the width of Py_uhash_t:
//   _hash_init        - starting value of a tuple's hash accumulator,
//   _hash_inner_loop  - folds the hash of one element ("lane") into 'acc'.
// The constants and rotation amounts are meant to mirror those of CPython's
// tuplehash (Objects/tupleobject.c); they must not be altered, otherwise
// array hashes would no longer agree with the hashes of equivalent tuples.
#if SIZEOF_PY_UHASH_T > 4
// Variant for a Py_uhash_t wider than 4 bytes (64-bit constants).
const Py_uhash_t _hash_init = 2870177450012600261U;

inline void _hash_inner_loop(Py_uhash_t &acc, Py_uhash_t lane)
{
    const Py_uhash_t lane_prime = 14029467366897019727U;
    const Py_uhash_t acc_prime = 11400714785074694791U;

    // Mix the lane in, rotate left by 31 bits, then scramble.
    const Py_uhash_t mixed = acc + lane * lane_prime;
    acc = ((mixed << 31) | (mixed >> 33)) * acc_prime;
}
#else
// Variant for a Py_uhash_t of at most 4 bytes (32-bit constants).
const Py_uhash_t _hash_init = 374761393U;

inline void _hash_inner_loop(Py_uhash_t &acc, Py_uhash_t lane)
{
    const Py_uhash_t lane_prime = 2246822519U;
    const Py_uhash_t acc_prime = 2654435761U;

    // Mix the lane in, rotate left by 13 bits, then scramble.
    const Py_uhash_t mixed = acc + lane * lane_prime;
    acc = ((mixed << 13) | (mixed >> 19)) * acc_prime;
}
#endif
// Finalizes the hash accumulator of one tuple-equivalent sequence.
//
// acc: accumulator after all elements have been fed to _hash_inner_loop.
// len: number of elements in the sequence.
//
// Returns the finished hash.  The all-ones value (i.e. -1 when viewed as a
// signed hash) is never returned; it is replaced by the fixed value
// 1546275796.
inline Py_uhash_t _hash_loop_end(Py_uhash_t acc, Py_uhash_t len)
{
    // Fold in the sequence length, mangled with a constant.
    const Py_uhash_t finished = acc + (len ^ (_hash_init ^ 3527539UL));
    return finished == Py_uhash_t(-1) ? Py_uhash_t(1546275796) : finished;
}
// Version for Python >= 3.8
//
// Computes the hash of the array 'obj' (which is treated as an Array<T>) such
// that it equals the hash of an equivalent arrangement of nested tuples.  A
// 0-dimensional array hashes like its single scalar element.
//
// The nested loops over the dimensions are emulated with an explicit stack:
// for each nesting level there is a counter of entries still to be visited
// and a hash accumulator.  (Presumably max_ndim bounds the number of
// dimensions an array can have, so that the fixed-size stacks suffice.)
template <typename T>
Py_hash_t hash(PyObject *obj)
{
    Array<T> *self = reinterpret_cast<Array<T> *>(obj);
    int ndim;
    size_t *shape;
    self->ndim_shape(&ndim, &shape);
    T *p = self->data();
    if (ndim == 0) return hash(*p);

    const int innermost = ndim - 1;     // Index of the deepest level.
    Py_ssize_t remaining[max_ndim];     // Entries left to visit per level.
    Py_uhash_t acc[max_ndim];           // Hash accumulator per level.

    int depth = 0;
    remaining[0] = shape[0];
    acc[0] = _hash_init;

    // The following is equivalent to 'ndim' nested loops.
    for (;;) {
        if (remaining[depth] == 0) {
            // The current level is exhausted: finish its hash.
            const Py_uhash_t finished =
                _hash_loop_end(acc[depth], shape[depth]);

            // The hash of the outermost level is the result.
            if (depth == 0) return finished;

            // Otherwise it counts as one element of the enclosing level.
            --depth;
            _hash_inner_loop(acc[depth], finished);
            continue;
        }

        --remaining[depth];
        if (depth == innermost) {
            // Innermost level: consume the next scalar of the data.
            _hash_inner_loop(acc[depth], hash(*p++));
        } else {
            // Descend into a fresh sub-sequence.
            ++depth;
            remaining[depth] = shape[depth];
            acc[depth] = _hash_init;
        }
    }
}
#endif
template <typename T> template <typename T>
bool compare_scalar(const int op, const T a, const T b) { bool compare_scalar(const int op, const T a, const T b) {
switch(op){ switch(op){
......
...@@ -11,7 +11,7 @@ import operator, warnings ...@@ -11,7 +11,7 @@ import operator, warnings
import platform import platform
import itertools as it import itertools as it
import tinyarray as ta import tinyarray as ta
from pytest import raises, xfail from pytest import raises
import numpy as np import numpy as np
from numpy.testing import assert_equal, assert_almost_equal from numpy.testing import assert_equal, assert_almost_equal
import sys import sys
...@@ -264,10 +264,6 @@ def test_iteration(): ...@@ -264,10 +264,6 @@ def test_iteration():
def test_as_dict_key(): def test_as_dict_key():
# TODO: remove this once gitlab issue 16 is closed
if sys.version_info >= (3, 8, 0):
xfail('New version of tuple hash not supported yet.')
n = 100 n = 100
d = {} d = {}
for dtype in dtypes + dtypes: for dtype in dtypes + dtypes:
...@@ -279,10 +275,6 @@ def test_as_dict_key(): ...@@ -279,10 +275,6 @@ def test_as_dict_key():
def test_hash_equality(): def test_hash_equality():
# TODO: remove this once gitlab issue 16 is closed
if sys.version_info >= (3, 8, 0):
xfail('New version of tuple hash not supported yet.')
random.seed(123) random.seed(123)
# These refer to the width of integers stored in a tinyarray.ndarray_int. # These refer to the width of integers stored in a tinyarray.ndarray_int.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment