Commit e937ddbc authored by Christoph Groth's avatar Christoph Groth
Browse files

restore compatibility with tuple hash from Python 3.8

parent efbbc9c3
......@@ -850,8 +850,19 @@ Py_hash_t hash(Complex x)
return hash(x.real()) + HASH_IMAG * hash(x.imag());
}
// The following routine calculates the hash of a multi-dimensional array. The
// hash is equal to that of an arrangement of nested tuples equivalent to the
// array.
//
// Two versions exist because Python's tuple hash changed in Python 3.8. The
// stated motivation for that change: "The hash function for tuples is now
// based on xxHash which gives better collision results on (formerly)
// pathological cases. Additionally, on 64-bit systems it improves tuple hashes
// in general."
#if (PY_MAJOR_VERSION < 3 || PY_MINOR_VERSION < 8) && PY_MAJOR_VERSION < 4
// Version for Python < 3.8
template <typename T>
Py_hash_t hash(PyObject *obj)
{
......@@ -875,16 +886,20 @@ Py_hash_t hash(PyObject *obj)
if (i[d]) {
--i[d];
if (d == ndim) {
// Innermost loop body.
r[d] = (r[d] ^ hash(*p++)) * mult[d];
mult[d] += mul_addend + 2 * i[d];
} else {
// Entering a loop.
++d;
i[d] = shape[d];
mult[d] = mult_init;
r[d] = r_init;
}
} else {
// Exiting a loop.
if (d == 0) {
// Exiting the outermost loop.
Py_uhash_t r_next = r[0] + r_addend;
return r_next == Py_uhash_t(-1) ? -2 : r_next;
}
......@@ -897,6 +912,77 @@ Py_hash_t hash(PyObject *obj)
}
}
#else
#if SIZEOF_PY_UHASH_T > 4
// Starting value of every tuple-hash accumulator (variant used when
// Py_uhash_t is wider than 4 bytes).
const Py_uhash_t _hash_init = 2870177450012600261U;

// Folds the hash of one item ('lane') into the running accumulator 'acc'.
// 'acc' is updated in place.  All arithmetic is unsigned and wraps around.
inline void _hash_inner_loop(Py_uhash_t &acc, Py_uhash_t lane)
{
    const Py_uhash_t mixed = acc + lane * 14029467366897019727U;
    // Rotate left by 31 bits.
    const Py_uhash_t rotated = (mixed << 31) | (mixed >> 33);
    acc = rotated * 11400714785074694791U;
}
#else
// Starting value of every tuple-hash accumulator (variant used when
// Py_uhash_t is at most 4 bytes wide).
const Py_uhash_t _hash_init = 374761393U;

// Folds the hash of one item ('lane') into the running accumulator 'acc'.
// 'acc' is updated in place.  All arithmetic is unsigned and wraps around.
inline void _hash_inner_loop(Py_uhash_t &acc, Py_uhash_t lane)
{
    const Py_uhash_t mixed = acc + lane * 2246822519U;
    // Rotate left by 13 bits.
    const Py_uhash_t rotated = (mixed << 13) | (mixed >> 19);
    acc = rotated * 2654435761U;
}
#endif
// Finishes the hash of one (virtual) tuple.
//
// 'acc' is the accumulator after all items have been folded in with
// _hash_inner_loop, 'len' is the number of items.  The length is mixed into
// the accumulator and the result is returned.  The all-ones value
// Py_uhash_t(-1) is never returned; 1546275796 is substituted for it.
inline Py_uhash_t _hash_loop_end(Py_uhash_t acc, Py_uhash_t len)
{
    const Py_uhash_t mixed = acc + (len ^ (_hash_init ^ 3527539UL));
    return mixed == Py_uhash_t(-1) ? Py_uhash_t(1546275796) : mixed;
}
// Version for Python >= 3.8
//
// Hashes the array behind 'obj' (which must point to an Array<T>) as if it
// were an arrangement of nested tuples: every dimension corresponds to one
// level of tuples, and the innermost tuples hold the hashes of the elements.
// A zero-dimensional array hashes like the single element it stores.
template <typename T>
Py_hash_t hash(PyObject *obj)
{
    Array<T> *arr = reinterpret_cast<Array<T> *>(obj);
    int ndim;
    size_t *shape;
    arr->ndim_shape(&ndim, &shape);
    T *elem = arr->data();

    if (ndim == 0) return hash(*elem);

    const int innermost = ndim - 1;   // Index of the last dimension.
    Py_ssize_t remaining[max_ndim];   // Items still to visit, per level.
    Py_uhash_t acc[max_ndim];         // Running tuple hash, per level.

    int level = 0;
    remaining[0] = shape[0];
    acc[0] = _hash_init;

    // The following emulates 'ndim' nested loops with an explicit stack.
    for (;;) {
        if (remaining[level] == 0) {
            // The loop at this level is exhausted: finish its tuple hash.
            const Py_uhash_t done = _hash_loop_end(acc[level], shape[level]);
            // Leaving the outermost loop yields the hash of the whole array.
            if (level == 0) return done;
            // Otherwise the finished hash is one item of the enclosing tuple.
            --level;
            _hash_inner_loop(acc[level], done);
            continue;
        }

        --remaining[level];
        if (level == innermost) {
            // Innermost loop body: consume the next element.
            _hash_inner_loop(acc[level], hash(*elem++));
        } else {
            // Enter the next deeper loop with a fresh accumulator.
            ++level;
            remaining[level] = shape[level];
            acc[level] = _hash_init;
        }
    }
}
#endif
template <typename T>
bool compare_scalar(const int op, const T a, const T b) {
switch(op){
......
......@@ -11,7 +11,7 @@ import operator, warnings
import platform
import itertools as it
import tinyarray as ta
from pytest import raises, xfail
from pytest import raises
import numpy as np
from numpy.testing import assert_equal, assert_almost_equal
import sys
......@@ -264,10 +264,6 @@ def test_iteration():
def test_as_dict_key():
# TODO: remove this once gitlab issue 16 is closed
if sys.version_info >= (3, 8, 0):
xfail('New version of tuple hash not supported yet.')
n = 100
d = {}
for dtype in dtypes + dtypes:
......@@ -279,10 +275,6 @@ def test_as_dict_key():
def test_hash_equality():
# TODO: remove this once gitlab issue 16 is closed
if sys.version_info >= (3, 8, 0):
xfail('New version of tuple hash not supported yet.')
random.seed(123)
# These refer to the width of integers stored in a tinyarray.ndarray_int.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment