diff --git a/kwant/linalg/decomp_ev.py b/kwant/linalg/decomp_ev.py index 8e7b95d68b0eae6701f3fe38a624653a5aa67f68..bd92ce9335d2ba31edc4535f0a8a12a5828e2943 100644 --- a/kwant/linalg/decomp_ev.py +++ b/kwant/linalg/decomp_ev.py @@ -50,5 +50,18 @@ def gen_eig(a, b, left=False, right=True, overwrite_ab=False): The right eigenvector corresponding to the eigenvalue ``alpha[i]/beta[i]`` is the column ``vr[:,i]``. """ - a, b = lapack.prepare_for_lapack(overwrite_ab, a, b) - return lapack.ggev(a, b, left, right) + + ltype, a, b = lapack.prepare_for_lapack(overwrite_ab, a, b) + + if a.ndim != 2 or b.ndim != 2: + raise ValueError("gen_eig requires both a and be to be matrices") + + if a.shape[0] != a.shape[1]: + raise ValueError("gen_eig requires square matrix input") + + if b.shape[0] != a.shape[0] or b.shape[1] != a.shape[1]: + raise ValueError("gen_eig requires a and be to have the same shape") + + ggev = getattr(lapack, ltype + "ggev") + + return ggev(a, b, left, right) diff --git a/kwant/linalg/decomp_lu.py b/kwant/linalg/decomp_lu.py index 639b547bfdb082eb0e45df3180bb31ceeb67937f..79accc254feaa53206a9ea2f4a9d977a0d904138 100644 --- a/kwant/linalg/decomp_lu.py +++ b/kwant/linalg/decomp_lu.py @@ -45,8 +45,20 @@ def lu_factor(a, overwrite_a=False): singular : boolean Whether the matrix a is singular (up to machine precision) """ - a = lapack.prepare_for_lapack(overwrite_a, a) - return lapack.getrf(a) + + ltype, a = lapack.prepare_for_lapack(overwrite_a, a) + + if a.ndim != 2: + raise ValueError("lu_factor expects a matrix") + + if ltype == 'd': + return lapack.dgetrf(a) + elif ltype == 'z': + return lapack.zgetrf(a) + elif ltype == 's': + return lapack.sgetrf(a) + else: + return lapack.cgetrf(a) def lu_solve(matrix_factorization, b): @@ -71,9 +83,23 @@ def lu_solve(matrix_factorization, b): "a singular matrix. Result of solve step " "are probably unreliable") - lu, b = lapack.prepare_for_lapack(False, lu, b) + ltype, lu, b = lapack.prepare_for_lapack(False, lu, b) ipiv = np.ascontiguousarray(np.asanyarray(ipiv), dtype=lapack.int_dtype) - return lapack.getrs(lu, ipiv, b) + + if b.ndim > 2: + raise ValueError("lu_solve: b must be a vector or matrix") + + if lu.shape[0] != b.shape[0]: + raise ValueError("lu_solve: incompatible dimensions of b") + + if ltype == 'd': + return lapack.dgetrs(lu, ipiv, b) + elif ltype == 'z': + return lapack.zgetrs(lu, ipiv, b) + elif ltype == 's': + return lapack.sgetrs(lu, ipiv, b) + else: + return lapack.cgetrs(lu, ipiv, b) def rcond_from_lu(matrix_factorization, norm_a, norm="1"): @@ -101,6 +127,17 @@ def rcond_from_lu(matrix_factorization, norm_a, norm="1"): norm specified in norm """ (lu, ipiv, singular) = matrix_factorization + if not norm in ("1", "I"): + raise ValueError("norm in rcond_from_lu must be either '1' or 'I'") norm = norm.encode('utf8') # lapack expects bytes - lu = lapack.prepare_for_lapack(False, lu) - return lapack.gecon(lu, norm_a, norm) + + ltype, lu = lapack.prepare_for_lapack(False, lu) + + if ltype == 'd': + return lapack.dgecon(lu, norm_a, norm) + elif ltype == 'z': + return lapack.zgecon(lu, norm_a, norm) + elif ltype == 's': + return lapack.sgecon(lu, norm_a, norm) + else: + return lapack.cgecon(lu, norm_a, norm) diff --git a/kwant/linalg/decomp_schur.py b/kwant/linalg/decomp_schur.py index d65432132efd3084e26a25643dbc36b82f48b39c..4accc17ff0f1643c9532ada85a29dfa08a2b67fa 100644 --- a/kwant/linalg/decomp_schur.py +++ b/kwant/linalg/decomp_schur.py @@ -62,8 +62,18 @@ def schur(a, calc_q=True, calc_ev=True, overwrite_a=False): LinAlgError If the underlying QR iteration fails to converge. """ - a = lapack.prepare_for_lapack(overwrite_a, a) - return lapack.gees(a, calc_q, calc_ev) + + ltype, a = lapack.prepare_for_lapack(overwrite_a, a) + + if a.ndim != 2: + raise ValueError("Expect matrix as input") + + if a.shape[0] != a.shape[1]: + raise ValueError("Expect square matrix") + + gees = getattr(lapack, ltype + "gees") + + return gees(a, calc_q, calc_ev) def convert_r2c_schur(t, q): @@ -182,7 +192,9 @@ def order_schur(select, t, q, calc_ev=True, overwrite_tq=False): ``calc_ev == True`` """ - t, q = lapack.prepare_for_lapack(overwrite_tq, t, q) + ltype, t, q = lapack.prepare_for_lapack(overwrite_tq, t, q) + + trsen = getattr(lapack, ltype + "trsen") # Figure out if select is a function or array. isfun = isarray = True @@ -211,7 +223,7 @@ def order_schur(select, t, q, calc_ev=True, overwrite_tq=False): t, q = convert_r2c_schur(t, q) return order_schur(select, t, q, calc_ev, True) - return lapack.trsen(select, t, q, calc_ev) + return trsen(select, t, q, calc_ev) def evecs_from_schur(t, q, select=None, left=False, right=True, @@ -255,7 +267,13 @@ def evecs_from_schur(t, q, select=None, left=False, right=True, ``right == True``. """ - t, q = lapack.prepare_for_lapack(overwrite_tq, t, q) + ltype, t, q = lapack.prepare_for_lapack(overwrite_tq, t, q) + + if (t.shape[0] != t.shape[1] or q.shape[0] != q.shape[1] + or t.shape[0] != q.shape[0]): + raise ValueError("Invalid Schur decomposition as input") + + trevc = getattr(lapack, ltype + "trevc") # check if select is a function or an array if select is not None: @@ -282,7 +300,7 @@ def evecs_from_schur(t, q, select=None, left=False, right=True, else: selectarr = None - return lapack.trevc(t, q, selectarr, left, right) + return trevc(t, q, selectarr, left, right) def gen_schur(a, b, calc_q=True, calc_z=True, calc_ev=True, @@ -346,8 +364,21 @@ def gen_schur(a, b, calc_q=True, calc_z=True, calc_ev=True, LinAlError If the underlying QZ iteration fails to converge. """ - a, b = lapack.prepare_for_lapack(overwrite_ab, a, b) - return lapack.gges(a, b, calc_q, calc_z, calc_ev) + + ltype, a, b = lapack.prepare_for_lapack(overwrite_ab, a, b) + + if a.ndim != 2 or b.ndim != 2: + raise ValueError("Expect matrices as input") + + if a.shape[0] != a.shape[1]: + raise ValueError("Expect square matrix a") + + if a.shape[0] != b.shape[0] or a.shape[0] != b.shape[1]: + raise ValueError("Shape of b is incompatible to matrix a") + + gges = getattr(lapack, ltype + "gges") + + return gges(a, b, calc_q, calc_z, calc_ev) def order_gen_schur(select, s, t, q=None, z=None, calc_ev=True, @@ -408,8 +439,22 @@ def order_gen_schur(select, s, t, q=None, z=None, calc_ev=True, LinAlError If the problem is too ill-conditioned. """ - s, t, q, z = lapack.prepare_for_lapack(overwrite_stqz, s, t, q, z) + ltype, s, t, q, z = lapack.prepare_for_lapack(overwrite_stqz, s, t, q, z) + if (s.ndim != 2 or t.ndim != 2 or + (q is not None and q.ndim != 2) or + (z is not None and z.ndim != 2)): + raise ValueError("Expect matrices as input") + + if ((s.shape[0] != s.shape[1] or t.shape[0] != t.shape[1] or + s.shape[0] != t.shape[0]) or + (q is not None and (q.shape[0] != q.shape[1] or + s.shape[0] != q.shape[0])) or + (z is not None and (z.shape[0] != z.shape[1] or + s.shape[0] != z.shape[0]))): + raise ValueError("Invalid Schur decomposition as input") + + tgsen = getattr(lapack, ltype + "tgsen") # Figure out if select is a function or array. isfun = isarray = True @@ -447,7 +492,7 @@ def order_gen_schur(select, s, t, q=None, z=None, calc_ev=True, return order_gen_schur(select, s, t, q, z, calc_ev, True) - return lapack.tgsen(select, s, t, q, z, calc_ev) + return tgsen(select, s, t, q, z, calc_ev) def convert_r2c_gen_schur(s, t, q=None, z=None): @@ -491,7 +536,7 @@ def convert_r2c_gen_schur(s, t, q=None, z=None): If it fails to convert a 2x2 block into complex form (unlikely). """ - s, t, q, z = lapack.prepare_for_lapack(True, s, t, q, z) + ltype, s, t, q, z = lapack.prepare_for_lapack(True, s, t, q, z) # Note: overwrite=True does not mean much here, the arrays are all copied if (s.ndim != 2 or t.ndim != 2 or @@ -611,7 +656,20 @@ def evecs_from_gen_schur(s, t, q=None, z=None, select=None, """ - s, t, q, z = lapack.prepare_for_lapack(overwrite_qz, s, t, q, z) + ltype, s, t, q, z = lapack.prepare_for_lapack(overwrite_qz, s, t, q, z) + + if (s.ndim != 2 or t.ndim != 2 or + (q is not None and q.ndim != 2) or + (z is not None and z.ndim != 2)): + raise ValueError("Expect matrices as input") + + if ((s.shape[0] != s.shape[1] or t.shape[0] != t.shape[1] or + s.shape[0] != t.shape[0]) or + (q is not None and (q.shape[0] != q.shape[1] or + s.shape[0] != q.shape[0])) or + (z is not None and (z.shape[0] != z.shape[1] or + s.shape[0] != z.shape[0]))): + raise ValueError("Invalid Schur decomposition as input") if left and q is None: raise ValueError("Matrix q must be provided for left eigenvectors") @@ -619,6 +677,8 @@ def evecs_from_gen_schur(s, t, q=None, z=None, select=None, if right and z is None: raise ValueError("Matrix z must be provided for right eigenvectors") + tgevc = getattr(lapack, ltype + "tgevc") + # Check if select is a function or an array. if select is not None: isfun = isarray = True @@ -644,4 +704,4 @@ def evecs_from_gen_schur(s, t, q=None, z=None, select=None, else: selectarr = None - return lapack.tgevc(s, t, q, z, selectarr, left, right) + return tgevc(s, t, q, z, selectarr, left, right) diff --git a/kwant/linalg/f_lapack.pxd b/kwant/linalg/f_lapack.pxd new file mode 100644 index 0000000000000000000000000000000000000000..1a0304af871c6bb75994eb0668389e0d6ad2b54d --- /dev/null +++ b/kwant/linalg/f_lapack.pxd @@ -0,0 +1,217 @@ +# Copyright 2011-2013 Kwant authors. +# +# This file is part of Kwant. It is subject to the license terms in the file +# LICENSE.rst found in the top-level directory of this distribution and at +# http://kwant-project.org/license. A list of Kwant authors can be found in +# the file AUTHORS.rst at the top-level directory of this distribution and at +# http://kwant-project.org/authors. + +ctypedef int l_int +ctypedef int l_logical + +cdef extern: + void sgetrf_(l_int *, l_int *, float *, l_int *, l_int *, l_int *) + void dgetrf_(l_int *, l_int *, double *, l_int *, l_int *, l_int *) + void cgetrf_(l_int *, l_int *, float complex *, l_int *, l_int *, + l_int *) + void zgetrf_(l_int *, l_int *, double complex *, l_int *, l_int *, + l_int *) + + void sgetrs_(char *, l_int *, l_int *, float *, l_int *, l_int *, + float *, l_int *, l_int *) + void dgetrs_(char *, l_int *, l_int *, double *, l_int *, l_int *, + double *, l_int *, l_int *) + void cgetrs_(char *, l_int *, l_int *, float complex *, l_int *, + l_int *, float complex *, l_int *, l_int *) + void zgetrs_(char *, l_int *, l_int *, double complex *, l_int *, + l_int *, double complex *, l_int *, l_int *) + + void sgecon_(char *, l_int *, float *, l_int *, float *, float *, + float *, l_int *, l_int *) + void dgecon_(char *, l_int *, double *, l_int *, double *, double *, + double *, l_int *, l_int *) + void cgecon_(char *, l_int *, float complex *, l_int *, float *, + float *, float complex *, float *, l_int *) + void zgecon_(char *, l_int *, double complex *, l_int *, double *, + double *, double complex *, double *, l_int *) + + void sggev_(char *, char *, l_int *, float *, l_int *, float *, l_int *, + float *, float *, float *, float *, l_int *, float *, l_int *, + float *, l_int *, l_int *) + void dggev_(char *, char *, l_int *, double *, l_int *, double *, l_int *, + double *, double *, double *, double *, l_int *, + double *, l_int *, double *, l_int *, l_int *) + void cggev_(char *, char *, l_int *, float complex *, l_int *, + float complex *, l_int *, float complex *, float complex *, + float complex *, l_int *, float complex *, l_int *, + float complex *, l_int *, float *, l_int *) + void zggev_(char *, char *, l_int *, double complex *, l_int *, + double complex *, l_int *, double complex *, + double complex *, double complex *, l_int *, + double complex *, l_int *, double complex *, l_int *, + double *, l_int *) + + void sgees_(char *, char *, l_logical (*)(float *, float *), + l_int *, float *, l_int *, l_int *, + float *, float *, float *, l_int *, + float *, l_int *, l_logical *, l_int *) + void dgees_(char *, char *, l_logical (*)(double *, double *), + l_int *, double *, l_int *, l_int *, + double *, double *, double *, l_int *, + double *, l_int *, l_logical *, l_int *) + void cgees_(char *, char *, + l_logical (*)(float complex *), + l_int *, float complex *, + l_int *, l_int *, float complex *, + float complex *, l_int *, + float complex *, l_int *, float *, + l_logical *, l_int *) + void zgees_(char *, char *, + l_logical (*)(double complex *), + l_int *, double complex *, + l_int *, l_int *, double complex *, + double complex *, l_int *, + double complex *, l_int *, + double *, l_logical *, l_int *) + + void strsen_(char *, char *, l_logical *, l_int *, + float *, l_int *, float *, + l_int *, float *, float *, l_int *, + float *, float *, float *, l_int *, + l_int *, l_int *, l_int *) + void dtrsen_(char *, char *, l_logical *, + l_int *, double *, l_int *, + double *, l_int *, double *, double *, + l_int *, double *, double *, double *, + l_int *, l_int *, l_int *, l_int *) + void ctrsen_(char *, char *, l_logical *, + l_int *, float complex *, + l_int *, float complex *, + l_int *, float complex *, l_int *, + float *, float *, float complex *, + l_int *, l_int *) + void ztrsen_(char *, char *, l_logical *, + l_int *, double complex *, + l_int *, double complex *, + l_int *, double complex *, l_int *, + double *, double *, double complex *, + l_int *, l_int *) + + void strevc_(char *, char *, l_logical *, + l_int *, float *, l_int *, + float *, l_int *, float *, l_int *, + l_int *, l_int *, float *, l_int *) + void dtrevc_(char *, char *, l_logical *, + l_int *, double *, l_int *, + double *, l_int *, double *, + l_int *, l_int *, l_int *, double *, + l_int *) + void ctrevc_(char *, char *, l_logical *, + l_int *, float complex *, + l_int *, float complex *, + l_int *, float complex *, + l_int *, l_int *, l_int *, + float complex *, float *, l_int *) + void ztrevc_(char *, char *, l_logical *, + l_int *, double complex *, + l_int *, double complex *, + l_int *, double complex *, + l_int *, l_int *, l_int *, + double complex *, double *, l_int *) + + void sgges_(char *, char *, char *, + l_logical (*)(float *, float *, float *), + l_int *, float *, l_int *, float *, + l_int *, l_int *, float *, float *, + float *, float *, l_int *, float *, + l_int *, float *, l_int *, l_logical *, + l_int *) + void dgges_(char *, char *, char *, + l_logical (*)(double *, double *, double *), + l_int *, double *, l_int *, double *, + l_int *, l_int *, double *, double *, + double *, double *, l_int *, double *, + l_int *, double *, l_int *, + l_logical *, l_int *) + void cgges_(char *, char *, char *, + l_logical (*)(float complex *, float complex *), + l_int *, float complex *, + l_int *, float complex *, + l_int *, l_int *, float complex *, + float complex *, float complex *, + l_int *, float complex *, + l_int *, float complex *, + l_int *, float *, l_logical *, l_int *) + void zgges_(char *, char *, char *, + l_logical (*)(double complex *, double complex *), + l_int *, double complex *, + l_int *, double complex *, + l_int *, l_int *, double complex *, + double complex *, + double complex *, l_int *, + double complex *, l_int *, + double complex *, l_int *, + double *, l_logical *, l_int *) + + void stgsen_(l_int *, l_logical *, + l_logical *, l_logical *, + l_int *, float *, l_int *, float *, + l_int *, float *, float *, float *, + float *, l_int *, float *, l_int *, + l_int *, float *, float *, float *, float *, + l_int *, l_int *, l_int *, l_int *) + void dtgsen_(l_int *, l_logical *, + l_logical *, l_logical *, + l_int *, double *, l_int *, + double *, l_int *, double *, double *, + double *, double *, l_int *, double *, + l_int *, l_int *, double *, double *, + double *, double *, l_int *, l_int *, + l_int *, l_int *) + void ctgsen_(l_int *, l_logical *, + l_logical *, l_logical *, + l_int *, float complex *, + l_int *, float complex *, + l_int *, float complex *, + float complex *, + float complex *, l_int *, + float complex *, l_int *, l_int *, + float *, float *, float *, + float complex *, l_int *, l_int *, + l_int *, l_int *) + void ztgsen_(l_int *, l_logical *, + l_logical *, l_logical *, + l_int *, double complex *, + l_int *, double complex *, + l_int *, double complex *, + double complex *, + double complex *, l_int *, + double complex *, l_int *, l_int *, + double *, double *, double *, + double complex *, l_int *, l_int *, + l_int *, l_int *) + + void stgevc_(char *, char *, l_logical *, + l_int *, float *, l_int *, + float *, l_int *, float *, + l_int *, float *, l_int *, + l_int *, l_int *, float *, l_int *) + void dtgevc_(char *, char *, l_logical *, + l_int *, double *, l_int *, + double *, l_int *, double *, + l_int *, double *, l_int *, + l_int *, l_int *, double *, l_int *) + void ctgevc_(char *, char *, l_logical *, + l_int *, float complex *, + l_int *, float complex *, + l_int *, float complex *, + l_int *, float complex *, + l_int *, l_int *, l_int *, + float complex *, float *, l_int *) + void ztgevc_(char *, char *, l_logical *, + l_int *, double complex *, + l_int *, double complex *, + l_int *, double complex *, + l_int *, double complex *, + l_int *, l_int *, l_int *, + double complex *, double *, l_int *) diff --git a/kwant/linalg/f_lapack.py b/kwant/linalg/f_lapack.py new file mode 100644 index 0000000000000000000000000000000000000000..39b702b5394dafb773744dfa1d0faa05c17a419c --- /dev/null +++ b/kwant/linalg/f_lapack.py @@ -0,0 +1,14 @@ +# Copyright 2011-2013 Kwant authors. +# +# This file is part of Kwant. It is subject to the license terms in the file +# LICENSE.rst found in the top-level directory of this distribution and at +# http://kwant-project.org/license. A list of Kwant authors can be found in +# the file AUTHORS.rst at the top-level directory of this distribution and at +# http://kwant-project.org/authors. + +__all__ = ['l_int_dtype', 'l_logical_dtype'] + +import numpy as np + +l_int_dtype = np.int32 +l_logical_dtype = np.int32 diff --git a/kwant/linalg/lapack.pyx b/kwant/linalg/lapack.pyx index e173bc0ce28e860f93ee58d36c725d020fd95c72..d6699a0e11ea912506d417d1e1ab59270c3270a3 100644 --- a/kwant/linalg/lapack.pyx +++ b/kwant/linalg/lapack.pyx @@ -1,4 +1,4 @@ -# Copyright 2011-2017 Kwant authors. +# Copyright 2011-2013 Kwant authors. # # This file is part of Kwant. It is subject to the license terms in the file # LICENSE.rst found in the top-level directory of this distribution and at @@ -8,53 +8,29 @@ """Low-level access to LAPACK functions. """ -__all__ = ['getrf', - 'getrs', - 'gecon', - 'ggev', - 'gees', - 'trsen', - 'trevc', - 'gges', - 'tgsen', - 'tgevc', +__all__ = ['sgetrf', 'dgetrf', 'cgetrf', 'zgetrf', + 'sgetrs', 'dgetrs', 'cgetrs', 'zgetrs', + 'sgecon', 'dgecon', 'cgecon', 'zgecon', + 'sggev', 'dggev', 'cggev', 'zggev', + 'sgees', 'dgees', 'cgees', 'zgees', + 'strsen', 'dtrsen', 'ctrsen', 'ztrsen', + 'strevc', 'dtrevc', 'ctrevc', 'ztrevc', + 'sgges', 'dgges', 'cgges', 'zgges', + 'stgsen', 'dtgsen', 'ctgsen', 'ztgsen', + 'stgevc', 'dtgevc', 'ctgevc', 'ztgevc', 'prepare_for_lapack'] import numpy as np cimport numpy as np -cimport scipy.linalg.cython_lapack as lapack +# Strangely absolute imports from kwant.linalg.f_lapack don't work here. So +# continue using traditional implicit relative imports. +from . import f_lapack +from . cimport f_lapack +from .f_lapack cimport l_int, l_logical -ctypedef int l_int -ctypedef bint l_logical - -int_dtype = np.int32 -logical_dtype = np.int32 - -ctypedef float complex float_complex -ctypedef double complex double_complex - -ctypedef fused scalar: - float - double - float complex - double complex - -ctypedef fused single_precision: - float - float complex - -ctypedef fused double_precision: - double - double complex - -ctypedef fused cmplx: - float complex - double complex - -ctypedef fused floating: - float - double +int_dtype = f_lapack.l_int_dtype +logical_dtype = f_lapack.l_logical_dtype # exceptions @@ -75,161 +51,284 @@ def assert_fortran_mat(*mats): raise ValueError("Input matrix must be Fortran contiguous") -cdef np.ndarray maybe_complex(scalar selector, - np.ndarray real, np.ndarray imag): - cdef np.ndarray r - r = real - if scalar in floating: - if imag.nonzero()[0].size: - r = real + 1j * imag - return r +# Wrappers for xGETRF +def sgetrf(np.ndarray[np.float32_t, ndim=2] A): + cdef l_int M, N, info + cdef np.ndarray[l_int, ndim=1] ipiv + + assert_fortran_mat(A) + + M = A.shape[0] + N = A.shape[1] + ipiv = np.empty(min(M,N), dtype = f_lapack.l_int_dtype) + + f_lapack.sgetrf_(&M, &N, <float *>A.data, &M, + <l_int *>ipiv.data, &info) + assert info >= 0, "Argument error in sgetrf" -cdef l_int lwork_from_qwork(scalar qwork): - if scalar in floating: - return <l_int>qwork - else: - return <l_int>qwork.real + return (A, ipiv, info > 0 or M != N) + +def dgetrf(np.ndarray[np.float64_t, ndim=2] A): + cdef l_int M, N, info + cdef np.ndarray[l_int, ndim=1] ipiv + + assert_fortran_mat(A) + + M = A.shape[0] + N = A.shape[1] + ipiv = np.empty(min(M,N), dtype = f_lapack.l_int_dtype) + + f_lapack.dgetrf_(&M, &N, <double *>A.data, &M, + <l_int *>ipiv.data, &info) + + assert info >= 0, "Argument error in dgetrf" + + return (A, ipiv, info > 0 or M != N) + +def cgetrf(np.ndarray[np.complex64_t, ndim=2] A): + cdef l_int M, N, info + cdef np.ndarray[l_int, ndim=1] ipiv + + assert_fortran_mat(A) + + M = A.shape[0] + N = A.shape[1] + ipiv = np.empty(min(M,N), dtype = f_lapack.l_int_dtype) + + f_lapack.cgetrf_(&M, &N, <float complex *>A.data, &M, + <l_int *>ipiv.data, &info) + + assert info >= 0, "Argument error in cgetrf" + return (A, ipiv, info > 0 or M != N) -def getrf(np.ndarray[scalar, ndim=2] A): +def zgetrf(np.ndarray[np.complex128_t, ndim=2] A): cdef l_int M, N, info - cdef np.ndarray[l_int] ipiv + cdef np.ndarray[l_int, ndim=1] ipiv assert_fortran_mat(A) M = A.shape[0] N = A.shape[1] - ipiv = np.empty(min(M,N), dtype = int_dtype) - - if scalar is float: - lapack.sgetrf(&M, &N, <float *>A.data, &M, - <l_int *>ipiv.data, &info) - elif scalar is double: - lapack.dgetrf(&M, &N, <double *>A.data, &M, - <l_int *>ipiv.data, &info) - elif scalar is float_complex: - lapack.cgetrf(&M, &N, <float complex *>A.data, &M, - <l_int *>ipiv.data, &info) - elif scalar is double_complex: - lapack.zgetrf(&M, &N, <double complex *>A.data, &M, - <l_int *>ipiv.data, &info) - - assert info >= 0, "Argument error in getrf" + ipiv = np.empty(min(M,N), dtype = f_lapack.l_int_dtype) + + f_lapack.zgetrf_(&M, &N, <double complex *>A.data, &M, + <l_int *>ipiv.data, &info) + + assert info >= 0, "Argument error in zgetrf" return (A, ipiv, info > 0 or M != N) +# Wrappers for xGETRS -def getrs(np.ndarray[scalar, ndim=2] LU, np.ndarray[l_int] IPIV, - np.ndarray B): +def sgetrs(np.ndarray[np.float32_t, ndim=2] LU, + np.ndarray[l_int, ndim=1] IPIV, B): cdef l_int N, NRHS, info + cdef np.ndarray b assert_fortran_mat(LU) - # Consistency checks for LU and B + # again: workaround for 1x1-Fortran bug in NumPy < v2.0 + if (not isinstance(B, np.ndarray) or + (B.ndim == 2 and (B.shape[0] > 1 or B.shape[1] > 1) and + not B.flags["F_CONTIGUOUS"])): + raise ValueError("In dgetrs: B must be a Fortran ordered NumPy array") + + b = B + N = LU.shape[0] + if b.ndim == 1: + NRHS = 1 + elif b.ndim == 2: + NRHS = B.shape[1] + else: + raise ValueError("In sgetrs: B must be a vector or matrix") + + f_lapack.sgetrs_("N", &N, &NRHS, <float *>LU.data, &N, + <l_int *>IPIV.data, <float *>b.data, &N, + &info) + + assert info == 0, "Argument error in sgetrs" + + return b - if B.descr.type_num != LU.descr.type_num: - raise TypeError('B must have same dtype as LU') +def dgetrs(np.ndarray[np.float64_t, ndim=2] LU, + np.ndarray[l_int, ndim=1] IPIV, B): + cdef l_int N, NRHS, info + cdef np.ndarray b + + assert_fortran_mat(LU) - # Workaround for 1x1-Fortran bug in NumPy < v2.0 - if ((B.ndim == 2 and (B.shape[0] > 1 or B.shape[1] > 1) and + # again: workaround for 1x1-Fortran bug in NumPy < v2.0 + if (not isinstance(B, np.ndarray) or + (B.ndim == 2 and (B.shape[0] > 1 or B.shape[1] > 1) and not B.flags["F_CONTIGUOUS"])): - raise ValueError("B must be Fortran ordered") + raise ValueError("In dgetrs: B must be a Fortran ordered NumPy array") + + b = B + N = LU.shape[0] + if b.ndim == 1: + NRHS = 1 + elif b.ndim == 2: + NRHS = b.shape[1] + else: + raise ValueError("In dgetrs: B must be a vector or matrix") + + f_lapack.dgetrs_("N", &N, &NRHS, <double *>LU.data, &N, + <l_int *>IPIV.data, <double *>b.data, &N, + &info) - if B.ndim > 2: - raise ValueError("B must be a vector or matrix") + assert info == 0, "Argument error in dgetrs" - if LU.shape[0] != B.shape[0]: - raise ValueError('LU and B have incompatible shapes') + return b +def cgetrs(np.ndarray[np.complex64_t, ndim=2] LU, + np.ndarray[l_int, ndim=1] IPIV, B): + cdef l_int N, NRHS, info + cdef np.ndarray b + + assert_fortran_mat(LU) + + # again: workaround for 1x1-Fortran bug in NumPy < v2.0 + if (not isinstance(B, np.ndarray) or + (B.ndim == 2 and (B.shape[0] > 1 or B.shape[1] > 1) and + not B.flags["F_CONTIGUOUS"])): + raise ValueError("In dgetrs: B must be a Fortran ordered NumPy array") + + b = B N = LU.shape[0] + if b.ndim == 1: + NRHS = 1 + elif b.ndim == 2: + NRHS = b.shape[1] + else: + raise ValueError("In cgetrs: B must be a vector or matrix") - if B.ndim == 1: + f_lapack.cgetrs_("N", &N, &NRHS, <float complex *>LU.data, &N, + <l_int *>IPIV.data, <float complex *>b.data, &N, + &info) + + assert info == 0, "Argument error in cgetrs" + + return b + +def zgetrs(np.ndarray[np.complex128_t, ndim=2] LU, + np.ndarray[l_int, ndim=1] IPIV, B): + cdef l_int N, NRHS, info + cdef np.ndarray b + + assert_fortran_mat(LU) + + # again: workaround for 1x1-Fortran bug in NumPy < v2.0 + if (not isinstance(B, np.ndarray) or + (B.ndim == 2 and (B.shape[0] > 1 or B.shape[1] > 1) and + not B.flags["F_CONTIGUOUS"])): + raise ValueError("In dgetrs: B must be a Fortran ordered NumPy array") + + b = B + N = LU.shape[0] + if b.ndim == 1: NRHS = 1 - elif B.ndim == 2: - NRHS = B.shape[1] + elif b.ndim == 2: + NRHS = b.shape[1] + else: + raise ValueError("In zgetrs: B must be a vector or matrix") + + f_lapack.zgetrs_("N", &N, &NRHS, <double complex *>LU.data, &N, + <l_int *>IPIV.data, <double complex *>b.data, &N, + &info) + + assert info == 0, "Argument error in zgetrs" + + return b + +# Wrappers for xGECON - if scalar is float: - lapack.sgetrs("N", &N, &NRHS, <float *>LU.data, &N, - <l_int *>IPIV.data, <float *>B.data, &N, - &info) - elif scalar is double: - lapack.dgetrs("N", &N, &NRHS, <double *>LU.data, &N, - <l_int *>IPIV.data, <double *>B.data, &N, - &info) - elif scalar is float_complex: - lapack.cgetrs("N", &N, &NRHS, <float complex *>LU.data, &N, - <l_int *>IPIV.data, <float complex *>B.data, &N, - &info) - elif scalar is double_complex: - lapack.zgetrs("N", &N, &NRHS, <double complex *>LU.data, &N, - <l_int *>IPIV.data, <double complex *>B.data, &N, - &info) - - assert info == 0, "Argument error in getrs" - - return B - - -def gecon(np.ndarray[scalar, ndim=2] LU, double normA, char *norm = b"1"): +def sgecon(np.ndarray[np.float32_t, ndim=2] LU, + float normA, char *norm = b"1"): cdef l_int N, info - cdef float srcond, snormA - cdef double drcond + cdef float rcond + cdef np.ndarray[np.float32_t, ndim=1] work + cdef np.ndarray[l_int, ndim=1] iwork - # Parameter checks + assert_fortran_mat(LU) + + N = LU.shape[0] + work = np.empty(4*N, dtype = np.float32) + iwork = np.empty(N, dtype = f_lapack.l_int_dtype) + + f_lapack.sgecon_(norm, &N, <float *>LU.data, &N, &normA, + &rcond, <float *>work.data, + <l_int *>iwork.data, &info) + + assert info == 0, "Argument error in sgecon" + + return rcond + +def dgecon(np.ndarray[np.float64_t, ndim=2] LU, + double normA, char *norm = b"1"): + cdef l_int N, info + cdef double rcond + cdef np.ndarray[np.float64_t, ndim=1] work + cdef np.ndarray[l_int, ndim=1] iwork assert_fortran_mat(LU) - if norm[0] != b"1" and norm[0] != b"I": - raise ValueError("'norm' must be either '1' or 'I'") - if scalar in single_precision: - snormA = normA - # Allocate workspaces + N = LU.shape[0] + work = np.empty(4*N, dtype = np.float64) + iwork = np.empty(N, dtype = f_lapack.l_int_dtype) + + f_lapack.dgecon_(norm, &N, <double *>LU.data, &N, &normA, + &rcond, <double *>work.data, + <l_int *>iwork.data, &info) + + assert info == 0, "Argument error in dgecon" + + return rcond + +def cgecon(np.ndarray[np.complex64_t, ndim=2] LU, + float normA, char *norm = b"1"): + cdef l_int N, info + cdef float rcond + cdef np.ndarray[np.complex64_t, ndim=1] work + cdef np.ndarray[np.float32_t, ndim=1] rwork + + assert_fortran_mat(LU) N = LU.shape[0] + work = np.empty(2*N, dtype = np.complex64) + rwork = np.empty(2*N, dtype = np.float32) - cdef np.ndarray[l_int] iwork - if scalar in floating: - iwork = np.empty(N, dtype=int_dtype) + f_lapack.cgecon_(norm, &N, <float complex *>LU.data, &N, &normA, + &rcond, <float complex *>work.data, + <float *>rwork.data, &info) - cdef np.ndarray[scalar] work - if scalar in floating: - work = np.empty(4 * N, dtype=LU.dtype) - else: - work = np.empty(2 * N, dtype=LU.dtype) + assert info == 0, "Argument error in cgecon" - cdef np.ndarray rwork - if scalar is float_complex: - rwork = np.empty(2 * N, dtype=np.float32) - elif scalar is double_complex: - rwork = np.empty(2 * N, dtype=np.float64) + return rcond - # The actual calculation +def zgecon(np.ndarray[np.complex128_t, ndim=2] LU, + double normA, char *norm = b"1"): + cdef l_int N, info + cdef double rcond + cdef np.ndarray[np.complex128_t, ndim=1] work + cdef np.ndarray[np.float64_t, ndim=1] rwork - if scalar is float: - lapack.sgecon(norm, &N, <float *>LU.data, &N, &snormA, - &srcond, <float *>work.data, - <l_int *>iwork.data, &info) - elif scalar is double: - lapack.dgecon(norm, &N, <double *>LU.data, &N, &normA, - &drcond, <double *>work.data, - <l_int *>iwork.data, &info) - elif scalar is float_complex: - lapack.cgecon(norm, &N, <float complex *>LU.data, &N, &snormA, - &srcond, <float complex *>work.data, - <float *>rwork.data, &info) - elif scalar is double_complex: - lapack.zgecon(norm, &N, <double complex *>LU.data, &N, &normA, - &drcond, <double complex *>work.data, - <double *>rwork.data, &info) + assert_fortran_mat(LU) - assert info == 0, "Argument error in gecon" + N = LU.shape[0] + work = np.empty(2*N, dtype = np.complex128) + rwork = np.empty(2*N, dtype = np.float64) - if scalar in single_precision: - return srcond - else: - return drcond + f_lapack.zgecon_(norm, &N, <double complex *>LU.data, &N, &normA, + &rcond, <double complex *>work.data, + <double *>rwork.data, &info) + assert info == 0, "Argument error in zgecon" + + return rcond + +# Wrappers for xGGEV # Helper function for xGGEV def ggev_postprocess(dtype, alphar, alphai, vl_r=None, vr_r=None): @@ -264,363 +363,707 @@ def ggev_postprocess(dtype, alphar, alphai, vl_r=None, vr_r=None): return (alpha, vl, vr) -def ggev(np.ndarray[scalar, ndim=2] A, np.ndarray[scalar, ndim=2] B, - left=False, right=True): +def sggev(np.ndarray[np.float32_t, ndim=2] A, + np.ndarray[np.float32_t, ndim=2] B, + left=False, right=True): cdef l_int N, info, lwork - - # Parameter checks + cdef char *jobvl + cdef char *jobvr + cdef np.ndarray[np.float32_t, ndim=2] vl_r, vr_r + cdef float *vl_ptr + cdef float *vr_ptr + cdef float qwork + cdef np.ndarray[np.float32_t, ndim=1] work, alphar, alphai, beta assert_fortran_mat(A, B) - if A.ndim != 2 or A.ndim != 2: - raise ValueError("gen_eig requires both a and be to be matrices") + N = A.shape[0] + alphar = np.empty(N, dtype = np.float32) + alphai = np.empty(N, dtype = np.float32) + beta = np.empty(N, dtype = np.float32) + + if left: + vl_r = np.empty((N,N), dtype = np.float32, order='F') + vl_ptr = <float *>vl_r.data + jobvl = "V" + else: + vl_r = None + vl_ptr = NULL + jobvl = "N" + + if right: + vr_r = np.empty((N,N), dtype = np.float32, order='F') + vr_ptr = <float *>vr_r.data + jobvr = "V" + else: + vr_r = None + vr_ptr = NULL + jobvr = "N" + + # workspace query + lwork = -1 + + f_lapack.sggev_(jobvl, jobvr, &N, <float *>A.data, &N, + <float *>B.data, &N, + <float *>alphar.data, <float *> alphai.data, + <float *>beta.data, + vl_ptr, &N, vr_ptr, &N, + &qwork, &lwork, &info) + + assert info == 0, "Argument error in sggev" + + lwork = <l_int>qwork + work = np.empty(lwork, dtype = np.float32) - if A.shape[0] != A.shape[1]: - raise ValueError("gen_eig requires square matrix input") + # Now the real calculation + f_lapack.sggev_(jobvl, jobvr, &N, <float *>A.data, &N, + <float *>B.data, &N, + <float *>alphar.data, <float *> alphai.data, + <float *>beta.data, + vl_ptr, &N, vr_ptr, &N, + <float *>work.data, &lwork, &info) - if A.shape[0] != B.shape[0] or A.shape[1] != B.shape[1]: - raise ValueError("A and B do not have the same shape") + if info > 0: + raise LinAlgError("QZ iteration failed to converge in sggev") + + assert info == 0, "Argument error in sggev" + + alpha, vl, vr = ggev_postprocess(np.complex64, alphar, alphai, vl_r, vr_r) + + return filter_args((True, True, left, right), (alpha, beta, vl, vr)) + + +def dggev(np.ndarray[np.float64_t, ndim=2] A, + np.ndarray[np.float64_t, ndim=2] B, + left=False, right=True): + cdef l_int N, info, lwork + cdef char *jobvl + cdef char *jobvr + cdef np.ndarray[np.float64_t, ndim=2] vl_r, vr_r + cdef double *vl_ptr + cdef double *vr_ptr + cdef double qwork + cdef np.ndarray[np.float64_t, ndim=1] work, alphar, alphai, beta - # Allocate workspaces + assert_fortran_mat(A, B) N = A.shape[0] + alphar = np.empty(N, dtype = np.float64) + alphai = np.empty(N, dtype = np.float64) + beta = np.empty(N, dtype = np.float64) + + if left: + vl_r = np.empty((N,N), dtype = np.float64, order='F') + vl_ptr = <double *>vl_r.data + jobvl = "V" + else: + vl_r = None + vl_ptr = NULL + jobvl = "N" - cdef np.ndarray[scalar] alphar, alphai - if scalar in cmplx: - alphar = np.empty(N, dtype=A.dtype) - alphai = None + if right: + vr_r = np.empty((N,N), dtype = np.float64, order='F') + vr_ptr = <double *>vr_r.data + jobvr = "V" else: - alphar = np.empty(N, dtype=A.dtype) - alphai = np.empty(N, dtype=A.dtype) + vr_r = None + vr_ptr = NULL + jobvr = "N" + + # workspace query + lwork = -1 + + f_lapack.dggev_(jobvl, jobvr, &N, <double *>A.data, &N, + <double *>B.data, &N, + <double *>alphar.data, <double *> alphai.data, + <double *>beta.data, + vl_ptr, &N, vr_ptr, &N, + &qwork, &lwork, &info) + + assert info == 0, "Argument error in dggev" + + lwork = <l_int>qwork + work = np.empty(lwork, dtype = np.float64) + + # Now the real calculation + f_lapack.dggev_(jobvl, jobvr, &N, <double *>A.data, &N, + <double *>B.data, &N, + <double *>alphar.data, <double *> alphai.data, + <double *>beta.data, + vl_ptr, &N, vr_ptr, &N, + <double *>work.data, &lwork, &info) + + if info > 0: + raise LinAlgError("QZ iteration failed to converge in dggev") + + assert info == 0, "Argument error in dggev" - cdef np.ndarray[scalar] beta = np.empty(N, dtype=A.dtype) + alpha, vl, vr = ggev_postprocess(np.complex128, alphar, alphai, vl_r, vr_r) + + return filter_args((True, True, left, right), (alpha, beta, vl, vr)) - cdef np.ndarray rwork = None - if scalar is float_complex: - rwork = np.empty(8 * N, dtype=np.float32) - elif scalar is double_complex: - rwork = np.empty(8 * N, dtype=np.float64) - cdef np.ndarray vl - cdef scalar *vl_ptr +def cggev(np.ndarray[np.complex64_t, ndim=2] A, + np.ndarray[np.complex64_t, ndim=2] B, + left=False, right=True): + cdef l_int N, info, lwork cdef char *jobvl + cdef char *jobvr + cdef np.ndarray[np.complex64_t, ndim=2] vl, vr + cdef float complex *vl_ptr + cdef float complex *vr_ptr + cdef float complex qwork + cdef np.ndarray[np.complex64_t, ndim=1] work, alpha, beta + cdef np.ndarray[np.float32_t, ndim=1] rwork + + assert_fortran_mat(A, B) + + N = A.shape[0] + alpha = np.empty(N, dtype = np.complex64) + beta = np.empty(N, dtype = np.complex64) + if left: - vl = np.empty((N,N), dtype=A.dtype, order='F') - vl_ptr = <scalar *>vl.data + vl = np.empty((N,N), dtype = np.complex64, order='F') + vl_ptr = <float complex *>vl.data jobvl = "V" else: - vl = None vl_ptr = NULL jobvl = "N" - cdef np.ndarray vr - cdef scalar *vr_ptr - cdef char *jobvr if right: - vr = np.empty((N,N), dtype=A.dtype, order='F') - vr_ptr = <scalar *>vr.data + vr = np.empty((N,N), dtype = np.complex64, order='F') + vr_ptr = <float complex *>vr.data jobvr = "V" else: - vr = None vr_ptr = NULL jobvr = "N" - # Workspace query - # Xggev expects &qwork as a <scalar *> (even though it's an integer) + rwork = np.empty(8 * N, dtype = np.float32) + + # workspace query lwork = -1 - cdef scalar qwork + work = np.empty(1, dtype = np.complex64) - if scalar is float: - lapack.sggev(jobvl, jobvr, &N, <float *>A.data, &N, - <float *>B.data, &N, - <float *>alphar.data, <float *> alphai.data, - <float *>beta.data, - vl_ptr, &N, vr_ptr, &N, - &qwork, &lwork, &info) - elif scalar is double: - lapack.dggev(jobvl, jobvr, &N, <double *>A.data, &N, - <double *>B.data, &N, - <double *>alphar.data, <double *> alphai.data, - <double *>beta.data, - vl_ptr, &N, vr_ptr, &N, - &qwork, &lwork, &info) - elif scalar is float_complex: - lapack.cggev(jobvl, jobvr, &N, <float complex *>A.data, &N, - <float complex *>B.data, &N, - <float complex *>alphar.data, <float complex *>beta.data, - vl_ptr, &N, vr_ptr, &N, - &qwork, &lwork, - <float *>rwork.data, &info) - elif scalar is double_complex: - lapack.zggev(jobvl, jobvr, &N, <double complex *>A.data, &N, - <double complex *>B.data, &N, - <double complex *>alphar.data, <double complex *>beta.data, - vl_ptr, &N, vr_ptr, &N, - &qwork, &lwork, - <double *>rwork.data, &info) + f_lapack.cggev_(jobvl, jobvr, &N, <float complex *>A.data, &N, + <float complex *>B.data, &N, + <float complex *>alpha.data, <float complex *>beta.data, + vl_ptr, &N, vr_ptr, &N, + &qwork, &lwork, + <float *>rwork.data, &info) - assert info == 0, "Argument error in ggev" + assert info == 0, "Argument error in cggev" - lwork = lwork_from_qwork(qwork) - cdef np.ndarray[scalar] work = np.empty(lwork, dtype=A.dtype) + lwork = <l_int>qwork.real - # The actual calculation + work = np.empty(lwork, dtype = np.complex64) - if scalar is float: - lapack.sggev(jobvl, jobvr, &N, <float *>A.data, &N, - <float *>B.data, &N, - <float *>alphar.data, <float *> alphai.data, - <float *>beta.data, - vl_ptr, &N, vr_ptr, &N, - <float *>work.data, &lwork, &info) - elif scalar is double: - lapack.dggev(jobvl, jobvr, &N, <double *>A.data, &N, - <double *>B.data, &N, - <double *>alphar.data, <double *> alphai.data, - <double *>beta.data, - vl_ptr, &N, vr_ptr, &N, - <double *>work.data, &lwork, &info) - elif scalar is float_complex: - lapack.cggev(jobvl, jobvr, &N, <float complex *>A.data, &N, - <float complex *>B.data, &N, - <float complex *>alphar.data, <float complex *>beta.data, - vl_ptr, &N, vr_ptr, &N, - <float complex *>work.data, &lwork, - <float *>rwork.data, &info) - elif scalar is double_complex: - lapack.zggev(jobvl, jobvr, &N, <double complex *>A.data, &N, - <double complex *>B.data, &N, - <double complex *>alphar.data, <double complex *>beta.data, - vl_ptr, &N, vr_ptr, &N, - <double complex *>work.data, &lwork, - <double *>rwork.data, &info) + # Now the real calculation + f_lapack.cggev_(jobvl, jobvr, &N, <float complex *>A.data, &N, + <float complex *>B.data, &N, + <float complex *>alpha.data, <float complex *>beta.data, + vl_ptr, &N, vr_ptr, &N, + <float complex *>work.data, &lwork, + <float *>rwork.data, &info) if info > 0: - raise LinAlgError("QZ iteration failed to converge in sggev") + raise LinAlgError("QZ iteration failed to converge in cggev") - assert info == 0, "Argument error in ggev" + assert info == 0, "Argument error in cggev" - if scalar is float: - post_dtype = np.complex64 - elif scalar is double: - post_dtype = np.complex128 + return filter_args((True, True, left, right), (alpha, beta, vl, vr)) - cdef np.ndarray alpha - alpha = alphar - if scalar in floating: - alpha, vl, vr = ggev_postprocess(post_dtype, alphar, alphai, vl, vr) - return filter_args((True, True, left, right), (alpha, beta, vl, vr)) +def zggev(np.ndarray[np.complex128_t, ndim=2] A, + np.ndarray[np.complex128_t, ndim=2] B, + left=False, right=True): + cdef l_int N, info, lwork + cdef char *jobvl + cdef char *jobvr + cdef np.ndarray[np.complex128_t, ndim=2] vl, vr + cdef double complex *vl_ptr + cdef double complex *vr_ptr + cdef double complex qwork + cdef np.ndarray[np.complex128_t, ndim=1] work, alpha, beta + cdef np.ndarray[np.float64_t, ndim=1] rwork + + assert_fortran_mat(A, B) + N = A.shape[0] + alpha = np.empty(N, dtype = np.complex128) + beta = np.empty(N, dtype = np.complex128) -def gees(np.ndarray[scalar, ndim=2] A, calc_q=True, calc_ev=True): - cdef l_int N, lwork, sdim, info + if left: + vl = np.empty((N,N), dtype = np.complex128, order='F') + vl_ptr = <double complex *>vl.data + jobvl = "V" + else: + vl_ptr = NULL + jobvl = "N" - assert_fortran_mat(A) + if right: + vr = np.empty((N,N), dtype = np.complex128, order='F') + vr_ptr = <double complex *>vr.data + jobvr = "V" + else: + vr_ptr = NULL + jobvr = "N" - if A.ndim != 2: - raise ValueError("Expect matrix as input") + rwork = np.empty(8 * N, dtype = np.float64) - if A.shape[0] != A.shape[1]: - raise ValueError("Expect square matrix") + # workspace query + lwork = -1 + work = np.empty(1, dtype = np.complex128) - # Allocate workspaces + f_lapack.zggev_(jobvl, jobvr, &N, <double complex *>A.data, &N, + <double complex *>B.data, &N, + <double complex *>alpha.data, <double complex *>beta.data, + vl_ptr, &N, vr_ptr, &N, + &qwork, &lwork, + <double *>rwork.data, &info) - N = A.shape[0] + assert info == 0, "Argument error in zggev" - cdef np.ndarray[scalar] wr, wi - if scalar in cmplx: - wr = np.empty(N, dtype=A.dtype) - wi = None - else: - wr = np.empty(N, dtype=A.dtype) - wi = np.empty(N, dtype=A.dtype) + lwork = <l_int>qwork.real + work = np.empty(lwork, dtype = np.complex128) + + # Now the real calculation + f_lapack.zggev_(jobvl, jobvr, &N, <double complex *>A.data, &N, + <double complex *>B.data, &N, + <double complex *>alpha.data, <double complex *>beta.data, + vl_ptr, &N, vr_ptr, &N, + <double complex *>work.data, &lwork, + <double *>rwork.data, &info) + + if info > 0: + raise LinAlgError("QZ iteration failed to converge in zggev") - cdef np.ndarray rwork - if scalar is float_complex: - rwork = np.empty(N, dtype=np.float32) - elif scalar is double_complex: - rwork = np.empty(N, dtype=np.float64) + assert info == 0, "Argument error in zggev" + return filter_args((True, True, left, right), (alpha, beta, vl, vr)) + + +# Wrapper for xGEES +def sgees(np.ndarray[np.float32_t, ndim=2] A, + calc_q=True, calc_ev=True): + cdef l_int N, lwork, sdim, info cdef char *jobvs - cdef scalar *vs_ptr - cdef np.ndarray[scalar, ndim=2] vs + cdef float *vs_ptr + cdef float qwork + cdef np.ndarray[np.float32_t, ndim=2] vs + cdef np.ndarray[np.float32_t] wr, wi, work + + assert_fortran_mat(A) + + N = A.shape[0] + wr = np.empty(N, dtype = np.float32) + wi = np.empty(N, dtype = np.float32) + if calc_q: - vs = np.empty((N,N), dtype=A.dtype, order='F') - vs_ptr = <scalar *>vs.data + vs = np.empty((N,N), dtype = np.float32, order='F') + vs_ptr = <float *>vs.data jobvs = "V" else: - vs = None vs_ptr = NULL jobvs = "N" - # Workspace query - # Xgees expects &qwork as a <scalar *> (even though it's an integer) + # workspace query lwork = -1 - cdef scalar qwork - - if scalar is float: - lapack.sgees(jobvs, "N", NULL, &N, <float *>A.data, &N, - &sdim, <float *>wr.data, <float *>wi.data, vs_ptr, &N, - &qwork, &lwork, NULL, &info) - elif scalar is double: - lapack.dgees(jobvs, "N", NULL, &N, <double *>A.data, &N, - &sdim, <double *>wr.data, <double *>wi.data, vs_ptr, &N, - &qwork, &lwork, NULL, &info) - elif scalar is float_complex: - lapack.cgees(jobvs, "N", NULL, &N, <float complex *>A.data, &N, - &sdim, <float complex *>wr.data, vs_ptr, &N, - &qwork, &lwork, <float *>rwork.data, NULL, &info) - elif scalar is double_complex: - lapack.zgees(jobvs, "N", NULL, &N, <double complex *>A.data, &N, - &sdim, <double complex *>wr.data, vs_ptr, &N, - &qwork, &lwork, <double *>rwork.data, NULL, &info) + f_lapack.sgees_(jobvs, "N", NULL, &N, <float *>A.data, &N, + &sdim, <float *>wr.data, <float *>wi.data, vs_ptr, &N, + &qwork, &lwork, NULL, &info) assert info == 0, "Argument error in sgees" - lwork = lwork_from_qwork(qwork) - cdef np.ndarray[scalar] work = np.empty(lwork, dtype=A.dtype) - - # The actual calculation - - if scalar is float: - lapack.sgees(jobvs, "N", NULL, &N, <float *>A.data, &N, - &sdim, <float *>wr.data, <float *>wi.data, vs_ptr, &N, - <float *>work.data, &lwork, NULL, &info) - elif scalar is double: - lapack.dgees(jobvs, "N", NULL, &N, <double *>A.data, &N, - &sdim, <double *>wr.data, <double *>wi.data, vs_ptr, &N, - <double *>work.data, &lwork, NULL, &info) - elif scalar is float_complex: - lapack.cgees(jobvs, "N", NULL, &N, <float complex *>A.data, &N, - &sdim, <float complex *>wr.data, vs_ptr, &N, - <float complex *>work.data, &lwork, - <float *>rwork.data, NULL, &info) - elif scalar is double_complex: - lapack.zgees(jobvs, "N", NULL, &N, <double complex *>A.data, &N, - &sdim, <double complex *>wr.data, vs_ptr, &N, - <double complex *>work.data, &lwork, - <double *>rwork.data, NULL, &info) + lwork = <int>qwork + work = np.empty(lwork, dtype = np.float32) + + # Now the real calculation + f_lapack.sgees_(jobvs, "N", NULL, &N, <float *>A.data, &N, + &sdim, <float *>wr.data, <float *>wi.data, vs_ptr, &N, + <float *>work.data, &lwork, NULL, &info) if info > 0: - raise LinAlgError("QR iteration failed to converge in gees") + raise LinAlgError("QR iteration failed to converge in sgees") - assert info == 0, "Argument error in gees" + assert info == 0, "Argument error in sgees" - # Real inputs possibly produce complex output - cdef np.ndarray w = maybe_complex[scalar](0, wr, wi) + if wi.nonzero()[0].size: + w = wr + 1j * wi + else: + w = wr return filter_args((True, calc_q, calc_ev), (A, vs, w)) -def trsen(np.ndarray[l_logical] select, - np.ndarray[scalar, ndim=2] T, - np.ndarray[scalar, ndim=2] Q, - calc_ev=True): - cdef l_int N, M, lwork, liwork, qiwork, info - - assert_fortran_mat(T, Q) +def dgees(np.ndarray[np.float64_t, ndim=2] A, + calc_q=True, calc_ev=True): + cdef l_int N, lwork, sdim, info + cdef char *jobvs + cdef double *vs_ptr + cdef double qwork + cdef np.ndarray[np.float64_t, ndim=2] vs + cdef np.ndarray[np.float64_t] wr, wi, work - # Allocate workspaces + assert_fortran_mat(A) - N = T.shape[0] + N = A.shape[0] + wr = np.empty(N, dtype = np.float64) + wi = np.empty(N, dtype = np.float64) - cdef np.ndarray[scalar] wr, wi - if scalar in cmplx: - wr = np.empty(N, dtype=T.dtype) - wi = None + if calc_q: + vs = np.empty((N,N), dtype = np.float64, order='F') + vs_ptr = <double *>vs.data + jobvs = "V" else: - wr = np.empty(N, dtype=T.dtype) - wi = np.empty(N, dtype=T.dtype) + vs_ptr = NULL + jobvs = "N" - cdef char *compq - cdef scalar *q_ptr - if Q is not None: - compq = "V" - q_ptr = <scalar *>Q.data - else: - compq = "N" - q_ptr = NULL + # workspace query + lwork = -1 + f_lapack.dgees_(jobvs, "N", NULL, &N, <double *>A.data, &N, + &sdim, <double *>wr.data, <double *>wi.data, vs_ptr, &N, + &qwork, &lwork, NULL, &info) - # Workspace query - # Xtrsen expects &qwork as a <scalar *> (even though it's an integer) - cdef scalar qwork - lwork = liwork = -1 + assert info == 0, "Argument error in dgees" - if scalar is float: - lapack.strsen("N", compq, <l_logical *>select.data, - &N, <float *>T.data, &N, q_ptr, &N, - <float *>wr.data, <float *>wi.data, &M, NULL, NULL, - &qwork, &lwork, &qiwork, &liwork, &info) - elif scalar is double: - lapack.dtrsen("N", compq, <l_logical *>select.data, - &N, <double *>T.data, &N, q_ptr, &N, - <double *>wr.data, <double *>wi.data, &M, NULL, NULL, - &qwork, &lwork, &qiwork, &liwork, &info) - elif scalar is float_complex: - lapack.ctrsen("N", compq, <l_logical *>select.data, - &N, <float complex *>T.data, &N, q_ptr, &N, - <float complex *>wr.data, &M, NULL, NULL, - &qwork, &lwork, &info) - elif scalar is double_complex: - lapack.ztrsen("N", compq, <l_logical *>select.data, - &N, <double complex *>T.data, &N, q_ptr, &N, - <double complex *>wr.data, &M, NULL, NULL, - &qwork, &lwork, &info) - - assert info == 0, "Argument error in trsen" - - lwork = lwork_from_qwork(qwork) - cdef np.ndarray[scalar] work = np.empty(lwork, dtype=T.dtype) - - cdef np.ndarray[l_int] iwork = None - if scalar in floating: - liwork = qiwork - iwork = np.empty(liwork, dtype=int_dtype) - - # Tha actual calculation - - if scalar is float: - lapack.strsen("N", compq, <l_logical *>select.data, - &N, <float *>T.data, &N, q_ptr, &N, - <float *>wr.data, <float *>wi.data, &M, NULL, NULL, - <float *>work.data, &lwork, - <l_int *>iwork.data, &liwork, &info) - elif scalar is double: - lapack.dtrsen("N", compq, <l_logical *>select.data, - &N, <double *>T.data, &N, q_ptr, &N, - <double *>wr.data, <double *>wi.data, &M, NULL, NULL, - <double *>work.data, &lwork, - <l_int *>iwork.data, &liwork, &info) - elif scalar is float_complex: - lapack.ctrsen("N", compq, <l_logical *>select.data, - &N, <float complex *>T.data, &N, q_ptr, &N, - <float complex *>wr.data, &M, NULL, NULL, - <float complex *>work.data, &lwork, &info) - elif scalar is double_complex: - lapack.ztrsen("N", compq, <l_logical *>select.data, - &N, <double complex *>T.data, &N, q_ptr, &N, - <double complex *>wr.data, &M, NULL, NULL, - <double complex *>work.data, &lwork, &info) + lwork = <int>qwork + work = np.empty(lwork, dtype = np.float64) + + # Now the real calculation + f_lapack.dgees_(jobvs, "N", NULL, &N, <double *>A.data, &N, + &sdim, <double *>wr.data, <double *>wi.data, vs_ptr, &N, + <double *>work.data, &lwork, NULL, &info) if info > 0: - raise LinAlgError("Reordering failed; problem is very ill-conditioned") + raise LinAlgError("QR iteration failed to converge in dgees") - assert info == 0, "Argument error in trsen" + assert info == 0, "Argument error in dgees" - # Real inputs possibly produce complex output - cdef np.ndarray w = maybe_complex[scalar](0, wr, wi) + if wi.nonzero()[0].size: + w = wr + 1j * wi + else: + w = wr - return filter_args((True, Q is not None, calc_ev), (T, Q, w)) + return filter_args((True, calc_q, calc_ev), (A, vs, w)) -# Helper function for xTREVC and xTGEVC -def txevc_postprocess(dtype, T, vreal, np.ndarray[l_logical] select): - cdef int N, M, i, m, indx +def cgees(np.ndarray[np.complex64_t, ndim=2] A, + calc_q=True, calc_ev=True): + cdef l_int N, lwork, sdim, info + cdef char *jobvs + cdef float complex *vs_ptr + cdef float complex qwork + cdef np.ndarray[np.complex64_t, ndim=2] vs + cdef np.ndarray[np.complex64_t] w, work + cdef np.ndarray[np.float32_t] rwork + + assert_fortran_mat(A) + + N = A.shape[0] + w = np.empty(N, dtype = np.complex64) + rwork = np.empty(N, dtype = np.float32) + + if calc_q: + vs = np.empty((N,N), dtype = np.complex64, order='F') + vs_ptr = <float complex *>vs.data + jobvs = "V" + else: + vs_ptr = NULL + jobvs = "N" + + # workspace query + lwork = -1 + f_lapack.cgees_(jobvs, "N", NULL, &N, <float complex *>A.data, &N, + &sdim, <float complex *>w.data, vs_ptr, &N, + &qwork, &lwork, <float *>rwork.data, NULL, &info) + + assert info == 0, "Argument error in cgees" + + lwork = <int>qwork.real + work = np.empty(lwork, dtype = np.complex64) + + # Now the real calculation + f_lapack.cgees_(jobvs, "N", NULL, &N, <float complex *>A.data, &N, + &sdim, <float complex *>w.data, vs_ptr, &N, + <float complex *>work.data, &lwork, + <float *>rwork.data, NULL, &info) + + if info > 0: + raise LinAlgError("QR iteration failed to converge in cgees") + + assert info == 0, "Argument error in cgees" + + return filter_args((True, calc_q, calc_ev), (A, vs, w)) + + +def zgees(np.ndarray[np.complex128_t, ndim=2] A, + calc_q=True, calc_ev=True): + cdef l_int N, lwork, sdim, info + cdef char *jobvs + cdef double complex *vs_ptr + cdef double complex qwork + cdef np.ndarray[np.complex128_t, ndim=2] vs + cdef np.ndarray[np.complex128_t] w, work + cdef np.ndarray[np.float64_t] rwork + + assert_fortran_mat(A) + + N = A.shape[0] + w = np.empty(N, dtype = np.complex128) + rwork = np.empty(N, dtype = np.float64) + + if calc_q: + vs = np.empty((N,N), dtype = np.complex128, order='F') + vs_ptr = <double complex *>vs.data + jobvs = "V" + else: + vs_ptr = NULL + jobvs = "N" + + # workspace query + lwork = -1 + f_lapack.zgees_(jobvs, "N", NULL, &N, <double complex *>A.data, &N, + &sdim, <double complex *>w.data, vs_ptr, &N, + &qwork, &lwork, <double *>rwork.data, NULL, &info) + + assert info == 0, "Argument error in zgees" + + lwork = <int>qwork.real + work = np.empty(lwork, dtype = np.complex128) + + # Now the real calculation + f_lapack.zgees_(jobvs, "N", NULL, &N, <double complex *>A.data, &N, + &sdim, <double complex *>w.data, vs_ptr, &N, + <double complex *>work.data, &lwork, + <double *>rwork.data, NULL, &info) + + if info > 0: + raise LinAlgError("QR iteration failed to converge in zgees") + + assert info == 0, "Argument error in zgees" + + return filter_args((True, calc_q, calc_ev), (A, vs, w)) + +# Wrapper for xTRSEN +def strsen(np.ndarray[l_logical] select, + np.ndarray[np.float32_t, ndim=2] T, + np.ndarray[np.float32_t, ndim=2] Q=None, + calc_ev=True): + cdef l_int N, M, lwork, liwork, qiwork, info + cdef char *compq + cdef float qwork + cdef float *q_ptr + cdef np.ndarray[np.float32_t] wr, wi, work + cdef np.ndarray[l_int] iwork + + assert_fortran_mat(T, Q) + + N = T.shape[0] + wr = np.empty(N, dtype = np.float32) + wi = np.empty(N, dtype = np.float32) + + if Q is not None: + compq = "V" + q_ptr = <float *>Q.data + else: + compq = "N" + q_ptr = NULL + + # workspace query + lwork = liwork = -1 + f_lapack.strsen_("N", compq, <l_logical *>select.data, + &N, <float *>T.data, &N, q_ptr, &N, + <float *>wr.data, <float *>wi.data, &M, NULL, NULL, + &qwork, &lwork, &qiwork, &liwork, &info) + + assert info == 0, "Argument error in strsen" + + lwork = <int>qwork + work = np.empty(lwork, dtype = np.float32) + liwork = qiwork + iwork = np.empty(liwork, dtype = f_lapack.l_int_dtype) + + # Now the real calculation + f_lapack.strsen_("N", compq, <l_logical *>select.data, + &N, <float *>T.data, &N, q_ptr, &N, + <float *>wr.data, <float *>wi.data, &M, NULL, NULL, + <float *>work.data, &lwork, + <int *>iwork.data, &liwork, &info) + + if info > 0: + raise LinAlgError("Reordering failed; problem is very ill-conditioned") + + assert info == 0, "Argument error in strsen" + + if wi.nonzero()[0].size: + w = wr + 1j * wi + else: + w = wr + + return filter_args((True, Q is not None, calc_ev), (T, Q, w)) + + +def dtrsen(np.ndarray[l_logical] select, + np.ndarray[np.float64_t, ndim=2] T, + np.ndarray[np.float64_t, ndim=2] Q=None, + calc_ev=True): + cdef l_int N, M, lwork, liwork, qiwork, info + cdef char *compq + cdef double qwork + cdef double *q_ptr + cdef np.ndarray[np.float64_t] wr, wi, work + cdef np.ndarray[l_int] iwork + + assert_fortran_mat(T, Q) + + N = T.shape[0] + wr = np.empty(N, dtype = np.float64) + wi = np.empty(N, dtype = np.float64) + + if Q is not None: + compq = "V" + q_ptr = <double *>Q.data + else: + compq = "N" + q_ptr = NULL + + # workspace query + lwork = liwork = -1 + f_lapack.dtrsen_("N", compq, <l_logical *>select.data, + &N, <double *>T.data, &N, q_ptr, &N, + <double *>wr.data, <double *>wi.data, &M, NULL, NULL, + &qwork, &lwork, &qiwork, &liwork, &info) + + assert info == 0, "Argument error in dtrsen" + + lwork = <int>qwork + work = np.empty(lwork, dtype = np.float64) + liwork = qiwork + iwork = np.empty(liwork, dtype = f_lapack.l_int_dtype) + + # Now the real calculation + f_lapack.dtrsen_("N", compq, <l_logical *>select.data, + &N, <double *>T.data, &N, q_ptr, &N, + <double *>wr.data, <double *>wi.data, &M, NULL, NULL, + <double *>work.data, &lwork, + <int *>iwork.data, &liwork, &info) + + if info > 0: + raise LinAlgError("Reordering failed; problem is very ill-conditioned") + + assert info == 0, "Argument error in dtrsen" + + if wi.nonzero()[0].size: + w = wr + 1j * wi + else: + w = wr + + return filter_args((True, Q is not None, calc_ev), (T, Q, w)) + + +def ctrsen(np.ndarray[l_logical] select, + np.ndarray[np.complex64_t, ndim=2] T, + np.ndarray[np.complex64_t, ndim=2] Q=None, + calc_ev=True): + cdef l_int N, M, lwork, info + cdef char *compq + cdef float complex qwork + cdef float complex *q_ptr + cdef np.ndarray[np.complex64_t] w, work + + assert_fortran_mat(T, Q) + + N = T.shape[0] + w = np.empty(N, dtype = np.complex64) + + if Q is not None: + compq = "V" + q_ptr = <float complex *>Q.data + else: + compq = "N" + q_ptr = NULL + + # workspace query + lwork = -1 + f_lapack.ctrsen_("N", compq, <l_logical *>select.data, + &N, <float complex *>T.data, &N, q_ptr, &N, + <float complex *>w.data, &M, NULL, NULL, + &qwork, &lwork, &info) + + assert info == 0, "Argument error in ctrsen" + + lwork = <int>qwork.real + work = np.empty(lwork, dtype = np.complex64) + + # Now the real calculation + f_lapack.ctrsen_("N", compq, <l_logical *>select.data, + &N, <float complex *>T.data, &N, q_ptr, &N, + <float complex *>w.data, &M, NULL, NULL, + <float complex *>work.data, &lwork, &info) + + if info > 0: + raise LinAlgError("Reordering failed; problem is very ill-conditioned") + + assert info == 0, "Argument error in ctrsen" + + return filter_args((True, Q is not None, calc_ev), (T, Q, w)) + + +def ztrsen(np.ndarray[l_logical] select, + np.ndarray[np.complex128_t, ndim=2] T, + np.ndarray[np.complex128_t, ndim=2] Q=None, + calc_ev=True): + cdef l_int N, MM, M, lwork, info + cdef char *compq + cdef double complex qwork + cdef double complex *q_ptr + cdef np.ndarray[np.complex128_t] w, work + + assert_fortran_mat(T, Q) + + N = T.shape[0] + w = np.empty(N, dtype = np.complex128) + + if Q is not None: + compq = "V" + q_ptr = <double complex *>Q.data + else: + compq = "N" + q_ptr = NULL + + # workspace query + lwork = -1 + f_lapack.ztrsen_("N", compq, <l_logical *>select.data, + &N, <double complex *>T.data, &N, q_ptr, &N, + <double complex *>w.data, &M, NULL, NULL, + &qwork, &lwork, &info) + + assert info == 0, "Argument error in ztrsen" + + lwork = <int>qwork.real + work = np.empty(lwork, dtype = np.complex128) + + # Now the real calculation + f_lapack.ztrsen_("N", compq, <l_logical *>select.data, + &N, <double complex *>T.data, &N, q_ptr, &N, + <double complex *>w.data, &M, NULL, NULL, + <double complex *>work.data, &lwork, &info) + + if info > 0: + raise LinAlgError("Reordering failed; problem is very ill-conditioned") + + assert info == 0, "Argument error in ztrsen" + + return filter_args((True, Q is not None, calc_ev), (T, Q, w)) + + +# Helper function for xTREVC and xTGEVC +def txevc_postprocess(dtype, T, vreal, np.ndarray[l_logical] select): + cdef int N, M, i, m, indx N = T.shape[0] if select is None: - select = np.ones(N, dtype = logical_dtype) + select = np.ones(N, dtype = f_lapack.l_logical_dtype) selindx = select.nonzero()[0] M = selindx.size @@ -653,37 +1096,989 @@ def txevc_postprocess(dtype, T, vreal, np.ndarray[l_logical] select): return v -def trevc(np.ndarray[scalar, ndim=2] T, - np.ndarray[scalar, ndim=2] Q, - np.ndarray[l_logical] select, - left=False, right=True): - cdef l_int N, info, M, MM - cdef char *side - cdef char *howmny +# Wrappers for xTREVC +def strevc(np.ndarray[np.float32_t, ndim=2] T, + np.ndarray[np.float32_t, ndim=2] Q=None, + np.ndarray[l_logical] select=None, + left=False, right=True): + cdef l_int N, info, M, MM + cdef char *side + cdef char *howmny + cdef np.ndarray[np.float32_t, ndim=2] vl_r, vr_r + cdef float *vl_r_ptr + cdef float *vr_r_ptr + cdef np.ndarray[l_logical] select_cpy + cdef l_logical *select_ptr + cdef np.ndarray[np.float32_t] work + + assert_fortran_mat(T, Q) + + N = T.shape[0] + work = np.empty(4*N, dtype = np.float32) + + if left and right: + side = "B" + elif left: + side = "L" + elif right: + side = "R" + else: + return + + if select is not None: + howmny = "S" + MM = select.nonzero()[0].size + # Correct for possible additional storage if a single complex + # eigenvalue is selected. + # For that: Figure out the positions of the 2x2 blocks. + cmplxindx = np.diagonal(T, -1).nonzero()[0] + for i in cmplxindx: + if bool(select[i]) != bool(select[i+1]): + MM += 1 + + # Select is overwritten in strevc. + select_cpy = np.array(select, dtype = f_lapack.l_logical_dtype, + order = 'F') + select_ptr = <l_logical *>select_cpy.data + else: + MM = N + select_ptr = NULL + if Q is not None: + howmny = "B" + else: + howmny = "A" + + if left: + if Q is not None and select is None: + vl_r = np.asfortranarray(Q.copy()) + else: + vl_r = np.empty((N, MM), dtype = np.float32, order='F') + vl_r_ptr = <float *>vl_r.data + else: + vl_r_ptr = NULL + + if right: + if Q is not None and select is None: + vr_r = np.asfortranarray(Q.copy()) + else: + vr_r = np.empty((N, MM), dtype = np.float32, order='F') + vr_r_ptr = <float *>vr_r.data + else: + vr_r_ptr = NULL + + f_lapack.strevc_(side, howmny, select_ptr, + &N, <float *>T.data, &N, + vl_r_ptr, &N, vr_r_ptr, &N, &MM, &M, + <float *>work.data, &info) + + assert info == 0, "Argument error in strevc" + assert MM == M, "Unexpected number of eigenvectors returned in strevc" + + if select is not None and Q is not None: + if left: + vl_r = np.asfortranarray(np.dot(Q, vl_r)) + if right: + vr_r = np.asfortranarray(np.dot(Q, vr_r)) + + # If there are complex eigenvalues, we need to postprocess the + # eigenvectors. + if np.diagonal(T, -1).nonzero()[0].size: + if left: + vl = txevc_postprocess(np.complex64, T, vl_r, select) + if right: + vr = txevc_postprocess(np.complex64, T, vr_r, select) + else: + if left: + vl = vl_r + if right: + vr = vr_r + + if left and right: + return (vl, vr) + elif left: + return vl + else: + return vr + + +def dtrevc(np.ndarray[np.float64_t, ndim=2] T, + np.ndarray[np.float64_t, ndim=2] Q=None, + np.ndarray[l_logical] select=None, + left=False, right=True): + cdef l_int N, info, M, MM + cdef char *side + cdef char *howmny + cdef np.ndarray[np.float64_t, ndim=2] vl_r, vr_r + cdef double *vl_r_ptr + cdef double *vr_r_ptr + cdef np.ndarray[l_logical] select_cpy + cdef l_logical *select_ptr + cdef np.ndarray[np.float64_t] work + + assert_fortran_mat(T, Q) + + N = T.shape[0] + work = np.empty(4*N, dtype = np.float64) + + if left and right: + side = "B" + elif left: + side = "L" + elif right: + side = "R" + else: + return + + if select is not None: + howmny = "S" + MM = select.nonzero()[0].size + # Correct for possible additional storage if a single complex + # eigenvalue is selected. + # For that: Figure out the positions of the 2x2 blocks. + cmplxindx = np.diagonal(T, -1).nonzero()[0] + for i in cmplxindx: + if bool(select[i]) != bool(select[i+1]): + MM += 1 + + # Select is overwritten in dtrevc. + select_cpy = np.array(select, dtype = f_lapack.l_logical_dtype, + order = 'F') + select_ptr = <l_logical *>select_cpy.data + else: + MM = N + select_ptr = NULL + if Q is not None: + howmny = "B" + else: + howmny = "A" + + if left: + if Q is not None and select is None: + vl_r = np.asfortranarray(Q.copy()) + else: + vl_r = np.empty((N, MM), dtype = np.float64, order='F') + vl_r_ptr = <double *>vl_r.data + else: + vl_r_ptr = NULL + + if right: + if Q is not None and select is None: + vr_r = np.asfortranarray(Q.copy()) + else: + vr_r = np.empty((N, MM), dtype = np.float64, order='F') + vr_r_ptr = <double *>vr_r.data + else: + vr_r_ptr = NULL + + f_lapack.dtrevc_(side, howmny, select_ptr, + &N, <double *>T.data, &N, + vl_r_ptr, &N, vr_r_ptr, &N, &MM, &M, + <double *>work.data, &info) + + assert info == 0, "Argument error in dtrevc" + assert MM == M, "Unexpected number of eigenvectors returned in dtrevc" + + if select is not None and Q is not None: + if left: + vl_r = np.asfortranarray(np.dot(Q, vl_r)) + if right: + vr_r = np.asfortranarray(np.dot(Q, vr_r)) + + # If there are complex eigenvalues, we need to postprocess the eigenvectors + if np.diagonal(T, -1).nonzero()[0].size: + if left: + vl = txevc_postprocess(np.complex128, T, vl_r, select) + if right: + vr = txevc_postprocess(np.complex128, T, vr_r, select) + else: + if left: + vl = vl_r + if right: + vr = vr_r + + if left and right: + return (vl, vr) + elif left: + return vl + else: + return vr + + +def ctrevc(np.ndarray[np.complex64_t, ndim=2] T, + np.ndarray[np.complex64_t, ndim=2] Q=None, + np.ndarray[l_logical] select=None, + left=False, right=True): + cdef l_int N, info, M, MM + cdef char *side + cdef char *howmny + cdef np.ndarray[np.complex64_t, ndim=2] vl, vr + cdef float complex *vl_ptr + cdef float complex *vr_ptr + cdef l_logical *select_ptr + cdef np.ndarray[np.complex64_t] work + cdef np.ndarray[np.float32_t] rwork + + assert_fortran_mat(T, Q) + + N = T.shape[0] + work = np.empty(2*N, dtype = np.complex64) + rwork = np.empty(N, dtype = np.float32) + + if left and right: + side = "B" + elif left: + side = "L" + elif right: + side = "R" + else: + return + + if select is not None: + howmny = "S" + MM = select.nonzero()[0].size + select_ptr = <l_logical *>select.data + else: + MM = N + select_ptr = NULL + if Q is not None: + howmny = "B" + else: + howmny = "A" + + if left: + if Q is not None and select is None: + vl = np.asfortranarray(Q.copy()) + else: + vl = np.empty((N, MM), dtype = np.complex64, order='F') + vl_ptr = <float complex *>vl.data + else: + vl_ptr = NULL + + if right: + if Q is not None and select is None: + vr = np.asfortranarray(Q.copy()) + else: + vr = np.empty((N, MM), dtype = np.complex64, order='F') + vr_ptr = <float complex *>vr.data + else: + vr_ptr = NULL + + f_lapack.ctrevc_(side, howmny, select_ptr, + &N, <float complex *>T.data, &N, + vl_ptr, &N, vr_ptr, &N, &MM, &M, + <float complex *>work.data, <float *>rwork.data, &info) + + assert info == 0, "Argument error in ctrevc" + assert MM == M, "Unexpected number of eigenvectors returned in ctrevc" + + if select is not None and Q is not None: + if left: + vl = np.asfortranarray(np.dot(Q, vl)) + if right: + vr = np.asfortranarray(np.dot(Q, vr)) + + if left and right: + return (vl, vr) + elif left: + return vl + else: + return vr + + +def ztrevc(np.ndarray[np.complex128_t, ndim=2] T, + np.ndarray[np.complex128_t, ndim=2] Q=None, + np.ndarray[l_logical] select=None, + left=False, right=True): + cdef l_int N, info, M, MM + cdef char *side + cdef char *howmny + cdef np.ndarray[np.complex128_t, ndim=2] vl, vr + cdef double complex *vl_ptr + cdef double complex *vr_ptr + cdef l_logical *select_ptr + cdef np.ndarray[np.complex128_t] work + cdef np.ndarray[np.float64_t] rwork + + assert_fortran_mat(T, Q) + + N = T.shape[0] + work = np.empty(2*N, dtype = np.complex128) + rwork = np.empty(N, dtype = np.float64) + + if left and right: + side = "B" + elif left: + side = "L" + elif right: + side = "R" + else: + return + + if select is not None: + howmny = "S" + MM = select.nonzero()[0].size + select_ptr = <l_logical *>select.data + else: + MM = N + select_ptr = NULL + if Q is not None: + howmny = "B" + else: + howmny = "A" + + if left: + if Q is not None and select is None: + vl = np.asfortranarray(Q.copy()) + else: + vl = np.empty((N, MM), dtype = np.complex128, order='F') + vl_ptr = <double complex *>vl.data + else: + vl_ptr = NULL + + if right: + if Q is not None and select is None: + vr = np.asfortranarray(Q.copy()) + else: + vr = np.empty((N, MM), dtype = np.complex128, order='F') + vr_ptr = <double complex *>vr.data + else: + vr_ptr = NULL + + f_lapack.ztrevc_(side, howmny, select_ptr, + &N, <double complex *>T.data, &N, + vl_ptr, &N, vr_ptr, &N, &MM, &M, + <double complex *>work.data, <double *>rwork.data, &info) + + assert info == 0, "Argument error in ztrevc" + assert MM == M, "Unexpected number of eigenvectors returned in ztrevc" + + if select is not None and Q is not None: + if left: + vl = np.asfortranarray(np.dot(Q, vl)) + if right: + vr = np.asfortranarray(np.dot(Q, vr)) + + if left and right: + return (vl, vr) + elif left: + return vl + else: + return vr + + +# wrappers for xGGES +def sgges(np.ndarray[np.float32_t, ndim=2] A, + np.ndarray[np.float32_t, ndim=2] B, + calc_q=True, calc_z=True, calc_ev=True): + cdef l_int N, lwork, sdim, info + cdef char *jobvsl + cdef char *jobvsr + cdef float *vsl_ptr + cdef float *vsr_ptr + cdef float qwork + cdef np.ndarray[np.float32_t, ndim=2] vsl, vsr + cdef np.ndarray[np.float32_t] alphar, alphai, beta, work + + assert_fortran_mat(A, B) + + N = A.shape[0] + alphar = np.empty(N, dtype = np.float32) + alphai = np.empty(N, dtype = np.float32) + beta = np.empty(N, dtype = np.float32) + + if calc_q: + vsl = np.empty((N,N), dtype = np.float32, order='F') + vsl_ptr = <float *>vsl.data + jobvsl = "V" + else: + vsl = None + vsl_ptr = NULL + jobvsl = "N" + + if calc_z: + vsr = np.empty((N,N), dtype = np.float32, order='F') + vsr_ptr = <float *>vsr.data + jobvsr = "V" + else: + vsr = None + vsr_ptr = NULL + jobvsr = "N" + + # workspace query + lwork = -1 + f_lapack.sgges_(jobvsl, jobvsr, "N", NULL, + &N, <float *>A.data, &N, + <float *>B.data, &N, &sdim, + <float *>alphar.data, <float *>alphai.data, + <float *>beta.data, + vsl_ptr, &N, vsr_ptr, &N, + &qwork, &lwork, NULL, &info) + + assert info == 0, "Argument error in zgees" + + lwork = <int>qwork + work = np.empty(lwork, dtype = np.float32) + + # Now the real calculation + f_lapack.sgges_(jobvsl, jobvsr, "N", NULL, + &N, <float *>A.data, &N, + <float *>B.data, &N, &sdim, + <float *>alphar.data, <float *>alphai.data, + <float *>beta.data, + vsl_ptr, &N, vsr_ptr, &N, + <float *>work.data, &lwork, NULL, &info) + + if info > 0: + raise LinAlgError("QZ iteration failed to converge in sgges") + + assert info == 0, "Argument error in zgees" + + if alphai.nonzero()[0].size: + alpha = alphar + 1j * alphai + else: + alpha = alphar + + return filter_args((True, True, calc_q, calc_z, calc_ev, calc_ev), + (A, B, vsl, vsr, alpha, beta)) + + +def dgges(np.ndarray[np.float64_t, ndim=2] A, + np.ndarray[np.float64_t, ndim=2] B, + calc_q=True, calc_z=True, calc_ev=True): + cdef l_int N, lwork, sdim, info + cdef char *jobvsl + cdef char *jobvsr + cdef double *vsl_ptr + cdef double *vsr_ptr + cdef double qwork + cdef np.ndarray[np.float64_t, ndim=2] vsl, vsr + cdef np.ndarray[np.float64_t] alphar, alphai, beta, work + + assert_fortran_mat(A, B) + + N = A.shape[0] + alphar = np.empty(N, dtype = np.float64) + alphai = np.empty(N, dtype = np.float64) + beta = np.empty(N, dtype = np.float64) + + if calc_q: + vsl = np.empty((N,N), dtype = np.float64, order='F') + vsl_ptr = <double *>vsl.data + jobvsl = "V" + else: + vsl = None + vsl_ptr = NULL + jobvsl = "N" + + if calc_z: + vsr = np.empty((N,N), dtype = np.float64, order='F') + vsr_ptr = <double *>vsr.data + jobvsr = "V" + else: + vsr = None + vsr_ptr = NULL + jobvsr = "N" + + # workspace query + lwork = -1 + f_lapack.dgges_(jobvsl, jobvsr, "N", NULL, + &N, <double *>A.data, &N, + <double *>B.data, &N, &sdim, + <double *>alphar.data, <double *>alphai.data, + <double *>beta.data, + vsl_ptr, &N, vsr_ptr, &N, + &qwork, &lwork, NULL, &info) + + assert info == 0, "Argument error in zgees" + + lwork = <int>qwork + work = np.empty(lwork, dtype = np.float64) + + # Now the real calculation + f_lapack.dgges_(jobvsl, jobvsr, "N", NULL, + &N, <double *>A.data, &N, + <double *>B.data, &N, &sdim, + <double *>alphar.data, <double *>alphai.data, + <double *>beta.data, + vsl_ptr, &N, vsr_ptr, &N, + <double *>work.data, &lwork, NULL, &info) + + if info > 0: + raise LinAlgError("QZ iteration failed to converge in dgges") + + assert info == 0, "Argument error in zgees" + + if alphai.nonzero()[0].size: + alpha = alphar + 1j * alphai + else: + alpha = alphar + + return filter_args((True, True, calc_q, calc_z, calc_ev, calc_ev), + (A, B, vsl, vsr, alpha, beta)) + + +def cgges(np.ndarray[np.complex64_t, ndim=2] A, + np.ndarray[np.complex64_t, ndim=2] B, + calc_q=True, calc_z=True, calc_ev=True): + cdef l_int N, lwork, sdim, info + cdef char *jobvsl + cdef char *jobvsr + cdef float complex *vsl_ptr + cdef float complex *vsr_ptr + cdef float complex qwork + cdef np.ndarray[np.complex64_t, ndim=2] vsl, vsr + cdef np.ndarray[np.complex64_t] alpha, beta, work + cdef np.ndarray[np.float32_t] rwork + + assert_fortran_mat(A, B) + + N = A.shape[0] + alpha = np.empty(N, dtype = np.complex64) + beta = np.empty(N, dtype = np.complex64) + rwork = np.empty(8*N, dtype = np.float32) + + if calc_q: + vsl = np.empty((N,N), dtype = np.complex64, order='F') + vsl_ptr = <float complex *>vsl.data + jobvsl = "V" + else: + vsl = None + vsl_ptr = NULL + jobvsl = "N" + + if calc_z: + vsr = np.empty((N,N), dtype = np.complex64, order='F') + vsr_ptr = <float complex *>vsr.data + jobvsr = "V" + else: + vsr = None + vsr_ptr = NULL + jobvsr = "N" + + # workspace query + lwork = -1 + f_lapack.cgges_(jobvsl, jobvsr, "N", NULL, + &N, <float complex *>A.data, &N, + <float complex *>B.data, &N, &sdim, + <float complex *>alpha.data, <float complex *>beta.data, + vsl_ptr, &N, vsr_ptr, &N, + &qwork, &lwork, <float *>rwork.data, NULL, &info) + + assert info == 0, "Argument error in zgees" + + lwork = <int>qwork.real + work = np.empty(lwork, dtype = np.complex64) + + # Now the real calculation + f_lapack.cgges_(jobvsl, jobvsr, "N", NULL, + &N, <float complex *>A.data, &N, + <float complex *>B.data, &N, &sdim, + <float complex *>alpha.data, <float complex *>beta.data, + vsl_ptr, &N, vsr_ptr, &N, + <float complex *>work.data, &lwork, + <float *>rwork.data, NULL, &info) + + if info > 0: + raise LinAlgError("QZ iteration failed to converge in cgges") + + assert info == 0, "Argument error in zgees" + + return filter_args((True, True, calc_q, calc_z, calc_ev, calc_ev), + (A, B, vsl, vsr, alpha, beta)) + + +def zgges(np.ndarray[np.complex128_t, ndim=2] A, + np.ndarray[np.complex128_t, ndim=2] B, + calc_q=True, calc_z=True, calc_ev=True): + cdef l_int N, lwork, sdim, info + cdef char *jobvsl + cdef char *jobvsr + cdef double complex *vsl_ptr + cdef double complex *vsr_ptr + cdef double complex qwork + cdef np.ndarray[np.complex128_t, ndim=2] vsl, vsr + cdef np.ndarray[np.complex128_t] alpha, beta, work + cdef np.ndarray[np.float64_t] rwork + + assert_fortran_mat(A, B) + + N = A.shape[0] + alpha = np.empty(N, dtype = np.complex128) + beta = np.empty(N, dtype = np.complex128) + rwork = np.empty(8*N, dtype = np.float64) + + if calc_q: + vsl = np.empty((N,N), dtype = np.complex128, order='F') + vsl_ptr = <double complex *>vsl.data + jobvsl = "V" + else: + vsl = None + vsl_ptr = NULL + jobvsl = "N" + + if calc_z: + vsr = np.empty((N,N), dtype = np.complex128, order='F') + vsr_ptr = <double complex *>vsr.data + jobvsr = "V" + else: + vsr = None + vsr_ptr = NULL + jobvsr = "N" + + # workspace query + lwork = -1 + f_lapack.zgges_(jobvsl, jobvsr, "N", NULL, + &N, <double complex *>A.data, &N, + <double complex *>B.data, &N, &sdim, + <double complex *>alpha.data, <double complex *>beta.data, + vsl_ptr, &N, vsr_ptr, &N, + &qwork, &lwork, <double *>rwork.data, NULL, &info) + + assert info == 0, "Argument error in zgees" + + lwork = <int>qwork.real + work = np.empty(lwork, dtype = np.complex128) + + # Now the real calculation + f_lapack.zgges_(jobvsl, jobvsr, "N", NULL, + &N, <double complex *>A.data, &N, + <double complex *>B.data, &N, &sdim, + <double complex *>alpha.data, <double complex *>beta.data, + vsl_ptr, &N, vsr_ptr, &N, + <double complex *>work.data, &lwork, + <double *>rwork.data, NULL, &info) + + if info > 0: + raise LinAlgError("QZ iteration failed to converge in zgges") + + assert info == 0, "Argument error in zgees" + + return filter_args((True, True, calc_q, calc_z, calc_ev, calc_ev), + (A, B, vsl, vsr, alpha, beta)) + + +# wrappers for xTGSEN +def stgsen(np.ndarray[l_logical] select, + np.ndarray[np.float32_t, ndim=2] S, + np.ndarray[np.float32_t, ndim=2] T, + np.ndarray[np.float32_t, ndim=2] Q=None, + np.ndarray[np.float32_t, ndim=2] Z=None, + calc_ev=True): + cdef l_int N, M, lwork, liwork, qiwork, info, ijob + cdef l_logical wantq, wantz + cdef float qwork + cdef float *q_ptr + cdef float *z_ptr + cdef np.ndarray[np.float32_t] alphar, alphai, beta, work + cdef np.ndarray[l_int] iwork + + assert_fortran_mat(S, T, Q, Z) + + N = S.shape[0] + alphar = np.empty(N, dtype = np.float32) + alphai = np.empty(N, dtype = np.float32) + beta = np.empty(N, dtype = np.float32) + ijob = 0 + + if Q is not None: + wantq = 1 + q_ptr = <float *>Q.data + else: + wantq = 0 + q_ptr = NULL + + if Z is not None: + wantz = 1 + z_ptr = <float *>Z.data + else: + wantz = 0 + z_ptr = NULL + + # workspace query + lwork = -1 + liwork = -1 + f_lapack.stgsen_(&ijob, &wantq, &wantz, <l_logical *>select.data, + &N, <float *>S.data, &N, + <float *>T.data, &N, + <float *>alphar.data, <float *>alphai.data, + <float *>beta.data, + q_ptr, &N, z_ptr, &N, &M, NULL, NULL, NULL, + &qwork, &lwork, &qiwork, &liwork, &info) + + assert info == 0, "Argument error in stgsen" + + lwork = <int>qwork + work = np.empty(lwork, dtype = np.float32) + liwork = qiwork + iwork = np.empty(liwork, dtype = int_dtype) + + # Now the real calculation + f_lapack.stgsen_(&ijob, &wantq, &wantz, <l_logical *>select.data, + &N, <float *>S.data, &N, + <float *>T.data, &N, + <float *>alphar.data, <float *>alphai.data, + <float *>beta.data, + q_ptr, &N, z_ptr, &N, &M, NULL, NULL, NULL, + <float *>work.data, &lwork, + <l_int *>iwork.data, &liwork, &info) + + if info > 0: + raise LinAlgError("Reordering failed; problem is very ill-conditioned") + + assert info == 0, "Argument error in stgsen" + + if alphai.nonzero()[0].size: + alpha = alphar + 1j * alphai + else: + alpha = alphar + + return filter_args((True, True, Q is not None, Z is not None, + calc_ev, calc_ev), + (S, T, Q, Z, alpha, beta)) + + +def dtgsen(np.ndarray[l_logical] select, + np.ndarray[np.float64_t, ndim=2] S, + np.ndarray[np.float64_t, ndim=2] T, + np.ndarray[np.float64_t, ndim=2] Q=None, + np.ndarray[np.float64_t, ndim=2] Z=None, + calc_ev=True): + cdef l_int N, M, lwork, liwork, qiwork, info, ijob + cdef l_logical wantq, wantz + cdef double qwork + cdef double *q_ptr + cdef double *z_ptr + cdef np.ndarray[np.float64_t] alphar, alphai, beta, work + cdef np.ndarray[l_int] iwork + + assert_fortran_mat(S, T, Q, Z) + + N = S.shape[0] + alphar = np.empty(N, dtype = np.float64) + alphai = np.empty(N, dtype = np.float64) + beta = np.empty(N, dtype = np.float64) + ijob = 0 + + if Q is not None: + wantq = 1 + q_ptr = <double *>Q.data + else: + wantq = 0 + q_ptr = NULL + + if Z is not None: + wantz = 1 + z_ptr = <double *>Z.data + else: + wantz = 0 + z_ptr = NULL + + # workspace query + lwork = -1 + liwork = -1 + f_lapack.dtgsen_(&ijob, &wantq, &wantz, <l_logical *>select.data, + &N, <double *>S.data, &N, + <double *>T.data, &N, + <double *>alphar.data, <double *>alphai.data, + <double *>beta.data, + q_ptr, &N, z_ptr, &N, &M, NULL, NULL, NULL, + &qwork, &lwork, &qiwork, &liwork, &info) + + assert info == 0, "Argument error in dtgsen" + + lwork = <int>qwork + work = np.empty(lwork, dtype = np.float64) + liwork = qiwork + iwork = np.empty(liwork, dtype = int_dtype) + + # Now the real calculation + f_lapack.dtgsen_(&ijob, &wantq, &wantz, <l_logical *>select.data, + &N, <double *>S.data, &N, + <double *>T.data, &N, + <double *>alphar.data, <double *>alphai.data, + <double *>beta.data, + q_ptr, &N, z_ptr, &N, &M, NULL, NULL, NULL, + <double *>work.data, &lwork, + <l_int *>iwork.data, &liwork, &info) + + if info > 0: + raise LinAlgError("Reordering failed; problem is very ill-conditioned") + + assert info == 0, "Argument error in dtgsen" + + if alphai.nonzero()[0].size: + alpha = alphar + 1j * alphai + else: + alpha = alphar + + return filter_args((True, True, Q is not None, Z is not None, + calc_ev, calc_ev), + (S, T, Q, Z, alpha, beta)) + + +def ctgsen(np.ndarray[l_logical] select, + np.ndarray[np.complex64_t, ndim=2] S, + np.ndarray[np.complex64_t, ndim=2] T, + np.ndarray[np.complex64_t, ndim=2] Q=None, + np.ndarray[np.complex64_t, ndim=2] Z=None, + calc_ev=True): + cdef l_int N, M, lwork, liwork, qiwork, info, ijob + cdef l_logical wantq, wantz + cdef float complex qwork + cdef float complex *q_ptr + cdef float complex *z_ptr + cdef np.ndarray[np.complex64_t] alpha, beta, work + cdef np.ndarray[l_int] iwork + + assert_fortran_mat(S, T, Q, Z) + + N = S.shape[0] + alpha = np.empty(N, dtype = np.complex64) + beta = np.empty(N, dtype = np.complex64) + ijob = 0 + + if Q is not None: + wantq = 1 + q_ptr = <float complex *>Q.data + else: + wantq = 0 + q_ptr = NULL + + if Z is not None: + wantz = 1 + z_ptr = <float complex *>Z.data + else: + wantz = 0 + z_ptr = NULL + + # workspace query + lwork = -1 + liwork = -1 + f_lapack.ctgsen_(&ijob, &wantq, &wantz, <l_logical *>select.data, + &N, <float complex *>S.data, &N, + <float complex *>T.data, &N, + <float complex *>alpha.data, <float complex *>beta.data, + q_ptr, &N, z_ptr, &N, &M, NULL, NULL, NULL, + &qwork, &lwork, &qiwork, &liwork, &info) + + assert info == 0, "Argument error in ctgsen" + + lwork = <int>qwork.real + work = np.empty(lwork, dtype = np.complex64) + liwork = qiwork + iwork = np.empty(liwork, dtype = int_dtype) + + # Now the real calculation + f_lapack.ctgsen_(&ijob, &wantq, &wantz, <l_logical *>select.data, + &N, <float complex *>S.data, &N, + <float complex *>T.data, &N, + <float complex *>alpha.data, <float complex *>beta.data, + q_ptr, &N, z_ptr, &N, &M, NULL, NULL, NULL, + <float complex *>work.data, &lwork, + <l_int *>iwork.data, &liwork, &info) + + if info > 0: + raise LinAlgError("Reordering failed; problem is very ill-conditioned") + + assert info == 0, "Argument error in ctgsen" + + return filter_args((True, True, Q is not None, Z is not None, + calc_ev, calc_ev), + (S, T, Q, Z, alpha, beta)) + + +def ztgsen(np.ndarray[l_logical] select, + np.ndarray[np.complex128_t, ndim=2] S, + np.ndarray[np.complex128_t, ndim=2] T, + np.ndarray[np.complex128_t, ndim=2] Q=None, + np.ndarray[np.complex128_t, ndim=2] Z=None, + calc_ev=True): + cdef l_int N, M, lwork, liwork, qiwork, info, ijob + cdef l_logical wantq, wantz + cdef double complex qwork + cdef double complex *q_ptr + cdef double complex *z_ptr + cdef np.ndarray[np.complex128_t] alpha, beta, work + cdef np.ndarray[l_int] iwork + + assert_fortran_mat(S, T, Q, Z) + + N = S.shape[0] + alpha = np.empty(N, dtype = np.complex128) + beta = np.empty(N, dtype = np.complex128) + ijob = 0 + + if Q is not None: + wantq = 1 + q_ptr = <double complex *>Q.data + else: + wantq = 0 + q_ptr = NULL + + if Z is not None: + wantz = 1 + z_ptr = <double complex *>Z.data + else: + wantz = 0 + z_ptr = NULL + + # workspace query + lwork = -1 + liwork = -1 + f_lapack.ztgsen_(&ijob, &wantq, &wantz, <l_logical *>select.data, + &N, <double complex *>S.data, &N, + <double complex *>T.data, &N, + <double complex *>alpha.data, <double complex *>beta.data, + q_ptr, &N, z_ptr, &N, &M, NULL, NULL, NULL, + &qwork, &lwork, &qiwork, &liwork, &info) + + assert info == 0, "Argument error in ztgsen" + + lwork = <int>qwork.real + work = np.empty(lwork, dtype = np.complex128) + liwork = qiwork + iwork = np.empty(liwork, dtype = int_dtype) + + # Now the real calculation + f_lapack.ztgsen_(&ijob, &wantq, &wantz, <l_logical *>select.data, + &N, <double complex *>S.data, &N, + <double complex *>T.data, &N, + <double complex *>alpha.data, <double complex *>beta.data, + q_ptr, &N, z_ptr, &N, &M, NULL, NULL, NULL, + <double complex *>work.data, &lwork, + <l_int *>iwork.data, &liwork, &info) - # Parameter checks + if info > 0: + raise LinAlgError("Reordering failed; problem is very ill-conditioned") - if (T.shape[0] != T.shape[1] or Q.shape[0] != Q.shape[1] - or T.shape[0] != Q.shape[0]): - raise ValueError("Invalid Schur decomposition as input") + assert info == 0, "Argument error in ztgsen" - assert_fortran_mat(T, Q) + return filter_args((True, True, Q is not None, Z is not None, + calc_ev, calc_ev), + (S, T, Q, Z, alpha, beta)) - # Workspace allocation - N = T.shape[0] +# xTGEVC +def stgevc(np.ndarray[np.float32_t, ndim=2] S, + np.ndarray[np.float32_t, ndim=2] T, + np.ndarray[np.float32_t, ndim=2] Q=None, + np.ndarray[np.float32_t, ndim=2] Z=None, + np.ndarray[l_logical] select=None, + left=False, right=True): + cdef l_int N, info, M, MM + cdef char *side + cdef char *howmny + cdef np.ndarray[np.float32_t, ndim=2] vl_r, vr_r + cdef float *vl_r_ptr + cdef float *vr_r_ptr + cdef np.ndarray[l_logical] select_cpy + cdef l_logical *select_ptr + cdef np.ndarray[np.float32_t] work - cdef np.ndarray[scalar] work - if scalar in floating: - work = np.empty(4 * N, dtype=T.dtype) - else: - work = np.empty(2 * N, dtype=T.dtype) + assert_fortran_mat(S, T, Q, Z) - cdef np.ndarray rwork = None - if scalar is float_complex: - rwork = np.empty(N, dtype=np.float32) - elif scalar is double_complex: - rwork = np.empty(N, dtype=np.float64) + N = S.shape[0] + work = np.empty(6*N, dtype = np.float32) if left and right: side = "B" @@ -694,102 +2089,78 @@ def trevc(np.ndarray[scalar, ndim=2] T, else: return - cdef np.ndarray[l_logical] select_cpy - cdef l_logical *select_ptr + backtr = False + if select is not None: howmny = "S" MM = select.nonzero()[0].size # Correct for possible additional storage if a single complex # eigenvalue is selected. # For that: Figure out the positions of the 2x2 blocks. - cmplxindx = np.diagonal(T, -1).nonzero()[0] + cmplxindx = np.diagonal(S, -1).nonzero()[0] for i in cmplxindx: if bool(select[i]) != bool(select[i+1]): MM += 1 - # Select is overwritten in strevc. - select_cpy = np.array(select, dtype = logical_dtype, + # select is overwritten in stgevc + select_cpy = np.array(select, dtype = f_lapack.l_logical_dtype, order = 'F') select_ptr = <l_logical *>select_cpy.data else: MM = N select_ptr = NULL - if Q is not None: + if ((left and right and Q is not None and Z is not None) or + (left and not right and Q is not None) or + (right and not left and Z is not None)): howmny = "B" + backtr = True else: howmny = "A" - cdef np.ndarray[scalar, ndim=2] vl_r = None - cdef scalar *vl_r_ptr if left: - if Q is not None and select is None: - vl_r = np.asfortranarray(Q.copy()) + if backtr: + vl_r = Q else: - vl_r = np.empty((N, MM), dtype=T.dtype, order='F') - vl_r_ptr = <scalar *>vl_r.data + vl_r = np.empty((N, MM), dtype = np.float32, order='F') + vl_r_ptr = <float *>vl_r.data else: vl_r_ptr = NULL - cdef np.ndarray[scalar, ndim=2] vr_r = None - cdef scalar *vr_r_ptr if right: - if Q is not None and select is None: - vr_r = np.asfortranarray(Q.copy()) + if backtr: + vr_r = Z else: - vr_r = np.empty((N, MM), dtype=T.dtype, order='F') - vr_r_ptr = <scalar *>vr_r.data + vr_r = np.empty((N, MM), dtype = np.float32, order='F') + vr_r_ptr = <float *>vr_r.data else: vr_r_ptr = NULL - # The actual calculation - - if scalar is float: - lapack.strevc(side, howmny, select_ptr, - &N, <float *>T.data, &N, - vl_r_ptr, &N, vr_r_ptr, &N, &MM, &M, - <float *>work.data, &info) - elif scalar is double: - lapack.dtrevc(side, howmny, select_ptr, - &N, <double *>T.data, &N, - vl_r_ptr, &N, vr_r_ptr, &N, &MM, &M, - <double *>work.data, &info) - elif scalar is float_complex: - lapack.ctrevc(side, howmny, select_ptr, - &N, <float complex *>T.data, &N, - vl_r_ptr, &N, vr_r_ptr, &N, &MM, &M, - <float complex *>work.data, <float *>rwork.data, &info) - elif scalar is double_complex: - lapack.ztrevc(side, howmny, select_ptr, - &N, <double complex *>T.data, &N, - vl_r_ptr, &N, vr_r_ptr, &N, &MM, &M, - <double complex *>work.data, <double *>rwork.data, &info) - - assert info == 0, "Argument error in trevc" - assert MM == M, "Unexpected number of eigenvectors returned in strevc" + f_lapack.stgevc_(side, howmny, select_ptr, + &N, <float *>S.data, &N, + <float *>T.data, &N, + vl_r_ptr, &N, vr_r_ptr, &N, &MM, &M, + <float *>work.data, &info) - if select is not None and Q is not None: + assert info == 0, "Argument error in stgevc" + assert MM == M, "Unexpected number of eigenvectors returned in stgevc" + + if not backtr: if left: vl_r = np.asfortranarray(np.dot(Q, vl_r)) if right: - vr_r = np.asfortranarray(np.dot(Q, vr_r)) + vr_r = np.asfortranarray(np.dot(Z, vr_r)) - cdef np.ndarray vl, vr - if left: - vl = vl_r - if right: - vr = vr_r - if scalar in floating: - # If there are complex eigenvalues, we need to postprocess the - # eigenvectors. - if scalar is float: - dtype = np.complex64 - else: - dtype = np.complex128 - if np.diagonal(T, -1).nonzero()[0].size: - if left: - vl = txevc_postprocess(dtype, T, vl_r, select) - if right: - vr = txevc_postprocess(dtype, T, vr_r, select) + # If there are complex eigenvalues, we need to postprocess the eigenvectors + if np.diagonal(S, -1).nonzero()[0].size: + if left: + vl = txevc_postprocess(np.complex64, S, vl_r, select) + if right: + vr = txevc_postprocess(np.complex64, S, vr_r, select) + else: + if left: + vl = vl_r + if right: + vr = vr_r if left and right: return (vl, vr) @@ -799,340 +2170,229 @@ def trevc(np.ndarray[scalar, ndim=2] T, return vr -def gges(np.ndarray[scalar, ndim=2] A, - np.ndarray[scalar, ndim=2] B, - calc_q=True, calc_z=True, calc_ev=True): - cdef l_int N, sdim, info - - # Check parameters - - assert_fortran_mat(A, B) - - if A.shape[0] != B.shape[1]: - raise ValueError("Expect square matrix A") - - if A.shape[0] != B.shape[0] or A.shape[0] != B.shape[1]: - raise ValueError("Shape of B is incompatible with matrix A") +def dtgevc(np.ndarray[np.float64_t, ndim=2] S, + np.ndarray[np.float64_t, ndim=2] T, + np.ndarray[np.float64_t, ndim=2] Q=None, + np.ndarray[np.float64_t, ndim=2] Z=None, + np.ndarray[l_logical] select=None, + left=False, right=True): + cdef l_int N, info, M, MM + cdef char *side + cdef char *howmny + cdef np.ndarray[np.float64_t, ndim=2] vl_r, vr_r + cdef double *vl_r_ptr + cdef double *vr_r_ptr + cdef np.ndarray[l_logical] select_cpy + cdef l_logical *select_ptr + cdef np.ndarray[np.float64_t] work - # Allocate workspaces + assert_fortran_mat(S, T, Q, Z) - N = A.shape[0] + N = S.shape[0] + work = np.empty(6*N, dtype = np.float64) - cdef np.ndarray[scalar] alphar, alphai - if scalar in cmplx: - alphar = np.empty(N, dtype=A.dtype) - alphai = None + if left and right: + side = "B" + elif left: + side = "L" + elif right: + side = "R" else: - alphar = np.empty(N, dtype=A.dtype) - alphai = np.empty(N, dtype=A.dtype) + return - cdef np.ndarray[scalar] beta = np.empty(N, dtype=A.dtype) + backtr = False - cdef np.ndarray rwork = None - if scalar is float_complex: - rwork = np.empty(8 * N, dtype=np.float32) - elif scalar is double_complex: - rwork = np.empty(8 * N, dtype=np.float64) + if select is not None: + howmny = "S" + MM = select.nonzero()[0].size + # Correct for possible additional storage if a single complex + # eigenvalue is selected. + # For that: Figure out the positions of the 2x2 blocks. + cmplxindx = np.diagonal(S, -1).nonzero()[0] + for i in cmplxindx: + if bool(select[i]) != bool(select[i+1]): + MM += 1 - cdef char *jobvsl - cdef scalar *vsl_ptr - cdef np.ndarray[scalar, ndim=2] vsl - if calc_q: - vsl = np.empty((N,N), dtype=A.dtype, order='F') - vsl_ptr = <scalar *>vsl.data - jobvsl = "V" + # select is overwritten in dtgevc + select_cpy = np.array(select, dtype = f_lapack.l_logical_dtype, + order = 'F') + select_ptr = <l_logical *>select_cpy.data else: - vsl = None - vsl_ptr = NULL - jobvsl = "N" + MM = N + select_ptr = NULL + if ((left and right and Q is not None and Z is not None) or + (left and not right and Q is not None) or + (right and not left and Z is not None)): + howmny = "B" + backtr = True + else: + howmny = "A" - cdef char *jobvsr - cdef scalar *vsr_ptr - cdef np.ndarray[scalar, ndim=2] vsr - if calc_z: - vsr = np.empty((N,N), dtype=A.dtype, order='F') - vsr_ptr = <scalar *>vsr.data - jobvsr = "V" + if left: + if backtr: + vl_r = Q + else: + vl_r = np.empty((N, MM), dtype = np.float64, order='F') + vl_r_ptr = <double *>vl_r.data else: - vsr = None - vsr_ptr = NULL - jobvsr = "N" - - # Workspace query - # Xgges expects &qwork as a <scalar *> (even though it's an integer) - cdef l_int lwork = -1 - cdef scalar qwork - - if scalar is float: - lapack.sgges(jobvsl, jobvsr, "N", NULL, - &N, <float *>A.data, &N, - <float *>B.data, &N, &sdim, - <float *>alphar.data, <float *>alphai.data, - <float *>beta.data, - vsl_ptr, &N, vsr_ptr, &N, - &qwork, &lwork, NULL, &info) - elif scalar is double: - lapack.dgges(jobvsl, jobvsr, "N", NULL, - &N, <double *>A.data, &N, - <double *>B.data, &N, &sdim, - <double *>alphar.data, <double *>alphai.data, - <double *>beta.data, - vsl_ptr, &N, vsr_ptr, &N, - &qwork, &lwork, NULL, &info) - elif scalar is float_complex: - lapack.cgges(jobvsl, jobvsr, "N", NULL, - &N, <float complex *>A.data, &N, - <float complex *>B.data, &N, &sdim, - <float complex *>alphar.data, <float complex *>beta.data, - vsl_ptr, &N, vsr_ptr, &N, - &qwork, &lwork, <float *>rwork.data, NULL, &info) - elif scalar is double_complex: - lapack.zgges(jobvsl, jobvsr, "N", NULL, - &N, <double complex *>A.data, &N, - <double complex *>B.data, &N, &sdim, - <double complex *>alphar.data, <double complex *>beta.data, - vsl_ptr, &N, vsr_ptr, &N, - &qwork, &lwork, <double *>rwork.data, NULL, &info) - - assert info == 0, "Argument error in gges" - - lwork = lwork_from_qwork(qwork) - cdef np.ndarray[scalar] work = np.empty(lwork, dtype=A.dtype) - - # The actual calculation - - if scalar is float: - lapack.sgges(jobvsl, jobvsr, "N", NULL, - &N, <float *>A.data, &N, - <float *>B.data, &N, &sdim, - <float *>alphar.data, <float *>alphai.data, - <float *>beta.data, - vsl_ptr, &N, vsr_ptr, &N, - <float *>work.data, &lwork, NULL, &info) - elif scalar is double: - lapack.dgges(jobvsl, jobvsr, "N", NULL, - &N, <double *>A.data, &N, - <double *>B.data, &N, &sdim, - <double *>alphar.data, <double *>alphai.data, - <double *>beta.data, - vsl_ptr, &N, vsr_ptr, &N, - <double *>work.data, &lwork, NULL, &info) - elif scalar is float_complex: - lapack.cgges(jobvsl, jobvsr, "N", NULL, - &N, <float complex *>A.data, &N, - <float complex *>B.data, &N, &sdim, - <float complex *>alphar.data, <float complex *>beta.data, - vsl_ptr, &N, vsr_ptr, &N, - <float complex *>work.data, &lwork, - <float *>rwork.data, NULL, &info) - elif scalar is double_complex: - lapack.zgges(jobvsl, jobvsr, "N", NULL, - &N, <double complex *>A.data, &N, - <double complex *>B.data, &N, &sdim, - <double complex *>alphar.data, <double complex *>beta.data, - vsl_ptr, &N, vsr_ptr, &N, - <double complex *>work.data, &lwork, - <double *>rwork.data, NULL, &info) + vl_r_ptr = NULL - if info > 0: - raise LinAlgError("QZ iteration failed to converge in gges") + if right: + if backtr: + vr_r = Z + else: + vr_r = np.empty((N, MM), dtype = np.float64, order='F') + vr_r_ptr = <double *>vr_r.data + else: + vr_r_ptr = NULL - assert info == 0, "Argument error in gges" + f_lapack.dtgevc_(side, howmny, select_ptr, + &N, <double *>S.data, &N, + <double *>T.data, &N, + vl_r_ptr, &N, vr_r_ptr, &N, &MM, &M, + <double *>work.data, &info) - # Real inputs possibly produce complex output - cdef np.ndarray alpha = maybe_complex[scalar](0, alphar, alphai) + assert info == 0, "Argument error in dtgevc" + assert MM == M, "Unexpected number of eigenvectors returned in dtgevc" - return filter_args((True, True, calc_q, calc_z, calc_ev, calc_ev), - (A, B, vsl, vsr, alpha, beta)) + if not backtr: + if left: + vl_r = np.asfortranarray(np.dot(Q, vl_r)) + if right: + vr_r = np.asfortranarray(np.dot(Z, vr_r)) + # If there are complex eigenvalues, we need to postprocess the + # eigenvectors. + if np.diagonal(S, -1).nonzero()[0].size: + if left: + vl = txevc_postprocess(np.complex128, S, vl_r, select) + if right: + vr = txevc_postprocess(np.complex128, S, vr_r, select) + else: + if left: + vl = vl_r + if right: + vr = vr_r -def tgsen(np.ndarray[l_logical] select, - np.ndarray[scalar, ndim=2] S, - np.ndarray[scalar, ndim=2] T, - np.ndarray[scalar, ndim=2] Q, - np.ndarray[scalar, ndim=2] Z, - calc_ev=True): - cdef l_int ijob = 0 - cdef l_int N, M, lwork, liwork, info + if left and right: + return (vl, vr) + elif left: + return vl + else: + return vr - # Check parameters - if ((S.shape[0] != S.shape[1] or T.shape[0] != T.shape[1] or - S.shape[0] != T.shape[0]) or - (Q is not None and (Q.shape[0] != Q.shape[1] or - S.shape[0] != Q.shape[0])) or - (Z is not None and (Z.shape[0] != Z.shape[1] or - S.shape[0] != Z.shape[0]))): - raise ValueError("Invalid Schur decomposition as input") +def ctgevc(np.ndarray[np.complex64_t, ndim=2] S, + np.ndarray[np.complex64_t, ndim=2] T, + np.ndarray[np.complex64_t, ndim=2] Q=None, + np.ndarray[np.complex64_t, ndim=2] Z=None, + np.ndarray[l_logical] select=None, + left=False, right=True): + cdef l_int N, info, M, MM + cdef char *side + cdef char *howmny + cdef np.ndarray[np.complex64_t, ndim=2] vl, vr + cdef float complex *vl_ptr + cdef float complex *vr_ptr + cdef l_logical *select_ptr + cdef np.ndarray[np.complex64_t] work + cdef np.ndarray[np.float32_t] rwork assert_fortran_mat(S, T, Q, Z) - # Allocate workspaces - N = S.shape[0] + work = np.empty(2*N, dtype = np.complex64) + rwork = np.empty(2*N, dtype = np.float32) - cdef np.ndarray[scalar] alphar, alphai - if scalar in cmplx: - alphar = np.empty(N, dtype=S.dtype) - alphai = None + if left and right: + side = "B" + elif left: + side = "L" + elif right: + side = "R" else: - alphar = np.empty(N, dtype=S.dtype) - alphai = np.empty(N, dtype=S.dtype) + return - cdef np.ndarray[scalar] beta - beta = np.empty(N, dtype=S.dtype) + backtr = False - cdef l_logical wantq - cdef scalar *q_ptr - if Q is not None: - wantq = 1 - q_ptr = <scalar *>Q.data + if select is not None: + howmny = "S" + MM = select.nonzero()[0].size + select_ptr = <l_logical *>select.data else: - wantq = 0 - q_ptr = NULL + MM = N + select_ptr = NULL + if ((left and right and Q is not None and Z is not None) or + (left and not right and Q is not None) or + (right and not left and Z is not None)): + howmny = "B" + backtr = True + else: + howmny = "A" - cdef l_logical wantz - cdef scalar *z_ptr - if Z is not None: - wantz = 1 - z_ptr = <scalar *>Z.data + if left: + if backtr: + vl = Q + else: + vl = np.empty((N, MM), dtype = np.complex64, order='F') + vl_ptr = <float complex *>vl.data else: - wantz = 0 - z_ptr = NULL - - # Workspace query - # Xtgsen expects &qwork as a <scalar *> (even though it's an integer) - lwork = -1 - liwork = -1 - cdef scalar qwork - cdef l_int qiwork - - if scalar is float: - lapack.stgsen(&ijob, &wantq, &wantz, <l_logical *>select.data, - &N, <float *>S.data, &N, - <float *>T.data, &N, - <float *>alphar.data, <float *>alphai.data, - <float *>beta.data, - q_ptr, &N, z_ptr, &N, &M, NULL, NULL, NULL, - &qwork, &lwork, &qiwork, &liwork, &info) - elif scalar is double: - lapack.dtgsen(&ijob, &wantq, &wantz, <l_logical *>select.data, - &N, <double *>S.data, &N, - <double *>T.data, &N, - <double *>alphar.data, <double *>alphai.data, - <double *>beta.data, - q_ptr, &N, z_ptr, &N, &M, NULL, NULL, NULL, - &qwork, &lwork, &qiwork, &liwork, &info) - elif scalar is float_complex: - lapack.ctgsen(&ijob, &wantq, &wantz, <l_logical *>select.data, - &N, <float complex *>S.data, &N, - <float complex *>T.data, &N, - <float complex *>alphar.data, <float complex *>beta.data, - q_ptr, &N, z_ptr, &N, &M, NULL, NULL, NULL, - &qwork, &lwork, &qiwork, &liwork, &info) - elif scalar is double_complex: - lapack.ztgsen(&ijob, &wantq, &wantz, <l_logical *>select.data, - &N, <double complex *>S.data, &N, - <double complex *>T.data, &N, - <double complex *>alphar.data, <double complex *>beta.data, - q_ptr, &N, z_ptr, &N, &M, NULL, NULL, NULL, - &qwork, &lwork, &qiwork, &liwork, &info) - - assert info == 0, "Argument error in tgsen" - - lwork = lwork_from_qwork(qwork) - cdef np.ndarray[scalar] work = np.empty(lwork, dtype=S.dtype) + vl_ptr = NULL - liwork = qiwork - cdef np.ndarray[l_int] iwork = np.empty(liwork, dtype=int_dtype) - - # The actual calculation - - if scalar is float: - lapack.stgsen(&ijob, &wantq, &wantz, <l_logical *>select.data, - &N, <float *>S.data, &N, - <float *>T.data, &N, - <float *>alphar.data, <float *>alphai.data, - <float *>beta.data, - q_ptr, &N, z_ptr, &N, &M, NULL, NULL, NULL, - <float *>work.data, &lwork, - <l_int *>iwork.data, &liwork, &info) - elif scalar is double: - lapack.dtgsen(&ijob, &wantq, &wantz, <l_logical *>select.data, - &N, <double *>S.data, &N, - <double *>T.data, &N, - <double *>alphar.data, <double *>alphai.data, - <double *>beta.data, - q_ptr, &N, z_ptr, &N, &M, NULL, NULL, NULL, - <double *>work.data, &lwork, - <l_int *>iwork.data, &liwork, &info) - elif scalar is float_complex: - lapack.ctgsen(&ijob, &wantq, &wantz, <l_logical *>select.data, - &N, <float complex *>S.data, &N, - <float complex *>T.data, &N, - <float complex *>alphar.data, <float complex *>beta.data, - q_ptr, &N, z_ptr, &N, &M, NULL, NULL, NULL, - <float complex *>work.data, &lwork, - <l_int *>iwork.data, &liwork, &info) - elif scalar is double_complex: - lapack.ztgsen(&ijob, &wantq, &wantz, <l_logical *>select.data, - &N, <double complex *>S.data, &N, - <double complex *>T.data, &N, - <double complex *>alphar.data, <double complex *>beta.data, - q_ptr, &N, z_ptr, &N, &M, NULL, NULL, NULL, - <double complex *>work.data, &lwork, - <l_int *>iwork.data, &liwork, &info) + if right: + if backtr: + vr = Z + else: + vr = np.empty((N, MM), dtype = np.complex64, order='F') + vr_ptr = <float complex *>vr.data + else: + vr_ptr = NULL - if info > 0: - raise LinAlgError("Reordering failed; problem is very ill-conditioned") + f_lapack.ctgevc_(side, howmny, select_ptr, + &N, <float complex *>S.data, &N, + <float complex *>T.data, &N, + vl_ptr, &N, vr_ptr, &N, &MM, &M, + <float complex *>work.data, <float *>rwork.data, &info) - assert info == 0, "Argument error in tgsen" + assert info == 0, "Argument error in ctgevc" + assert MM == M, "Unexpected number of eigenvectors returned in ctgevc" - # Real inputs possibly produce complex output - cdef np.ndarray alpha = maybe_complex[scalar](0, alphar, alphai) + if not backtr: + if left: + vl = np.asfortranarray(np.dot(Q, vl)) + if right: + vr = np.asfortranarray(np.dot(Z, vr)) - return filter_args((True, True, Q is not None, Z is not None, - calc_ev, calc_ev), - (S, T, Q, Z, alpha, beta)) + if left and right: + return (vl, vr) + elif left: + return vl + else: + return vr -def tgevc(np.ndarray[scalar, ndim=2] S, - np.ndarray[scalar, ndim=2] T, - np.ndarray[scalar, ndim=2] Q, - np.ndarray[scalar, ndim=2] Z, - np.ndarray[l_logical] select, - left=False, right=True): +def ztgevc(np.ndarray[np.complex128_t, ndim=2] S, + np.ndarray[np.complex128_t, ndim=2] T, + np.ndarray[np.complex128_t, ndim=2] Q=None, + np.ndarray[np.complex128_t, ndim=2] Z=None, + np.ndarray[l_logical] select=None, + left=False, right=True): cdef l_int N, info, M, MM - - # Check parameters - - if ((S.shape[0] != S.shape[1] or T.shape[0] != T.shape[1] or - S.shape[0] != T.shape[0]) or - (Q is not None and (Q.shape[0] != Q.shape[1] or - S.shape[0] != Q.shape[0])) or - (Z is not None and (Z.shape[0] != Z.shape[1] or - S.shape[0] != Z.shape[0]))): - raise ValueError("Invalid Schur decomposition as input") + cdef char *side + cdef char *howmny + cdef np.ndarray[np.complex128_t, ndim=2] vl, vr + cdef double complex *vl_ptr + cdef double complex *vr_ptr + cdef l_logical *select_ptr + cdef np.ndarray[np.complex128_t] work + cdef np.ndarray[np.float64_t] rwork assert_fortran_mat(S, T, Q, Z) - # Allocate workspaces - N = S.shape[0] + work = np.empty(2*N, dtype = np.complex128) + rwork = np.empty(2*N, dtype = np.float64) - cdef np.ndarray[scalar] work - if scalar in floating: - work = np.empty(6 * N, dtype=S.dtype) - else: - work = np.empty(2 * N, dtype=S.dtype) - - cdef np.ndarray rwork = None - if scalar is float_complex: - rwork = np.empty(2 * N, dtype=np.float32) - elif scalar is double_complex: - rwork = np.empty(2 * N, dtype=np.float64) - - cdef char *side if left and right: side = "B" elif left: @@ -1142,26 +2402,12 @@ def tgevc(np.ndarray[scalar, ndim=2] S, else: return - cdef l_logical backtr = False + backtr = False - cdef char *howmny - cdef np.ndarray[l_logical] select_cpy = None - cdef l_logical *select_ptr if select is not None: howmny = "S" MM = select.nonzero()[0].size - # Correct for possible additional storage if a single complex - # eigenvalue is selected. - # For that: Figure out the positions of the 2x2 blocks. - cmplxindx = np.diagonal(S, -1).nonzero()[0] - for i in cmplxindx: - if bool(select[i]) != bool(select[i+1]): - MM += 1 - - # select is overwritten in tgevc - select_cpy = np.array(select, dtype=logical_dtype, - order = 'F') - select_ptr = <l_logical *>select_cpy.data + select_ptr = <l_logical *>select.data else: MM = N select_ptr = NULL @@ -1173,78 +2419,38 @@ def tgevc(np.ndarray[scalar, ndim=2] S, else: howmny = "A" - cdef np.ndarray[scalar, ndim=2] vl_r - cdef scalar *vl_r_ptr if left: if backtr: - vl_r = Q + vl = Q else: - vl_r = np.empty((N, MM), dtype=S.dtype, order='F') - vl_r_ptr = <scalar *>vl_r.data + vl = np.empty((N, MM), dtype = np.complex128, order='F') + vl_ptr = <double complex *>vl.data else: - vl_r_ptr = NULL + vl_ptr = NULL - cdef np.ndarray[scalar, ndim=2] vr_r - cdef scalar *vr_r_ptr if right: if backtr: - vr_r = Z + vr = Z else: - vr_r = np.empty((N, MM), dtype=S.dtype, order='F') - vr_r_ptr = <scalar *>vr_r.data + vr = np.empty((N, MM), dtype = np.complex128, order='F') + vr_ptr = <double complex *>vr.data else: - vr_r_ptr = NULL + vr_ptr = NULL - if scalar is float: - lapack.stgevc(side, howmny, select_ptr, - &N, <float *>S.data, &N, - <float *>T.data, &N, - vl_r_ptr, &N, vr_r_ptr, &N, &MM, &M, - <float *>work.data, &info) - elif scalar is double: - lapack.dtgevc(side, howmny, select_ptr, - &N, <double *>S.data, &N, - <double *>T.data, &N, - vl_r_ptr, &N, vr_r_ptr, &N, &MM, &M, - <double *>work.data, &info) - elif scalar is float_complex: - lapack.ctgevc(side, howmny, select_ptr, - &N, <float complex *>S.data, &N, - <float complex *>T.data, &N, - vl_r_ptr, &N, vr_r_ptr, &N, &MM, &M, - <float complex *>work.data, <float *>rwork.data, &info) - elif scalar is double_complex: - lapack.ztgevc(side, howmny, select_ptr, - &N, <double complex *>S.data, &N, - <double complex *>T.data, &N, - vl_r_ptr, &N, vr_r_ptr, &N, &MM, &M, - <double complex *>work.data, <double *>rwork.data, &info) - - assert info == 0, "Argument error in tgevc" - assert MM == M, "Unexpected number of eigenvectors returned in tgevc" + f_lapack.ztgevc_(side, howmny, select_ptr, + &N, <double complex *>S.data, &N, + <double complex *>T.data, &N, + vl_ptr, &N, vr_ptr, &N, &MM, &M, + <double complex *>work.data, <double *>rwork.data, &info) + + assert info == 0, "Argument error in ztgevc" + assert MM == M, "Unexpected number of eigenvectors returned in ztgevc" if not backtr: if left: - vl_r = np.asfortranarray(np.dot(Q, vl_r)) + vl = np.asfortranarray(np.dot(Q, vl)) if right: - vr_r = np.asfortranarray(np.dot(Z, vr_r)) - - # If there are complex eigenvalues, we need to postprocess the eigenvectors - cdef np.ndarray vl, vr - if left: - vl = vl_r - if right: - vr = vr_r - if scalar in floating: - if scalar is float: - dtype = np.complex64 - else: - dtype = np.complex128 - if np.diagonal(S, -1).nonzero()[0].size: - if left: - vl = txevc_postprocess(dtype, S, vl_r, select) - if right: - vr = txevc_postprocess(dtype, S, vr_r, select) + vr = np.asfortranarray(np.dot(Z, vr)) if left and right: return (vl, vr) @@ -1274,7 +2480,8 @@ def prepare_for_lapack(overwrite, *args): If an argument is ``None``, it is just passed through and not used to determine the proper LAPACK type. - Returns a list of properly converted arrays. + `prepare_for_lapack` returns a character indicating the proper LAPACK data + type ('s', 'd', 'c', 'z') and a list of properly converted arrays. """ # Make sure we have NumPy arrays @@ -1295,10 +2502,18 @@ def prepare_for_lapack(overwrite, *args): # kind. dtype = np.common_type(*[arr for arr, ovwrt in mats if arr is not None]) - if dtype not in (np.float32, np.float64, np.complex64, np.complex128): + if dtype == np.float32: + lapacktype = 's' + elif dtype == np.float64: + lapacktype = 'd' + elif dtype == np.complex64: + lapacktype = 'c' + elif dtype == np.complex128: + lapacktype = 'z' + else: raise AssertionError("Unexpected data type from common_type") - ret = [] + ret = [ lapacktype ] for npmat, ovwrt in mats: # Now make sure that the array is contiguous, and copy if necessary. if npmat is not None: @@ -1323,7 +2538,4 @@ def prepare_for_lapack(overwrite, *args): ret.append(npmat) - if len(ret) == 1: - return ret[0] - else: - return tuple(ret) + return tuple(ret) diff --git a/setup.py b/setup.py index 04d92ea7a23deb14abd64ef07aa4cc7cdb7ddfd4..197505db81f8a782506519d5a4f70687f713aa84 100755 --- a/setup.py +++ b/setup.py @@ -372,9 +372,8 @@ def search_mumps(): # Conda (via conda-forge). # TODO: remove dependency libs (scotch, metis...) when conda-forge # packaged mumps/scotch are built as properly linked shared libs - # 'openblas' provides Lapack and BLAS symbols ['zmumps', 'mumps_common', 'metis', 'esmumps', 'scotch', - 'scotcherr', 'mpiseq', 'openblas'], + 'scotcherr', 'mpiseq'], ] common_libs = ['pord', 'gfortran'] @@ -385,7 +384,34 @@ def search_mumps(): return [] +def search_lapack(): + """Return the BLAS variant that is installed.""" + lib_sets = [ + # Debian + ['blas', 'lapack'], + # Conda (via conda-forge). Openblas contains lapack symbols + ['openblas', 'gfortran'], + ] + + for libs in lib_sets: + found_libs = search_libs(libs) + if found_libs: + return found_libs + + print('Error: BLAS/LAPACK are required but were not found.', + file=sys.stderr) + sys.exit(1) + + def configure_special_extensions(exts, build_summary): + #### Special config for LAPACK. + lapack = exts['kwant.linalg.lapack'] + if 'libraries' in lapack: + build_summary.append('User-configured LAPACK and BLAS') + else: + lapack['libraries'] = search_lapack() + build_summary.append('Default LAPACK and BLAS') + #### Special config for MUMPS. mumps = exts['kwant.linalg._mumps'] if 'libraries' in mumps: @@ -400,6 +426,12 @@ def configure_special_extensions(exts, build_summary): del exts['kwant.linalg._mumps'] build_summary.append('No MUMPS support') + if mumps: + # Copy config from LAPACK. + for key, value in lapack.items(): + if key not in ['sources', 'depends']: + mumps.setdefault(key, []).extend(value) + return exts @@ -518,7 +550,8 @@ def main(): 'kwant/graph/c_slicer/partitioner.h', 'kwant/graph/c_slicer/slicer.h'])), ('kwant.linalg.lapack', - dict(sources=['kwant/linalg/lapack.pyx'])), + dict(sources=['kwant/linalg/lapack.pyx'], + depends=['kwant/linalg/f_lapack.pxd'])), ('kwant.linalg._mumps', dict(sources=['kwant/linalg/_mumps.pyx'], depends=['kwant/linalg/cmumps.pxd']))]) @@ -534,7 +567,8 @@ def main(): for ext in exts.values(): ext.setdefault('include_dirs', []).append(numpy_include) - aliases = [('mumps', 'kwant.linalg._mumps')] + aliases = [('lapack', 'kwant.linalg.lapack'), + ('mumps', 'kwant.linalg._mumps')] init_cython()