From 3ef0b9fce612f563a58497e011dac74c61f145dc Mon Sep 17 00:00:00 2001
From: Joseph Weston <joseph.weston08@gmail.com>
Date: Wed, 13 Sep 2017 13:20:53 +0200
Subject: [PATCH] Revert "Merge branch 'cleanup/remove-lapack' into 'stable'"

This reverts commit 1e6c086067360dfaab56d81013ab649275ab1756, reversing
changes made to 2c5da944a0b524b23949c438d22f30c7a254742b.

The changes to Kwant's Lapack wrappers depend on features from
Scipy 0.16, so it cannot be merged into stable branch, which
depends on Scipy 0.14.
---
 kwant/linalg/decomp_ev.py    |   17 +-
 kwant/linalg/decomp_lu.py    |   49 +-
 kwant/linalg/decomp_schur.py |   86 +-
 kwant/linalg/f_lapack.pxd    |  217 +++
 kwant/linalg/f_lapack.py     |   14 +
 kwant/linalg/lapack.pyx      | 2982 ++++++++++++++++++++++++----------
 setup.py                     |   42 +-
 7 files changed, 2497 insertions(+), 910 deletions(-)
 create mode 100644 kwant/linalg/f_lapack.pxd
 create mode 100644 kwant/linalg/f_lapack.py

diff --git a/kwant/linalg/decomp_ev.py b/kwant/linalg/decomp_ev.py
index 8e7b95d6..bd92ce93 100644
--- a/kwant/linalg/decomp_ev.py
+++ b/kwant/linalg/decomp_ev.py
@@ -50,5 +50,18 @@ def gen_eig(a, b, left=False, right=True, overwrite_ab=False):
         The right eigenvector corresponding to the eigenvalue
         ``alpha[i]/beta[i]`` is the column ``vr[:,i]``.
     """
-    a, b = lapack.prepare_for_lapack(overwrite_ab, a, b)
-    return lapack.ggev(a, b, left, right)
+
+    ltype, a, b = lapack.prepare_for_lapack(overwrite_ab, a, b)
+
+    if a.ndim != 2 or b.ndim != 2:
+        raise ValueError("gen_eig requires both a and be to be matrices")
+
+    if a.shape[0] != a.shape[1]:
+        raise ValueError("gen_eig requires square matrix input")
+
+    if b.shape[0] != a.shape[0] or b.shape[1] != a.shape[1]:
+        raise ValueError("gen_eig requires a and be to have the same shape")
+
+    ggev = getattr(lapack, ltype + "ggev")
+
+    return ggev(a, b, left, right)
diff --git a/kwant/linalg/decomp_lu.py b/kwant/linalg/decomp_lu.py
index 639b547b..79accc25 100644
--- a/kwant/linalg/decomp_lu.py
+++ b/kwant/linalg/decomp_lu.py
@@ -45,8 +45,20 @@ def lu_factor(a, overwrite_a=False):
     singular : boolean
         Whether the matrix a is singular (up to machine precision)
     """
-    a = lapack.prepare_for_lapack(overwrite_a, a)
-    return lapack.getrf(a)
+
+    ltype, a = lapack.prepare_for_lapack(overwrite_a, a)
+
+    if a.ndim != 2:
+        raise ValueError("lu_factor expects a matrix")
+
+    if ltype == 'd':
+        return lapack.dgetrf(a)
+    elif ltype == 'z':
+        return lapack.zgetrf(a)
+    elif ltype == 's':
+        return lapack.sgetrf(a)
+    else:
+        return lapack.cgetrf(a)
 
 
 def lu_solve(matrix_factorization, b):
@@ -71,9 +83,23 @@ def lu_solve(matrix_factorization, b):
                              "a singular matrix. Result of solve step "
                              "are probably unreliable")
 
-    lu, b = lapack.prepare_for_lapack(False, lu, b)
+    ltype, lu, b = lapack.prepare_for_lapack(False, lu, b)
     ipiv = np.ascontiguousarray(np.asanyarray(ipiv), dtype=lapack.int_dtype)
-    return lapack.getrs(lu, ipiv, b)
+
+    if b.ndim > 2:
+        raise ValueError("lu_solve: b must be a vector or matrix")
+
+    if lu.shape[0] != b.shape[0]:
+        raise ValueError("lu_solve: incompatible dimensions of b")
+
+    if ltype == 'd':
+        return lapack.dgetrs(lu, ipiv, b)
+    elif ltype == 'z':
+        return lapack.zgetrs(lu, ipiv, b)
+    elif ltype == 's':
+        return lapack.sgetrs(lu, ipiv, b)
+    else:
+        return lapack.cgetrs(lu, ipiv, b)
 
 
 def rcond_from_lu(matrix_factorization, norm_a, norm="1"):
@@ -101,6 +127,17 @@ def rcond_from_lu(matrix_factorization, norm_a, norm="1"):
         norm specified in norm
     """
     (lu, ipiv, singular) = matrix_factorization
+    if not norm in ("1", "I"):
+        raise ValueError("norm in rcond_from_lu must be either '1' or 'I'")
     norm = norm.encode('utf8')  # lapack expects bytes
-    lu = lapack.prepare_for_lapack(False, lu)
-    return lapack.gecon(lu, norm_a, norm)
+
+    ltype, lu = lapack.prepare_for_lapack(False, lu)
+
+    if ltype == 'd':
+        return lapack.dgecon(lu, norm_a, norm)
+    elif ltype == 'z':
+        return lapack.zgecon(lu, norm_a, norm)
+    elif ltype == 's':
+        return lapack.sgecon(lu, norm_a, norm)
+    else:
+        return lapack.cgecon(lu, norm_a, norm)
diff --git a/kwant/linalg/decomp_schur.py b/kwant/linalg/decomp_schur.py
index d6543213..4accc17f 100644
--- a/kwant/linalg/decomp_schur.py
+++ b/kwant/linalg/decomp_schur.py
@@ -62,8 +62,18 @@ def schur(a, calc_q=True, calc_ev=True, overwrite_a=False):
     LinAlgError
         If the underlying QR iteration fails to converge.
     """
-    a = lapack.prepare_for_lapack(overwrite_a, a)
-    return lapack.gees(a, calc_q, calc_ev)
+
+    ltype, a = lapack.prepare_for_lapack(overwrite_a, a)
+
+    if a.ndim != 2:
+        raise ValueError("Expect matrix as input")
+
+    if a.shape[0] != a.shape[1]:
+        raise ValueError("Expect square matrix")
+
+    gees = getattr(lapack, ltype + "gees")
+
+    return gees(a, calc_q, calc_ev)
 
 
 def convert_r2c_schur(t, q):
@@ -182,7 +192,9 @@ def order_schur(select, t, q, calc_ev=True, overwrite_tq=False):
         ``calc_ev == True``
     """
 
-    t, q = lapack.prepare_for_lapack(overwrite_tq, t, q)
+    ltype, t, q = lapack.prepare_for_lapack(overwrite_tq, t, q)
+
+    trsen = getattr(lapack, ltype + "trsen")
 
     # Figure out if select is a function or array.
     isfun = isarray = True
@@ -211,7 +223,7 @@ def order_schur(select, t, q, calc_ev=True, overwrite_tq=False):
             t, q = convert_r2c_schur(t, q)
             return order_schur(select, t, q, calc_ev, True)
 
-    return lapack.trsen(select, t, q, calc_ev)
+    return trsen(select, t, q, calc_ev)
 
 
 def evecs_from_schur(t, q, select=None, left=False, right=True,
@@ -255,7 +267,13 @@ def evecs_from_schur(t, q, select=None, left=False, right=True,
         ``right == True``.
     """
 
-    t, q = lapack.prepare_for_lapack(overwrite_tq, t, q)
+    ltype, t, q = lapack.prepare_for_lapack(overwrite_tq, t, q)
+
+    if (t.shape[0] != t.shape[1] or q.shape[0] != q.shape[1]
+        or t.shape[0] != q.shape[0]):
+        raise ValueError("Invalid Schur decomposition as input")
+
+    trevc = getattr(lapack, ltype + "trevc")
 
     # check if select is a function or an array
     if select is not None:
@@ -282,7 +300,7 @@ def evecs_from_schur(t, q, select=None, left=False, right=True,
     else:
         selectarr = None
 
-    return lapack.trevc(t, q, selectarr, left, right)
+    return trevc(t, q, selectarr, left, right)
 
 
 def gen_schur(a, b, calc_q=True, calc_z=True, calc_ev=True,
@@ -346,8 +364,21 @@ def gen_schur(a, b, calc_q=True, calc_z=True, calc_ev=True,
     LinAlError
         If the underlying QZ iteration fails to converge.
     """
-    a, b = lapack.prepare_for_lapack(overwrite_ab, a, b)
-    return lapack.gges(a, b, calc_q, calc_z, calc_ev)
+
+    ltype, a, b = lapack.prepare_for_lapack(overwrite_ab, a, b)
+
+    if a.ndim != 2 or b.ndim != 2:
+        raise ValueError("Expect matrices as input")
+
+    if a.shape[0] != a.shape[1]:
+        raise ValueError("Expect square matrix a")
+
+    if a.shape[0] != b.shape[0] or a.shape[0] != b.shape[1]:
+        raise ValueError("Shape of b is incompatible to matrix a")
+
+    gges = getattr(lapack, ltype + "gges")
+
+    return gges(a, b, calc_q, calc_z, calc_ev)
 
 
 def order_gen_schur(select, s, t, q=None, z=None, calc_ev=True,
@@ -408,8 +439,22 @@ def order_gen_schur(select, s, t, q=None, z=None, calc_ev=True,
     LinAlError
         If the problem is too ill-conditioned.
     """
-    s, t, q, z = lapack.prepare_for_lapack(overwrite_stqz, s, t, q, z)
+    ltype, s, t, q, z = lapack.prepare_for_lapack(overwrite_stqz, s, t, q, z)
 
+    if (s.ndim != 2 or t.ndim != 2 or
+        (q is not None and q.ndim != 2) or
+        (z is not None and z.ndim != 2)):
+        raise ValueError("Expect matrices as input")
+
+    if ((s.shape[0] != s.shape[1] or t.shape[0] != t.shape[1] or
+         s.shape[0] != t.shape[0]) or
+        (q is not None and (q.shape[0] != q.shape[1] or
+                            s.shape[0] != q.shape[0])) or
+        (z is not None and (z.shape[0] != z.shape[1] or
+                            s.shape[0] != z.shape[0]))):
+        raise ValueError("Invalid Schur decomposition as input")
+
+    tgsen = getattr(lapack, ltype + "tgsen")
 
     # Figure out if select is a function or array.
     isfun = isarray = True
@@ -447,7 +492,7 @@ def order_gen_schur(select, s, t, q=None, z=None, calc_ev=True,
 
             return order_gen_schur(select, s, t, q, z, calc_ev, True)
 
-    return lapack.tgsen(select, s, t, q, z, calc_ev)
+    return tgsen(select, s, t, q, z, calc_ev)
 
 
 def convert_r2c_gen_schur(s, t, q=None, z=None):
@@ -491,7 +536,7 @@ def convert_r2c_gen_schur(s, t, q=None, z=None):
         If it fails to convert a 2x2 block into complex form (unlikely).
     """
 
-    s, t, q, z = lapack.prepare_for_lapack(True, s, t, q, z)
+    ltype, s, t, q, z = lapack.prepare_for_lapack(True, s, t, q, z)
     # Note: overwrite=True does not mean much here, the arrays are all copied
 
     if (s.ndim != 2 or t.ndim != 2 or
@@ -611,7 +656,20 @@ def evecs_from_gen_schur(s, t, q=None, z=None, select=None,
 
     """
 
-    s, t, q, z = lapack.prepare_for_lapack(overwrite_qz, s, t, q, z)
+    ltype, s, t, q, z = lapack.prepare_for_lapack(overwrite_qz, s, t, q, z)
+
+    if (s.ndim != 2 or t.ndim != 2 or
+        (q is not None and q.ndim != 2) or
+        (z is not None and z.ndim != 2)):
+        raise ValueError("Expect matrices as input")
+
+    if ((s.shape[0] != s.shape[1] or t.shape[0] != t.shape[1] or
+         s.shape[0] != t.shape[0]) or
+        (q is not None and (q.shape[0] != q.shape[1] or
+                            s.shape[0] != q.shape[0])) or
+        (z is not None and (z.shape[0] != z.shape[1] or
+                            s.shape[0] != z.shape[0]))):
+        raise ValueError("Invalid Schur decomposition as input")
 
     if left and q is None:
         raise ValueError("Matrix q must be provided for left eigenvectors")
@@ -619,6 +677,8 @@ def evecs_from_gen_schur(s, t, q=None, z=None, select=None,
     if right and z is None:
         raise ValueError("Matrix z must be provided for right eigenvectors")
 
+    tgevc = getattr(lapack, ltype + "tgevc")
+
     # Check if select is a function or an array.
     if select is not None:
         isfun = isarray = True
@@ -644,4 +704,4 @@ def evecs_from_gen_schur(s, t, q=None, z=None, select=None,
     else:
         selectarr = None
 
-    return lapack.tgevc(s, t, q, z, selectarr, left, right)
+    return tgevc(s, t, q, z, selectarr, left, right)
diff --git a/kwant/linalg/f_lapack.pxd b/kwant/linalg/f_lapack.pxd
new file mode 100644
index 00000000..1a0304af
--- /dev/null
+++ b/kwant/linalg/f_lapack.pxd
@@ -0,0 +1,217 @@
+# Copyright 2011-2013 Kwant authors.
+#
+# This file is part of Kwant.  It is subject to the license terms in the file
+# LICENSE.rst found in the top-level directory of this distribution and at
+# http://kwant-project.org/license.  A list of Kwant authors can be found in
+# the file AUTHORS.rst at the top-level directory of this distribution and at
+# http://kwant-project.org/authors.
+
+ctypedef int l_int
+ctypedef int l_logical
+
+cdef extern:
+    void sgetrf_(l_int *, l_int *, float *, l_int *, l_int *, l_int *)
+    void dgetrf_(l_int *, l_int *, double *, l_int *, l_int *, l_int *)
+    void cgetrf_(l_int *, l_int *, float complex *, l_int *, l_int *,
+                 l_int *)
+    void zgetrf_(l_int *, l_int *, double complex *, l_int *, l_int *,
+                 l_int *)
+
+    void sgetrs_(char *, l_int *, l_int *, float *, l_int *, l_int *,
+                 float *, l_int *, l_int *)
+    void dgetrs_(char *, l_int *, l_int *, double *, l_int *, l_int *,
+                 double *, l_int *, l_int *)
+    void cgetrs_(char *, l_int *, l_int *, float complex *, l_int *,
+                 l_int *, float complex *, l_int *, l_int *)
+    void zgetrs_(char *, l_int *, l_int *, double complex *, l_int *,
+                 l_int *, double complex *, l_int *, l_int *)
+
+    void sgecon_(char *, l_int *, float *, l_int *, float *, float *,
+                 float *, l_int *, l_int *)
+    void dgecon_(char *, l_int *, double *, l_int *, double *, double *,
+                 double *, l_int *, l_int *)
+    void cgecon_(char *, l_int *, float complex *, l_int *, float *,
+                 float *, float complex *, float *, l_int *)
+    void zgecon_(char *, l_int *, double complex *, l_int *, double *,
+                 double *, double complex *, double *, l_int *)
+
+    void sggev_(char *, char *, l_int *, float *, l_int *, float *, l_int *,
+                float *, float *, float *, float *, l_int *, float *, l_int *,
+                float *, l_int *, l_int *)
+    void dggev_(char *, char *, l_int *, double *, l_int *, double *, l_int *,
+                double *, double *, double *, double *, l_int *,
+                double *, l_int *, double *, l_int *, l_int *)
+    void cggev_(char *, char *, l_int *, float complex *, l_int *,
+                float complex *, l_int *, float complex *, float complex *,
+                float complex *, l_int *, float complex *, l_int *,
+                float complex *, l_int *, float *, l_int *)
+    void zggev_(char *, char *, l_int *, double complex *, l_int *,
+                double complex *, l_int *, double complex *,
+                double complex *, double complex *, l_int *,
+                double complex *, l_int *, double complex *, l_int *,
+                double *, l_int *)
+
+    void sgees_(char *, char *, l_logical (*)(float *, float *),
+                l_int *, float *, l_int *, l_int *,
+                float *, float *, float *, l_int *,
+                float *, l_int *, l_logical *, l_int *)
+    void dgees_(char *, char *, l_logical (*)(double *, double *),
+                l_int *, double *, l_int *, l_int *,
+                double *, double *, double *, l_int *,
+                double *, l_int *, l_logical *, l_int *)
+    void cgees_(char *, char *,
+                l_logical (*)(float complex *),
+                l_int *, float complex *,
+                l_int *, l_int *, float complex *,
+                float complex *, l_int *,
+                float complex *, l_int *, float *,
+                l_logical *, l_int *)
+    void zgees_(char *, char *,
+                l_logical (*)(double complex *),
+                l_int *, double complex *,
+                l_int *, l_int *, double complex *,
+                double complex *, l_int *,
+                double complex *, l_int *,
+                double *, l_logical *, l_int *)
+
+    void strsen_(char *, char *, l_logical *, l_int *,
+                 float *, l_int *, float *,
+                 l_int *, float *, float *, l_int *,
+                 float *, float *, float *, l_int *,
+                 l_int *, l_int *, l_int *)
+    void dtrsen_(char *, char *, l_logical *,
+                 l_int *, double *, l_int *,
+                 double *, l_int *, double *, double *,
+                 l_int *, double *, double *, double *,
+                 l_int *, l_int *, l_int *, l_int *)
+    void ctrsen_(char *, char *, l_logical *,
+                 l_int *, float complex *,
+                 l_int *, float complex *,
+                 l_int *, float complex *, l_int *,
+                 float *, float *, float complex *,
+                 l_int *, l_int *)
+    void ztrsen_(char *, char *, l_logical *,
+                 l_int *, double complex *,
+                 l_int *, double complex *,
+                 l_int *, double complex *, l_int *,
+                 double *, double *, double complex *,
+                 l_int *, l_int *)
+
+    void strevc_(char *, char *, l_logical *,
+                 l_int *, float *, l_int *,
+                 float *, l_int *, float *, l_int *,
+                 l_int *, l_int *, float *, l_int *)
+    void dtrevc_(char *, char *, l_logical *,
+                 l_int *, double *, l_int *,
+                 double *, l_int *, double *,
+                 l_int *, l_int *, l_int *, double *,
+                 l_int *)
+    void ctrevc_(char *, char *, l_logical *,
+                 l_int *, float complex *,
+                 l_int *, float complex *,
+                 l_int *, float complex *,
+                 l_int *, l_int *, l_int *,
+                 float complex *, float *, l_int *)
+    void ztrevc_(char *, char *, l_logical *,
+                 l_int *, double complex *,
+                 l_int *, double complex *,
+                 l_int *, double complex *,
+                 l_int *, l_int *, l_int *,
+                 double complex *, double *, l_int *)
+
+    void sgges_(char *, char *, char *,
+                l_logical (*)(float *, float *, float *),
+                l_int *, float *, l_int *, float *,
+                l_int *, l_int *, float *, float *,
+                float *, float *, l_int *, float *,
+                l_int *, float *, l_int *, l_logical *,
+                l_int *)
+    void dgges_(char *, char *, char *,
+                l_logical (*)(double *, double *, double *),
+                l_int *, double *, l_int *, double *,
+                l_int *, l_int *, double *, double *,
+                double *, double *, l_int *, double *,
+                l_int *, double *, l_int *,
+                l_logical *, l_int *)
+    void cgges_(char *, char *, char *,
+                l_logical (*)(float complex *, float complex *),
+                l_int *, float complex *,
+                l_int *, float complex *,
+                l_int *, l_int *, float complex *,
+                float complex *, float complex *,
+                l_int *, float complex *,
+                l_int *, float complex *,
+                l_int *, float *, l_logical *, l_int *)
+    void zgges_(char *, char *, char *,
+                l_logical (*)(double complex *, double complex *),
+                l_int *, double complex *,
+                l_int *, double complex *,
+                l_int *, l_int *, double complex *,
+                double complex *,
+                double complex *, l_int *,
+                double complex *, l_int *,
+                double complex *, l_int *,
+                double *, l_logical *, l_int *)
+
+    void stgsen_(l_int *, l_logical *,
+                 l_logical *, l_logical *,
+                 l_int *, float *, l_int *, float *,
+                 l_int *, float *, float *, float *,
+                 float *, l_int *, float *, l_int *,
+                 l_int *, float *, float *, float *, float *,
+                 l_int *, l_int *, l_int *, l_int *)
+    void dtgsen_(l_int *, l_logical *,
+                 l_logical *, l_logical *,
+                 l_int *, double *, l_int *,
+                 double *, l_int *, double *, double *,
+                 double *, double *, l_int *, double *,
+                 l_int *, l_int *, double *, double *,
+                 double *, double *, l_int *, l_int *,
+                 l_int *, l_int *)
+    void ctgsen_(l_int *, l_logical *,
+                 l_logical *, l_logical *,
+                 l_int *, float complex *,
+                 l_int *, float complex *,
+                 l_int *, float complex *,
+                 float complex *,
+                 float complex *, l_int *,
+                 float complex *, l_int *, l_int *,
+                 float *, float *, float *,
+                 float complex *, l_int *, l_int *,
+                 l_int *, l_int *)
+    void ztgsen_(l_int *, l_logical *,
+                 l_logical *, l_logical *,
+                 l_int *, double complex *,
+                 l_int *, double complex *,
+                 l_int *, double complex *,
+                 double complex *,
+                 double complex *, l_int *,
+                 double complex *, l_int *, l_int *,
+                 double *, double *, double *,
+                 double complex *, l_int *, l_int *,
+                 l_int *, l_int *)
+
+    void stgevc_(char *, char *, l_logical *,
+                 l_int *, float *, l_int *,
+                 float *, l_int *, float *,
+                 l_int *, float *, l_int *,
+                 l_int *, l_int *, float *, l_int *)
+    void dtgevc_(char *, char *, l_logical *,
+                 l_int *, double *, l_int *,
+                 double *, l_int *, double *,
+                 l_int *, double *, l_int *,
+                 l_int *, l_int *, double *, l_int *)
+    void ctgevc_(char *, char *, l_logical *,
+                 l_int *, float complex *,
+                 l_int *, float complex *,
+                 l_int *, float complex *,
+                 l_int *, float complex *,
+                 l_int *, l_int *, l_int *,
+                 float complex *, float *, l_int *)
+    void ztgevc_(char *, char *, l_logical *,
+                 l_int *, double complex *,
+                 l_int *, double complex *,
+                 l_int *, double complex *,
+                 l_int *, double complex *,
+                 l_int *, l_int *, l_int *,
+                 double complex *, double *, l_int *)
diff --git a/kwant/linalg/f_lapack.py b/kwant/linalg/f_lapack.py
new file mode 100644
index 00000000..39b702b5
--- /dev/null
+++ b/kwant/linalg/f_lapack.py
@@ -0,0 +1,14 @@
+# Copyright 2011-2013 Kwant authors.
+#
+# This file is part of Kwant.  It is subject to the license terms in the file
+# LICENSE.rst found in the top-level directory of this distribution and at
+# http://kwant-project.org/license.  A list of Kwant authors can be found in
+# the file AUTHORS.rst at the top-level directory of this distribution and at
+# http://kwant-project.org/authors.
+
+__all__ = ['l_int_dtype', 'l_logical_dtype']
+
+import numpy as np
+
+l_int_dtype = np.int32
+l_logical_dtype = np.int32
diff --git a/kwant/linalg/lapack.pyx b/kwant/linalg/lapack.pyx
index e173bc0c..d6699a0e 100644
--- a/kwant/linalg/lapack.pyx
+++ b/kwant/linalg/lapack.pyx
@@ -1,4 +1,4 @@
-# Copyright 2011-2017 Kwant authors.
+# Copyright 2011-2013 Kwant authors.
 #
 # This file is part of Kwant.  It is subject to the license terms in the file
 # LICENSE.rst found in the top-level directory of this distribution and at
@@ -8,53 +8,29 @@
 
 """Low-level access to LAPACK functions. """
 
-__all__ = ['getrf',
-           'getrs',
-           'gecon',
-           'ggev',
-           'gees',
-           'trsen',
-           'trevc',
-           'gges',
-           'tgsen',
-           'tgevc',
+__all__ = ['sgetrf', 'dgetrf', 'cgetrf', 'zgetrf',
+           'sgetrs', 'dgetrs', 'cgetrs', 'zgetrs',
+           'sgecon', 'dgecon', 'cgecon', 'zgecon',
+           'sggev', 'dggev', 'cggev', 'zggev',
+           'sgees', 'dgees', 'cgees', 'zgees',
+           'strsen', 'dtrsen', 'ctrsen', 'ztrsen',
+           'strevc', 'dtrevc', 'ctrevc', 'ztrevc',
+           'sgges', 'dgges', 'cgges', 'zgges',
+           'stgsen', 'dtgsen', 'ctgsen', 'ztgsen',
+           'stgevc', 'dtgevc', 'ctgevc', 'ztgevc',
            'prepare_for_lapack']
 
 import numpy as np
 cimport numpy as np
 
-cimport scipy.linalg.cython_lapack as lapack
+# Strangely absolute imports from kwant.linalg.f_lapack don't work here.  So
+# continue using traditional implicit relative imports.
+from . import f_lapack
+from . cimport f_lapack
+from .f_lapack cimport l_int, l_logical
 
-ctypedef int l_int
-ctypedef bint l_logical
-
-int_dtype = np.int32
-logical_dtype = np.int32
-
-ctypedef float complex float_complex
-ctypedef double complex double_complex
-
-ctypedef fused scalar:
-    float
-    double
-    float complex
-    double complex
-
-ctypedef fused single_precision:
-    float
-    float complex
-
-ctypedef fused double_precision:
-    double
-    double complex
-
-ctypedef fused cmplx:
-    float complex
-    double complex
-
-ctypedef fused floating:
-    float
-    double
+int_dtype = f_lapack.l_int_dtype
+logical_dtype = f_lapack.l_logical_dtype
 
 # exceptions
 
@@ -75,161 +51,284 @@ def assert_fortran_mat(*mats):
             raise ValueError("Input matrix must be Fortran contiguous")
 
 
-cdef np.ndarray maybe_complex(scalar selector,
-                              np.ndarray real, np.ndarray imag):
-    cdef np.ndarray r
-    r = real
-    if scalar in floating:
-        if imag.nonzero()[0].size:
-            r = real + 1j * imag
-    return r
+# Wrappers for xGETRF
+def sgetrf(np.ndarray[np.float32_t, ndim=2] A):
+    cdef l_int M, N, info
+    cdef np.ndarray[l_int, ndim=1] ipiv
+
+    assert_fortran_mat(A)
+
+    M = A.shape[0]
+    N = A.shape[1]
+    ipiv = np.empty(min(M,N), dtype = f_lapack.l_int_dtype)
+
+    f_lapack.sgetrf_(&M, &N, <float *>A.data, &M,
+                     <l_int *>ipiv.data, &info)
 
+    assert info >= 0, "Argument error in sgetrf"
 
-cdef l_int lwork_from_qwork(scalar qwork):
-    if scalar in floating:
-        return <l_int>qwork
-    else:
-        return <l_int>qwork.real
+    return (A, ipiv, info > 0 or M != N)
+
+def dgetrf(np.ndarray[np.float64_t, ndim=2] A):
+    cdef l_int M, N, info
+    cdef np.ndarray[l_int, ndim=1] ipiv
+
+    assert_fortran_mat(A)
+
+    M = A.shape[0]
+    N = A.shape[1]
+    ipiv = np.empty(min(M,N), dtype = f_lapack.l_int_dtype)
+
+    f_lapack.dgetrf_(&M, &N, <double *>A.data, &M,
+                     <l_int *>ipiv.data, &info)
+
+    assert info >= 0, "Argument error in dgetrf"
+
+    return (A, ipiv, info > 0 or M != N)
+
+def cgetrf(np.ndarray[np.complex64_t, ndim=2] A):
+    cdef l_int M, N, info
+    cdef np.ndarray[l_int, ndim=1] ipiv
+
+    assert_fortran_mat(A)
+
+    M = A.shape[0]
+    N = A.shape[1]
+    ipiv = np.empty(min(M,N), dtype = f_lapack.l_int_dtype)
+
+    f_lapack.cgetrf_(&M, &N, <float complex *>A.data, &M,
+                     <l_int *>ipiv.data, &info)
+
+    assert info >= 0, "Argument error in cgetrf"
 
+    return (A, ipiv, info > 0 or M != N)
 
-def getrf(np.ndarray[scalar, ndim=2] A):
+def zgetrf(np.ndarray[np.complex128_t, ndim=2] A):
     cdef l_int M, N, info
-    cdef np.ndarray[l_int] ipiv
+    cdef np.ndarray[l_int, ndim=1] ipiv
 
     assert_fortran_mat(A)
 
     M = A.shape[0]
     N = A.shape[1]
-    ipiv = np.empty(min(M,N), dtype = int_dtype)
-
-    if scalar is float:
-        lapack.sgetrf(&M, &N, <float *>A.data, &M,
-                      <l_int *>ipiv.data, &info)
-    elif scalar is double:
-        lapack.dgetrf(&M, &N, <double *>A.data, &M,
-                      <l_int *>ipiv.data, &info)
-    elif scalar is float_complex:
-        lapack.cgetrf(&M, &N, <float complex *>A.data, &M,
-                      <l_int *>ipiv.data, &info)
-    elif scalar is double_complex:
-        lapack.zgetrf(&M, &N, <double complex *>A.data, &M,
-                      <l_int *>ipiv.data, &info)
-
-    assert info >= 0, "Argument error in getrf"
+    ipiv = np.empty(min(M,N), dtype = f_lapack.l_int_dtype)
+
+    f_lapack.zgetrf_(&M, &N, <double complex *>A.data, &M,
+                     <l_int *>ipiv.data, &info)
+
+    assert info >= 0, "Argument error in zgetrf"
 
     return (A, ipiv, info > 0 or M != N)
 
+# Wrappers for xGETRS
 
-def getrs(np.ndarray[scalar, ndim=2] LU, np.ndarray[l_int] IPIV,
-          np.ndarray B):
+def sgetrs(np.ndarray[np.float32_t, ndim=2] LU,
+           np.ndarray[l_int, ndim=1] IPIV, B):
     cdef l_int N, NRHS, info
+    cdef np.ndarray b
 
     assert_fortran_mat(LU)
 
-    # Consistency checks for LU and B
+    # again: workaround for 1x1-Fortran bug in NumPy < v2.0
+    if (not isinstance(B, np.ndarray) or
+        (B.ndim == 2 and (B.shape[0] > 1 or B.shape[1] > 1) and
+         not B.flags["F_CONTIGUOUS"])):
+        raise ValueError("In dgetrs: B must be a Fortran ordered NumPy array")
+
+    b = B
+    N = LU.shape[0]
+    if b.ndim == 1:
+        NRHS = 1
+    elif b.ndim == 2:
+        NRHS = B.shape[1]
+    else:
+        raise ValueError("In sgetrs: B must be a vector or matrix")
+
+    f_lapack.sgetrs_("N", &N, &NRHS, <float *>LU.data, &N,
+                     <l_int *>IPIV.data, <float *>b.data, &N,
+                     &info)
+
+    assert info == 0, "Argument error in sgetrs"
+
+    return b
 
-    if B.descr.type_num != LU.descr.type_num:
-        raise TypeError('B must have same dtype as LU')
+def dgetrs(np.ndarray[np.float64_t, ndim=2] LU,
+           np.ndarray[l_int, ndim=1] IPIV, B):
+    cdef l_int N, NRHS, info
+    cdef np.ndarray b
+
+    assert_fortran_mat(LU)
 
-    # Workaround for 1x1-Fortran bug in NumPy < v2.0
-    if ((B.ndim == 2 and (B.shape[0] > 1 or B.shape[1] > 1) and
+    # again: workaround for 1x1-Fortran bug in NumPy < v2.0
+    if (not isinstance(B, np.ndarray) or
+        (B.ndim == 2 and (B.shape[0] > 1 or B.shape[1] > 1) and
          not B.flags["F_CONTIGUOUS"])):
-        raise ValueError("B must be Fortran ordered")
+        raise ValueError("In dgetrs: B must be a Fortran ordered NumPy array")
+
+    b = B
+    N = LU.shape[0]
+    if b.ndim == 1:
+        NRHS = 1
+    elif b.ndim == 2:
+        NRHS = b.shape[1]
+    else:
+        raise ValueError("In dgetrs: B must be a vector or matrix")
+
+    f_lapack.dgetrs_("N", &N, &NRHS, <double *>LU.data, &N,
+                     <l_int *>IPIV.data, <double *>b.data, &N,
+                     &info)
 
-    if B.ndim > 2:
-        raise ValueError("B must be a vector or matrix")
+    assert info == 0, "Argument error in dgetrs"
 
-    if LU.shape[0] != B.shape[0]:
-        raise ValueError('LU and B have incompatible shapes')
+    return b
 
+def cgetrs(np.ndarray[np.complex64_t, ndim=2] LU,
+           np.ndarray[l_int, ndim=1] IPIV, B):
+    cdef l_int N, NRHS, info
+    cdef np.ndarray b
+
+    assert_fortran_mat(LU)
+
+    # again: workaround for 1x1-Fortran bug in NumPy < v2.0
+    if (not isinstance(B, np.ndarray) or
+        (B.ndim == 2 and (B.shape[0] > 1 or B.shape[1] > 1) and
+         not B.flags["F_CONTIGUOUS"])):
+        raise ValueError("In dgetrs: B must be a Fortran ordered NumPy array")
+
+    b = B
     N = LU.shape[0]
+    if b.ndim == 1:
+        NRHS = 1
+    elif b.ndim == 2:
+        NRHS = b.shape[1]
+    else:
+        raise ValueError("In cgetrs: B must be a vector or matrix")
 
-    if B.ndim == 1:
+    f_lapack.cgetrs_("N", &N, &NRHS, <float complex *>LU.data, &N,
+                     <l_int *>IPIV.data, <float complex *>b.data, &N,
+                     &info)
+
+    assert info == 0, "Argument error in cgetrs"
+
+    return b
+
+def zgetrs(np.ndarray[np.complex128_t, ndim=2] LU,
+           np.ndarray[l_int, ndim=1] IPIV, B):
+    cdef l_int N, NRHS, info
+    cdef np.ndarray b
+
+    assert_fortran_mat(LU)
+
+    # again: workaround for 1x1-Fortran bug in NumPy < v2.0
+    if (not isinstance(B, np.ndarray) or
+        (B.ndim == 2 and (B.shape[0] > 1 or B.shape[1] > 1) and
+         not B.flags["F_CONTIGUOUS"])):
+        raise ValueError("In dgetrs: B must be a Fortran ordered NumPy array")
+
+    b = B
+    N = LU.shape[0]
+    if b.ndim == 1:
         NRHS = 1
-    elif B.ndim == 2:
-        NRHS = B.shape[1]
+    elif b.ndim == 2:
+        NRHS = b.shape[1]
+    else:
+        raise ValueError("In zgetrs: B must be a vector or matrix")
+
+    f_lapack.zgetrs_("N", &N, &NRHS, <double complex *>LU.data, &N,
+                     <l_int *>IPIV.data, <double complex *>b.data, &N,
+                     &info)
+
+    assert info == 0, "Argument error in zgetrs"
+
+    return b
+
+# Wrappers for xGECON
 
-    if scalar is float:
-        lapack.sgetrs("N", &N, &NRHS, <float *>LU.data, &N,
-                      <l_int *>IPIV.data, <float *>B.data, &N,
-                      &info)
-    elif scalar is double:
-        lapack.dgetrs("N", &N, &NRHS, <double *>LU.data, &N,
-                      <l_int *>IPIV.data, <double *>B.data, &N,
-                      &info)
-    elif scalar is float_complex:
-        lapack.cgetrs("N", &N, &NRHS, <float complex *>LU.data, &N,
-                      <l_int *>IPIV.data, <float complex *>B.data, &N,
-                      &info)
-    elif scalar is double_complex:
-        lapack.zgetrs("N", &N, &NRHS, <double complex *>LU.data, &N,
-                      <l_int *>IPIV.data, <double complex *>B.data, &N,
-                      &info)
-
-    assert info == 0, "Argument error in getrs"
-
-    return B
-
-
-def gecon(np.ndarray[scalar, ndim=2] LU, double normA, char *norm = b"1"):
+def sgecon(np.ndarray[np.float32_t, ndim=2] LU,
+            float normA, char *norm = b"1"):
     cdef l_int N, info
-    cdef float srcond, snormA
-    cdef double drcond
+    cdef float rcond
+    cdef np.ndarray[np.float32_t, ndim=1] work
+    cdef np.ndarray[l_int, ndim=1] iwork
 
-    # Parameter checks
+    assert_fortran_mat(LU)
+
+    N = LU.shape[0]
+    work = np.empty(4*N, dtype = np.float32)
+    iwork = np.empty(N, dtype = f_lapack.l_int_dtype)
+
+    f_lapack.sgecon_(norm, &N, <float *>LU.data, &N, &normA,
+                     &rcond, <float *>work.data,
+                     <l_int *>iwork.data, &info)
+
+    assert info == 0, "Argument error in sgecon"
+
+    return rcond
+
+def dgecon(np.ndarray[np.float64_t, ndim=2] LU,
+            double normA, char *norm = b"1"):
+    cdef l_int N, info
+    cdef double rcond
+    cdef np.ndarray[np.float64_t, ndim=1] work
+    cdef np.ndarray[l_int, ndim=1] iwork
 
     assert_fortran_mat(LU)
-    if norm[0] != b"1" and norm[0] != b"I":
-        raise ValueError("'norm' must be either '1' or 'I'")
-    if scalar in single_precision:
-        snormA = normA
 
-    # Allocate workspaces
+    N = LU.shape[0]
+    work = np.empty(4*N, dtype = np.float64)
+    iwork = np.empty(N, dtype = f_lapack.l_int_dtype)
+
+    f_lapack.dgecon_(norm, &N, <double *>LU.data, &N, &normA,
+                     &rcond, <double *>work.data,
+                     <l_int *>iwork.data, &info)
+
+    assert info == 0, "Argument error in dgecon"
+
+    return rcond
+
+def cgecon(np.ndarray[np.complex64_t, ndim=2] LU,
+            float normA, char *norm = b"1"):
+    cdef l_int N, info
+    cdef float rcond
+    cdef np.ndarray[np.complex64_t, ndim=1] work
+    cdef np.ndarray[np.float32_t, ndim=1] rwork
+
+    assert_fortran_mat(LU)
 
     N = LU.shape[0]
+    work = np.empty(2*N, dtype = np.complex64)
+    rwork = np.empty(2*N, dtype = np.float32)
 
-    cdef np.ndarray[l_int] iwork
-    if scalar in floating:
-        iwork = np.empty(N, dtype=int_dtype)
+    f_lapack.cgecon_(norm, &N, <float complex *>LU.data, &N, &normA,
+                     &rcond, <float complex *>work.data,
+                     <float *>rwork.data, &info)
 
-    cdef np.ndarray[scalar] work
-    if scalar in floating:
-        work = np.empty(4 * N, dtype=LU.dtype)
-    else:
-        work = np.empty(2 * N, dtype=LU.dtype)
+    assert info == 0, "Argument error in cgecon"
 
-    cdef np.ndarray rwork
-    if scalar is float_complex:
-        rwork = np.empty(2 * N, dtype=np.float32)
-    elif scalar is double_complex:
-        rwork = np.empty(2 * N, dtype=np.float64)
+    return rcond
 
-    # The actual calculation
+def zgecon(np.ndarray[np.complex128_t, ndim=2] LU,
+           double normA, char *norm = b"1"):
+    cdef l_int N, info
+    cdef double rcond
+    cdef np.ndarray[np.complex128_t, ndim=1] work
+    cdef np.ndarray[np.float64_t, ndim=1] rwork
 
-    if scalar is float:
-        lapack.sgecon(norm, &N, <float *>LU.data, &N, &snormA,
-                      &srcond, <float *>work.data,
-                      <l_int *>iwork.data, &info)
-    elif scalar is double:
-        lapack.dgecon(norm, &N, <double *>LU.data, &N, &normA,
-                      &drcond, <double *>work.data,
-                      <l_int *>iwork.data, &info)
-    elif scalar is float_complex:
-        lapack.cgecon(norm, &N, <float complex *>LU.data, &N, &snormA,
-                      &srcond, <float complex *>work.data,
-                      <float *>rwork.data, &info)
-    elif scalar is double_complex:
-        lapack.zgecon(norm, &N, <double complex *>LU.data, &N, &normA,
-                      &drcond, <double complex *>work.data,
-                      <double *>rwork.data, &info)
+    assert_fortran_mat(LU)
 
-    assert info == 0, "Argument error in gecon"
+    N = LU.shape[0]
+    work = np.empty(2*N, dtype = np.complex128)
+    rwork = np.empty(2*N, dtype = np.float64)
 
-    if scalar in single_precision:
-        return srcond
-    else:
-        return drcond
+    f_lapack.zgecon_(norm, &N, <double complex *>LU.data, &N, &normA,
+                     &rcond, <double complex *>work.data,
+                     <double *>rwork.data, &info)
 
+    assert info == 0, "Argument error in zgecon"
+
+    return rcond
+
+# Wrappers for xGGEV
 
 # Helper function for xGGEV
 def ggev_postprocess(dtype, alphar, alphai, vl_r=None, vr_r=None):
@@ -264,363 +363,707 @@ def ggev_postprocess(dtype, alphar, alphai, vl_r=None, vr_r=None):
     return (alpha, vl, vr)
 
 
-def ggev(np.ndarray[scalar, ndim=2] A, np.ndarray[scalar, ndim=2] B,
-         left=False, right=True):
+def sggev(np.ndarray[np.float32_t, ndim=2] A,
+          np.ndarray[np.float32_t, ndim=2] B,
+          left=False, right=True):
     cdef l_int N, info, lwork
-
-    # Parameter checks
+    cdef char *jobvl
+    cdef char *jobvr
+    cdef np.ndarray[np.float32_t, ndim=2] vl_r, vr_r
+    cdef float *vl_ptr
+    cdef float *vr_ptr
+    cdef float qwork
+    cdef np.ndarray[np.float32_t, ndim=1] work, alphar, alphai, beta
 
     assert_fortran_mat(A, B)
 
-    if A.ndim != 2 or A.ndim != 2:
-        raise ValueError("gen_eig requires both a and be to be matrices")
+    N = A.shape[0]
+    alphar = np.empty(N, dtype = np.float32)
+    alphai = np.empty(N, dtype = np.float32)
+    beta = np.empty(N, dtype = np.float32)
+
+    if left:
+        vl_r = np.empty((N,N), dtype = np.float32, order='F')
+        vl_ptr = <float *>vl_r.data
+        jobvl = "V"
+    else:
+        vl_r = None
+        vl_ptr = NULL
+        jobvl = "N"
+
+    if right:
+        vr_r = np.empty((N,N), dtype = np.float32, order='F')
+        vr_ptr = <float *>vr_r.data
+        jobvr = "V"
+    else:
+        vr_r = None
+        vr_ptr = NULL
+        jobvr = "N"
+
+    # workspace query
+    lwork = -1
+
+    f_lapack.sggev_(jobvl, jobvr, &N, <float *>A.data, &N,
+                    <float *>B.data, &N,
+                    <float *>alphar.data, <float *> alphai.data,
+                    <float *>beta.data,
+                    vl_ptr, &N, vr_ptr, &N,
+                    &qwork, &lwork, &info)
+
+    assert info == 0, "Argument error in sggev"
+
+    lwork = <l_int>qwork
+    work = np.empty(lwork, dtype = np.float32)
 
-    if A.shape[0] != A.shape[1]:
-        raise ValueError("gen_eig requires square matrix input")
+    # Now the real calculation
+    f_lapack.sggev_(jobvl, jobvr, &N, <float *>A.data, &N,
+                    <float *>B.data, &N,
+                    <float *>alphar.data, <float *> alphai.data,
+                    <float *>beta.data,
+                    vl_ptr, &N, vr_ptr, &N,
+                    <float *>work.data, &lwork, &info)
 
-    if A.shape[0] != B.shape[0] or A.shape[1] != B.shape[1]:
-        raise ValueError("A and B do not have the same shape")
+    if info > 0:
+        raise LinAlgError("QZ iteration failed to converge in sggev")
+
+    assert info == 0, "Argument error in sggev"
+
+    alpha, vl, vr = ggev_postprocess(np.complex64, alphar, alphai, vl_r, vr_r)
+
+    return filter_args((True, True, left, right), (alpha, beta, vl, vr))
+
+
+def dggev(np.ndarray[np.float64_t, ndim=2] A,
+          np.ndarray[np.float64_t, ndim=2] B,
+          left=False, right=True):
+    cdef l_int N, info, lwork
+    cdef char *jobvl
+    cdef char *jobvr
+    cdef np.ndarray[np.float64_t, ndim=2] vl_r, vr_r
+    cdef double *vl_ptr
+    cdef double *vr_ptr
+    cdef double qwork
+    cdef np.ndarray[np.float64_t, ndim=1] work, alphar, alphai, beta
 
-    # Allocate workspaces
+    assert_fortran_mat(A, B)
 
     N = A.shape[0]
+    alphar = np.empty(N, dtype = np.float64)
+    alphai = np.empty(N, dtype = np.float64)
+    beta = np.empty(N, dtype = np.float64)
+
+    if left:
+        vl_r = np.empty((N,N), dtype = np.float64, order='F')
+        vl_ptr = <double *>vl_r.data
+        jobvl = "V"
+    else:
+        vl_r = None
+        vl_ptr = NULL
+        jobvl = "N"
 
-    cdef np.ndarray[scalar] alphar, alphai
-    if scalar in cmplx:
-        alphar = np.empty(N, dtype=A.dtype)
-        alphai = None
+    if right:
+        vr_r = np.empty((N,N), dtype = np.float64, order='F')
+        vr_ptr = <double *>vr_r.data
+        jobvr = "V"
     else:
-        alphar = np.empty(N, dtype=A.dtype)
-        alphai = np.empty(N, dtype=A.dtype)
+        vr_r = None
+        vr_ptr = NULL
+        jobvr = "N"
+
+    # workspace query
+    lwork = -1
+
+    f_lapack.dggev_(jobvl, jobvr, &N, <double *>A.data, &N,
+                    <double *>B.data, &N,
+                    <double *>alphar.data, <double *> alphai.data,
+                    <double *>beta.data,
+                    vl_ptr, &N, vr_ptr, &N,
+                    &qwork, &lwork, &info)
+
+    assert info == 0, "Argument error in dggev"
+
+    lwork = <l_int>qwork
+    work = np.empty(lwork, dtype = np.float64)
+
+    # Now the real calculation
+    f_lapack.dggev_(jobvl, jobvr, &N, <double *>A.data, &N,
+                    <double *>B.data, &N,
+                    <double *>alphar.data, <double *> alphai.data,
+                    <double *>beta.data,
+                    vl_ptr, &N, vr_ptr, &N,
+                    <double *>work.data, &lwork, &info)
+
+    if info > 0:
+        raise LinAlgError("QZ iteration failed to converge in dggev")
+
+    assert info == 0, "Argument error in dggev"
 
-    cdef np.ndarray[scalar] beta = np.empty(N, dtype=A.dtype)
+    alpha, vl, vr = ggev_postprocess(np.complex128, alphar, alphai, vl_r, vr_r)
+
+    return filter_args((True, True, left, right), (alpha, beta, vl, vr))
 
-    cdef np.ndarray rwork = None
-    if scalar is float_complex:
-        rwork = np.empty(8 * N, dtype=np.float32)
-    elif scalar is double_complex:
-        rwork = np.empty(8 * N, dtype=np.float64)
 
-    cdef np.ndarray vl
-    cdef scalar *vl_ptr
+def cggev(np.ndarray[np.complex64_t, ndim=2] A,
+          np.ndarray[np.complex64_t, ndim=2] B,
+          left=False, right=True):
+    cdef l_int N, info, lwork
     cdef char *jobvl
+    cdef char *jobvr
+    cdef np.ndarray[np.complex64_t, ndim=2] vl, vr
+    cdef float complex *vl_ptr
+    cdef float complex *vr_ptr
+    cdef float complex qwork
+    cdef np.ndarray[np.complex64_t, ndim=1] work, alpha, beta
+    cdef np.ndarray[np.float32_t, ndim=1] rwork
+
+    assert_fortran_mat(A, B)
+
+    N = A.shape[0]
+    alpha = np.empty(N, dtype = np.complex64)
+    beta = np.empty(N, dtype = np.complex64)
+
     if left:
-        vl = np.empty((N,N), dtype=A.dtype, order='F')
-        vl_ptr = <scalar *>vl.data
+        vl = np.empty((N,N), dtype = np.complex64, order='F')
+        vl_ptr = <float complex *>vl.data
         jobvl = "V"
     else:
-        vl = None
         vl_ptr = NULL
         jobvl = "N"
 
-    cdef np.ndarray vr
-    cdef scalar *vr_ptr
-    cdef char *jobvr
     if right:
-        vr = np.empty((N,N), dtype=A.dtype, order='F')
-        vr_ptr = <scalar *>vr.data
+        vr = np.empty((N,N), dtype = np.complex64, order='F')
+        vr_ptr = <float complex *>vr.data
         jobvr = "V"
     else:
-        vr = None
         vr_ptr = NULL
         jobvr = "N"
 
-    # Workspace query
-    # Xggev expects &qwork as a <scalar *> (even though it's an integer)
+    rwork = np.empty(8 * N, dtype = np.float32)
+
+    # workspace query
     lwork = -1
-    cdef scalar qwork
+    work = np.empty(1, dtype = np.complex64)
 
-    if scalar is float:
-        lapack.sggev(jobvl, jobvr, &N, <float *>A.data, &N,
-                     <float *>B.data, &N,
-                     <float *>alphar.data, <float *> alphai.data,
-                     <float *>beta.data,
-                     vl_ptr, &N, vr_ptr, &N,
-                     &qwork, &lwork, &info)
-    elif scalar is double:
-        lapack.dggev(jobvl, jobvr, &N, <double *>A.data, &N,
-                     <double *>B.data, &N,
-                     <double *>alphar.data, <double *> alphai.data,
-                     <double *>beta.data,
-                     vl_ptr, &N, vr_ptr, &N,
-                     &qwork, &lwork, &info)
-    elif scalar is float_complex:
-        lapack.cggev(jobvl, jobvr, &N, <float complex *>A.data, &N,
-                     <float complex *>B.data, &N,
-                     <float complex *>alphar.data, <float complex *>beta.data,
-                     vl_ptr, &N, vr_ptr, &N,
-                     &qwork, &lwork,
-                     <float *>rwork.data, &info)
-    elif scalar is double_complex:
-        lapack.zggev(jobvl, jobvr, &N, <double complex *>A.data, &N,
-                     <double complex *>B.data, &N,
-                     <double complex *>alphar.data, <double complex *>beta.data,
-                     vl_ptr, &N, vr_ptr, &N,
-                     &qwork, &lwork,
-                     <double *>rwork.data, &info)
+    f_lapack.cggev_(jobvl, jobvr, &N, <float complex *>A.data, &N,
+                    <float complex *>B.data, &N,
+                    <float complex *>alpha.data, <float complex *>beta.data,
+                    vl_ptr, &N, vr_ptr, &N,
+                    &qwork, &lwork,
+                    <float *>rwork.data, &info)
 
-    assert info == 0, "Argument error in ggev"
+    assert info == 0, "Argument error in cggev"
 
-    lwork = lwork_from_qwork(qwork)
-    cdef np.ndarray[scalar] work = np.empty(lwork, dtype=A.dtype)
+    lwork = <l_int>qwork.real
 
-    # The actual calculation
+    work = np.empty(lwork, dtype = np.complex64)
 
-    if scalar is float:
-        lapack.sggev(jobvl, jobvr, &N, <float *>A.data, &N,
-                     <float *>B.data, &N,
-                     <float *>alphar.data, <float *> alphai.data,
-                     <float *>beta.data,
-                     vl_ptr, &N, vr_ptr, &N,
-                     <float *>work.data, &lwork, &info)
-    elif scalar is double:
-        lapack.dggev(jobvl, jobvr, &N, <double *>A.data, &N,
-                     <double *>B.data, &N,
-                     <double *>alphar.data, <double *> alphai.data,
-                     <double *>beta.data,
-                     vl_ptr, &N, vr_ptr, &N,
-                     <double *>work.data, &lwork, &info)
-    elif scalar is float_complex:
-        lapack.cggev(jobvl, jobvr, &N, <float complex *>A.data, &N,
-                     <float complex *>B.data, &N,
-                     <float complex *>alphar.data, <float complex *>beta.data,
-                     vl_ptr, &N, vr_ptr, &N,
-                     <float complex *>work.data, &lwork,
-                     <float *>rwork.data, &info)
-    elif scalar is double_complex:
-        lapack.zggev(jobvl, jobvr, &N, <double complex *>A.data, &N,
-                     <double complex *>B.data, &N,
-                     <double complex *>alphar.data, <double complex *>beta.data,
-                     vl_ptr, &N, vr_ptr, &N,
-                     <double complex *>work.data, &lwork,
-                     <double *>rwork.data, &info)
+    # Now the real calculation
+    f_lapack.cggev_(jobvl, jobvr, &N, <float complex *>A.data, &N,
+                    <float complex *>B.data, &N,
+                    <float complex *>alpha.data, <float complex *>beta.data,
+                    vl_ptr, &N, vr_ptr, &N,
+                    <float complex *>work.data, &lwork,
+                    <float *>rwork.data, &info)
 
     if info > 0:
-        raise LinAlgError("QZ iteration failed to converge in sggev")
+        raise LinAlgError("QZ iteration failed to converge in cggev")
 
-    assert info == 0, "Argument error in ggev"
+    assert info == 0, "Argument error in cggev"
 
-    if scalar is float:
-        post_dtype = np.complex64
-    elif scalar is double:
-        post_dtype = np.complex128
+    return filter_args((True, True, left, right), (alpha, beta, vl, vr))
 
-    cdef np.ndarray alpha
-    alpha = alphar
-    if scalar in floating:
-        alpha, vl, vr = ggev_postprocess(post_dtype, alphar, alphai, vl, vr)
 
-    return filter_args((True, True, left, right), (alpha, beta, vl, vr))
+def zggev(np.ndarray[np.complex128_t, ndim=2] A,
+          np.ndarray[np.complex128_t, ndim=2] B,
+          left=False, right=True):
+    cdef l_int N, info, lwork
+    cdef char *jobvl
+    cdef char *jobvr
+    cdef np.ndarray[np.complex128_t, ndim=2] vl, vr
+    cdef double complex *vl_ptr
+    cdef double complex *vr_ptr
+    cdef double complex qwork
+    cdef np.ndarray[np.complex128_t, ndim=1] work, alpha, beta
+    cdef np.ndarray[np.float64_t, ndim=1] rwork
+
+    assert_fortran_mat(A, B)
 
+    N = A.shape[0]
+    alpha = np.empty(N, dtype = np.complex128)
+    beta = np.empty(N, dtype = np.complex128)
 
-def gees(np.ndarray[scalar, ndim=2] A, calc_q=True, calc_ev=True):
-    cdef l_int N, lwork, sdim, info
+    if left:
+        vl = np.empty((N,N), dtype = np.complex128, order='F')
+        vl_ptr = <double complex *>vl.data
+        jobvl = "V"
+    else:
+        vl_ptr = NULL
+        jobvl = "N"
 
-    assert_fortran_mat(A)
+    if right:
+        vr = np.empty((N,N), dtype = np.complex128, order='F')
+        vr_ptr = <double complex *>vr.data
+        jobvr = "V"
+    else:
+        vr_ptr = NULL
+        jobvr = "N"
 
-    if A.ndim != 2:
-        raise ValueError("Expect matrix as input")
+    rwork = np.empty(8 * N, dtype = np.float64)
 
-    if A.shape[0] != A.shape[1]:
-        raise ValueError("Expect square matrix")
+    # workspace query
+    lwork = -1
+    work = np.empty(1, dtype = np.complex128)
 
-    # Allocate workspaces
+    f_lapack.zggev_(jobvl, jobvr, &N, <double complex *>A.data, &N,
+                    <double complex *>B.data, &N,
+                    <double complex *>alpha.data, <double complex *>beta.data,
+                    vl_ptr, &N, vr_ptr, &N,
+                    &qwork, &lwork,
+                    <double *>rwork.data, &info)
 
-    N = A.shape[0]
+    assert info == 0, "Argument error in zggev"
 
-    cdef np.ndarray[scalar] wr, wi
-    if scalar in cmplx:
-        wr = np.empty(N, dtype=A.dtype)
-        wi = None
-    else:
-        wr = np.empty(N, dtype=A.dtype)
-        wi = np.empty(N, dtype=A.dtype)
+    lwork = <l_int>qwork.real
+    work = np.empty(lwork, dtype = np.complex128)
+
+    # Now the real calculation
+    f_lapack.zggev_(jobvl, jobvr, &N, <double complex *>A.data, &N,
+                    <double complex *>B.data, &N,
+                    <double complex *>alpha.data, <double complex *>beta.data,
+                    vl_ptr, &N, vr_ptr, &N,
+                    <double complex *>work.data, &lwork,
+                    <double *>rwork.data, &info)
+
+    if info > 0:
+        raise LinAlgError("QZ iteration failed to converge in zggev")
 
-    cdef np.ndarray rwork
-    if scalar is float_complex:
-        rwork = np.empty(N, dtype=np.float32)
-    elif scalar is double_complex:
-        rwork = np.empty(N, dtype=np.float64)
+    assert info == 0, "Argument error in zggev"
 
+    return filter_args((True, True, left, right), (alpha, beta, vl, vr))
+
+
+# Wrapper for xGEES
+def sgees(np.ndarray[np.float32_t, ndim=2] A,
+          calc_q=True, calc_ev=True):
+    cdef l_int N, lwork, sdim, info
     cdef char *jobvs
-    cdef scalar *vs_ptr
-    cdef np.ndarray[scalar, ndim=2] vs
+    cdef float *vs_ptr
+    cdef float qwork
+    cdef np.ndarray[np.float32_t, ndim=2] vs
+    cdef np.ndarray[np.float32_t] wr, wi, work
+
+    assert_fortran_mat(A)
+
+    N = A.shape[0]
+    wr = np.empty(N, dtype = np.float32)
+    wi = np.empty(N, dtype = np.float32)
+
     if calc_q:
-        vs = np.empty((N,N), dtype=A.dtype, order='F')
-        vs_ptr = <scalar *>vs.data
+        vs = np.empty((N,N), dtype = np.float32, order='F')
+        vs_ptr = <float *>vs.data
         jobvs = "V"
     else:
-        vs = None
         vs_ptr = NULL
         jobvs = "N"
 
-    # Workspace query
-    # Xgees expects &qwork as a <scalar *> (even though it's an integer)
+    # workspace query
     lwork = -1
-    cdef scalar qwork
-
-    if scalar is float:
-        lapack.sgees(jobvs, "N", NULL, &N, <float *>A.data, &N,
-                     &sdim, <float *>wr.data, <float *>wi.data, vs_ptr, &N,
-                     &qwork, &lwork, NULL, &info)
-    elif scalar is double:
-        lapack.dgees(jobvs, "N", NULL, &N, <double *>A.data, &N,
-                     &sdim, <double *>wr.data, <double *>wi.data, vs_ptr, &N,
-                     &qwork, &lwork, NULL, &info)
-    elif scalar is float_complex:
-        lapack.cgees(jobvs, "N", NULL, &N, <float complex *>A.data, &N,
-                     &sdim, <float complex *>wr.data, vs_ptr, &N,
-                     &qwork, &lwork, <float *>rwork.data, NULL, &info)
-    elif scalar is double_complex:
-        lapack.zgees(jobvs, "N", NULL, &N, <double complex *>A.data, &N,
-                     &sdim, <double complex *>wr.data, vs_ptr, &N,
-                     &qwork, &lwork, <double *>rwork.data, NULL, &info)
+    f_lapack.sgees_(jobvs, "N", NULL, &N, <float *>A.data, &N,
+                    &sdim, <float *>wr.data, <float *>wi.data, vs_ptr, &N,
+                    &qwork, &lwork, NULL, &info)
 
     assert info == 0, "Argument error in sgees"
 
-    lwork = lwork_from_qwork(qwork)
-    cdef np.ndarray[scalar] work = np.empty(lwork, dtype=A.dtype)
-
-    # The actual calculation
-
-    if scalar is float:
-        lapack.sgees(jobvs, "N", NULL, &N, <float *>A.data, &N,
-                     &sdim, <float *>wr.data, <float *>wi.data, vs_ptr, &N,
-                     <float *>work.data, &lwork, NULL, &info)
-    elif scalar is double:
-        lapack.dgees(jobvs, "N", NULL, &N, <double *>A.data, &N,
-                     &sdim, <double *>wr.data, <double *>wi.data, vs_ptr, &N,
-                     <double *>work.data, &lwork, NULL, &info)
-    elif scalar is float_complex:
-        lapack.cgees(jobvs, "N", NULL, &N, <float complex *>A.data, &N,
-                     &sdim, <float complex *>wr.data, vs_ptr, &N,
-                     <float complex *>work.data, &lwork,
-                     <float *>rwork.data, NULL, &info)
-    elif scalar is double_complex:
-        lapack.zgees(jobvs, "N", NULL, &N, <double complex *>A.data, &N,
-                     &sdim, <double complex *>wr.data, vs_ptr, &N,
-                     <double complex *>work.data, &lwork,
-                     <double *>rwork.data, NULL, &info)
+    lwork = <int>qwork
+    work = np.empty(lwork, dtype = np.float32)
+
+    # Now the real calculation
+    f_lapack.sgees_(jobvs, "N", NULL, &N, <float *>A.data, &N,
+                    &sdim, <float *>wr.data, <float *>wi.data, vs_ptr, &N,
+                    <float *>work.data, &lwork, NULL, &info)
 
     if info > 0:
-        raise LinAlgError("QR iteration failed to converge in gees")
+        raise LinAlgError("QR iteration failed to converge in sgees")
 
-    assert info == 0, "Argument error in gees"
+    assert info == 0, "Argument error in sgees"
 
-    # Real inputs possibly produce complex output
-    cdef np.ndarray w = maybe_complex[scalar](0, wr, wi)
+    if wi.nonzero()[0].size:
+        w = wr + 1j * wi
+    else:
+        w = wr
 
     return filter_args((True, calc_q, calc_ev), (A, vs, w))
 
 
-def trsen(np.ndarray[l_logical] select,
-          np.ndarray[scalar, ndim=2] T,
-          np.ndarray[scalar, ndim=2] Q,
-          calc_ev=True):
-    cdef l_int N, M, lwork, liwork, qiwork, info
-
-    assert_fortran_mat(T, Q)
+def dgees(np.ndarray[np.float64_t, ndim=2] A,
+          calc_q=True, calc_ev=True):
+    cdef l_int N, lwork, sdim, info
+    cdef char *jobvs
+    cdef double *vs_ptr
+    cdef double qwork
+    cdef np.ndarray[np.float64_t, ndim=2] vs
+    cdef np.ndarray[np.float64_t] wr, wi, work
 
-    # Allocate workspaces
+    assert_fortran_mat(A)
 
-    N = T.shape[0]
+    N = A.shape[0]
+    wr = np.empty(N, dtype = np.float64)
+    wi = np.empty(N, dtype = np.float64)
 
-    cdef np.ndarray[scalar] wr, wi
-    if scalar in cmplx:
-        wr = np.empty(N, dtype=T.dtype)
-        wi = None
+    if calc_q:
+        vs = np.empty((N,N), dtype = np.float64, order='F')
+        vs_ptr = <double *>vs.data
+        jobvs = "V"
     else:
-        wr = np.empty(N, dtype=T.dtype)
-        wi = np.empty(N, dtype=T.dtype)
+        vs_ptr = NULL
+        jobvs = "N"
 
-    cdef char *compq
-    cdef scalar *q_ptr
-    if Q is not None:
-        compq = "V"
-        q_ptr = <scalar *>Q.data
-    else:
-        compq = "N"
-        q_ptr = NULL
+    # workspace query
+    lwork = -1
+    f_lapack.dgees_(jobvs, "N", NULL, &N, <double *>A.data, &N,
+                    &sdim, <double *>wr.data, <double *>wi.data, vs_ptr, &N,
+                    &qwork, &lwork, NULL, &info)
 
-    # Workspace query
-    # Xtrsen expects &qwork as a <scalar *> (even though it's an integer)
-    cdef scalar qwork
-    lwork = liwork = -1
+    assert info == 0, "Argument error in dgees"
 
-    if scalar is float:
-        lapack.strsen("N", compq, <l_logical *>select.data,
-                      &N, <float *>T.data, &N, q_ptr, &N,
-                      <float *>wr.data, <float *>wi.data, &M, NULL, NULL,
-                      &qwork, &lwork, &qiwork, &liwork, &info)
-    elif scalar is double:
-        lapack.dtrsen("N", compq, <l_logical *>select.data,
-                      &N, <double *>T.data, &N, q_ptr, &N,
-                      <double *>wr.data, <double *>wi.data, &M, NULL, NULL,
-                      &qwork, &lwork, &qiwork, &liwork, &info)
-    elif scalar is float_complex:
-        lapack.ctrsen("N", compq, <l_logical *>select.data,
-                      &N, <float complex *>T.data, &N, q_ptr, &N,
-                      <float complex *>wr.data, &M, NULL, NULL,
-                      &qwork, &lwork, &info)
-    elif scalar is double_complex:
-        lapack.ztrsen("N", compq, <l_logical *>select.data,
-                      &N, <double complex *>T.data, &N, q_ptr, &N,
-                      <double complex *>wr.data, &M, NULL, NULL,
-                      &qwork, &lwork, &info)
-
-    assert info == 0, "Argument error in trsen"
-
-    lwork = lwork_from_qwork(qwork)
-    cdef np.ndarray[scalar] work = np.empty(lwork, dtype=T.dtype)
-
-    cdef np.ndarray[l_int] iwork = None
-    if scalar in floating:
-        liwork = qiwork
-        iwork = np.empty(liwork, dtype=int_dtype)
-
-    # Tha actual calculation
-
-    if scalar is float:
-        lapack.strsen("N", compq, <l_logical *>select.data,
-                      &N, <float *>T.data, &N, q_ptr, &N,
-                      <float *>wr.data, <float *>wi.data, &M, NULL, NULL,
-                      <float *>work.data, &lwork,
-                      <l_int *>iwork.data, &liwork, &info)
-    elif scalar is double:
-        lapack.dtrsen("N", compq, <l_logical *>select.data,
-                      &N, <double *>T.data, &N, q_ptr, &N,
-                      <double *>wr.data, <double *>wi.data, &M, NULL, NULL,
-                      <double *>work.data, &lwork,
-                      <l_int *>iwork.data, &liwork, &info)
-    elif scalar is float_complex:
-        lapack.ctrsen("N", compq, <l_logical *>select.data,
-                      &N, <float complex *>T.data, &N, q_ptr, &N,
-                      <float complex *>wr.data, &M, NULL, NULL,
-                      <float complex *>work.data, &lwork, &info)
-    elif scalar is double_complex:
-        lapack.ztrsen("N", compq, <l_logical *>select.data,
-                      &N, <double complex *>T.data, &N, q_ptr, &N,
-                      <double complex *>wr.data, &M, NULL, NULL,
-                      <double complex *>work.data, &lwork, &info)
+    lwork = <int>qwork
+    work = np.empty(lwork, dtype = np.float64)
+
+    # Now the real calculation
+    f_lapack.dgees_(jobvs, "N", NULL, &N, <double *>A.data, &N,
+                    &sdim, <double *>wr.data, <double *>wi.data, vs_ptr, &N,
+                    <double *>work.data, &lwork, NULL, &info)
 
     if info > 0:
-        raise LinAlgError("Reordering failed; problem is very ill-conditioned")
+        raise LinAlgError("QR iteration failed to converge in dgees")
 
-    assert info == 0, "Argument error in trsen"
+    assert info == 0, "Argument error in dgees"
 
-    # Real inputs possibly produce complex output
-    cdef np.ndarray w = maybe_complex[scalar](0, wr, wi)
+    if wi.nonzero()[0].size:
+        w = wr + 1j * wi
+    else:
+        w = wr
 
-    return filter_args((True, Q is not None, calc_ev), (T, Q, w))
+    return filter_args((True, calc_q, calc_ev), (A, vs, w))
 
 
-# Helper function for xTREVC and xTGEVC
-def txevc_postprocess(dtype, T, vreal, np.ndarray[l_logical] select):
-    cdef int N, M, i, m, indx
+def cgees(np.ndarray[np.complex64_t, ndim=2] A,
+          calc_q=True, calc_ev=True):
+    cdef l_int N, lwork, sdim, info
+    cdef char *jobvs
+    cdef float complex *vs_ptr
+    cdef float complex qwork
+    cdef np.ndarray[np.complex64_t, ndim=2] vs
+    cdef np.ndarray[np.complex64_t] w, work
+    cdef np.ndarray[np.float32_t] rwork
+
+    assert_fortran_mat(A)
+
+    N = A.shape[0]
+    w = np.empty(N, dtype = np.complex64)
+    rwork = np.empty(N, dtype = np.float32)
+
+    if calc_q:
+        vs = np.empty((N,N), dtype = np.complex64, order='F')
+        vs_ptr = <float complex *>vs.data
+        jobvs = "V"
+    else:
+        vs_ptr = NULL
+        jobvs = "N"
+
+    # workspace query
+    lwork = -1
+    f_lapack.cgees_(jobvs, "N", NULL, &N, <float complex *>A.data, &N,
+                    &sdim, <float complex *>w.data, vs_ptr, &N,
+                    &qwork, &lwork, <float *>rwork.data, NULL, &info)
+
+    assert info == 0, "Argument error in cgees"
+
+    lwork = <int>qwork.real
+    work = np.empty(lwork, dtype = np.complex64)
+
+    # Now the real calculation
+    f_lapack.cgees_(jobvs, "N", NULL, &N, <float complex *>A.data, &N,
+                    &sdim, <float complex *>w.data, vs_ptr, &N,
+                    <float complex *>work.data, &lwork,
+                    <float *>rwork.data, NULL, &info)
+
+    if info > 0:
+        raise LinAlgError("QR iteration failed to converge in cgees")
+
+    assert info == 0, "Argument error in cgees"
+
+    return filter_args((True, calc_q, calc_ev), (A, vs, w))
+
+
+def zgees(np.ndarray[np.complex128_t, ndim=2] A,
+          calc_q=True, calc_ev=True):
+    cdef l_int N, lwork, sdim, info
+    cdef char *jobvs
+    cdef double complex *vs_ptr
+    cdef double complex qwork
+    cdef np.ndarray[np.complex128_t, ndim=2] vs
+    cdef np.ndarray[np.complex128_t] w, work
+    cdef np.ndarray[np.float64_t] rwork
+
+    assert_fortran_mat(A)
+
+    N = A.shape[0]
+    w = np.empty(N, dtype = np.complex128)
+    rwork = np.empty(N, dtype = np.float64)
+
+    if calc_q:
+        vs = np.empty((N,N), dtype = np.complex128, order='F')
+        vs_ptr = <double complex *>vs.data
+        jobvs = "V"
+    else:
+        vs_ptr = NULL
+        jobvs = "N"
+
+    # workspace query
+    lwork = -1
+    f_lapack.zgees_(jobvs, "N", NULL, &N, <double complex *>A.data, &N,
+                    &sdim, <double complex *>w.data, vs_ptr, &N,
+                    &qwork, &lwork, <double *>rwork.data, NULL, &info)
+
+    assert info == 0, "Argument error in zgees"
+
+    lwork = <int>qwork.real
+    work = np.empty(lwork, dtype = np.complex128)
+
+    # Now the real calculation
+    f_lapack.zgees_(jobvs, "N", NULL, &N, <double complex *>A.data, &N,
+                    &sdim, <double complex *>w.data, vs_ptr, &N,
+                    <double complex *>work.data, &lwork,
+                    <double *>rwork.data, NULL, &info)
+
+    if info > 0:
+        raise LinAlgError("QR iteration failed to converge in zgees")
+
+    assert info == 0, "Argument error in zgees"
+
+    return filter_args((True, calc_q, calc_ev), (A, vs, w))
+
+# Wrapper for xTRSEN
+def strsen(np.ndarray[l_logical] select,
+           np.ndarray[np.float32_t, ndim=2] T,
+           np.ndarray[np.float32_t, ndim=2] Q=None,
+           calc_ev=True):
+    cdef l_int N, M, lwork, liwork, qiwork, info
+    cdef char *compq
+    cdef float qwork
+    cdef float *q_ptr
+    cdef np.ndarray[np.float32_t] wr, wi, work
+    cdef np.ndarray[l_int] iwork
+
+    assert_fortran_mat(T, Q)
+
+    N = T.shape[0]
+    wr = np.empty(N, dtype = np.float32)
+    wi = np.empty(N, dtype = np.float32)
+
+    if Q is not None:
+        compq = "V"
+        q_ptr = <float *>Q.data
+    else:
+        compq = "N"
+        q_ptr = NULL
+
+    # workspace query
+    lwork = liwork = -1
+    f_lapack.strsen_("N", compq, <l_logical *>select.data,
+                     &N, <float *>T.data, &N, q_ptr, &N,
+                     <float *>wr.data, <float *>wi.data, &M, NULL, NULL,
+                     &qwork, &lwork, &qiwork, &liwork, &info)
+
+    assert info == 0, "Argument error in strsen"
+
+    lwork = <int>qwork
+    work = np.empty(lwork, dtype = np.float32)
+    liwork = qiwork
+    iwork = np.empty(liwork, dtype = f_lapack.l_int_dtype)
+
+    # Now the real calculation
+    f_lapack.strsen_("N", compq, <l_logical *>select.data,
+                     &N, <float *>T.data, &N, q_ptr, &N,
+                     <float *>wr.data, <float *>wi.data, &M, NULL, NULL,
+                     <float *>work.data, &lwork,
+                     <int *>iwork.data, &liwork, &info)
+
+    if info > 0:
+        raise LinAlgError("Reordering failed; problem is very ill-conditioned")
+
+    assert info == 0, "Argument error in strsen"
+
+    if wi.nonzero()[0].size:
+        w = wr + 1j * wi
+    else:
+        w = wr
+
+    return filter_args((True, Q is not None, calc_ev), (T, Q, w))
+
+
+def dtrsen(np.ndarray[l_logical] select,
+           np.ndarray[np.float64_t, ndim=2] T,
+           np.ndarray[np.float64_t, ndim=2] Q=None,
+           calc_ev=True):
+    cdef l_int N, M, lwork, liwork, qiwork, info
+    cdef char *compq
+    cdef double qwork
+    cdef double *q_ptr
+    cdef np.ndarray[np.float64_t] wr, wi, work
+    cdef np.ndarray[l_int] iwork
+
+    assert_fortran_mat(T, Q)
+
+    N = T.shape[0]
+    wr = np.empty(N, dtype = np.float64)
+    wi = np.empty(N, dtype = np.float64)
+
+    if Q is not None:
+        compq = "V"
+        q_ptr = <double *>Q.data
+    else:
+        compq = "N"
+        q_ptr = NULL
+
+    # workspace query
+    lwork = liwork = -1
+    f_lapack.dtrsen_("N", compq, <l_logical *>select.data,
+                     &N, <double *>T.data, &N, q_ptr, &N,
+                     <double *>wr.data, <double *>wi.data, &M, NULL, NULL,
+                     &qwork, &lwork, &qiwork, &liwork, &info)
+
+    assert info == 0, "Argument error in dtrsen"
+
+    lwork = <int>qwork
+    work = np.empty(lwork, dtype = np.float64)
+    liwork = qiwork
+    iwork = np.empty(liwork, dtype = f_lapack.l_int_dtype)
+
+    # Now the real calculation
+    f_lapack.dtrsen_("N", compq, <l_logical *>select.data,
+                     &N, <double *>T.data, &N, q_ptr, &N,
+                     <double *>wr.data, <double *>wi.data, &M, NULL, NULL,
+                     <double *>work.data, &lwork,
+                     <int *>iwork.data, &liwork, &info)
+
+    if info > 0:
+        raise LinAlgError("Reordering failed; problem is very ill-conditioned")
+
+    assert info == 0, "Argument error in dtrsen"
+
+    if wi.nonzero()[0].size:
+        w = wr + 1j * wi
+    else:
+        w = wr
+
+    return filter_args((True, Q is not None, calc_ev), (T, Q, w))
+
+
+def ctrsen(np.ndarray[l_logical] select,
+           np.ndarray[np.complex64_t, ndim=2] T,
+           np.ndarray[np.complex64_t, ndim=2] Q=None,
+           calc_ev=True):
+    cdef l_int N, M, lwork, info
+    cdef char *compq
+    cdef float complex qwork
+    cdef float complex *q_ptr
+    cdef np.ndarray[np.complex64_t] w, work
+
+    assert_fortran_mat(T, Q)
+
+    N = T.shape[0]
+    w = np.empty(N, dtype = np.complex64)
+
+    if Q is not None:
+        compq = "V"
+        q_ptr = <float complex *>Q.data
+    else:
+        compq = "N"
+        q_ptr = NULL
+
+    # workspace query
+    lwork = -1
+    f_lapack.ctrsen_("N", compq, <l_logical *>select.data,
+                     &N, <float complex *>T.data, &N, q_ptr, &N,
+                     <float complex *>w.data, &M, NULL, NULL,
+                     &qwork, &lwork, &info)
+
+    assert info == 0, "Argument error in ctrsen"
+
+    lwork = <int>qwork.real
+    work = np.empty(lwork, dtype = np.complex64)
+
+    # Now the real calculation
+    f_lapack.ctrsen_("N", compq, <l_logical *>select.data,
+                     &N, <float complex *>T.data, &N, q_ptr, &N,
+                     <float complex *>w.data, &M, NULL, NULL,
+                     <float complex *>work.data, &lwork, &info)
+
+    if info > 0:
+        raise LinAlgError("Reordering failed; problem is very ill-conditioned")
+
+    assert info == 0, "Argument error in ctrsen"
+
+    return filter_args((True, Q is not None, calc_ev), (T, Q, w))
+
+
+def ztrsen(np.ndarray[l_logical] select,
+           np.ndarray[np.complex128_t, ndim=2] T,
+           np.ndarray[np.complex128_t, ndim=2] Q=None,
+           calc_ev=True):
+    cdef l_int N, MM, M, lwork, info
+    cdef char *compq
+    cdef double complex qwork
+    cdef double complex *q_ptr
+    cdef np.ndarray[np.complex128_t] w, work
+
+    assert_fortran_mat(T, Q)
+
+    N = T.shape[0]
+    w = np.empty(N, dtype = np.complex128)
+
+    if Q is not None:
+        compq = "V"
+        q_ptr = <double complex *>Q.data
+    else:
+        compq = "N"
+        q_ptr = NULL
+
+    # workspace query
+    lwork = -1
+    f_lapack.ztrsen_("N", compq, <l_logical *>select.data,
+                     &N, <double complex *>T.data, &N, q_ptr, &N,
+                     <double complex *>w.data, &M, NULL, NULL,
+                     &qwork, &lwork, &info)
+
+    assert info == 0, "Argument error in ztrsen"
+
+    lwork = <int>qwork.real
+    work = np.empty(lwork, dtype = np.complex128)
+
+    # Now the real calculation
+    f_lapack.ztrsen_("N", compq, <l_logical *>select.data,
+                     &N, <double complex *>T.data, &N, q_ptr, &N,
+                     <double complex *>w.data, &M, NULL, NULL,
+                     <double complex *>work.data, &lwork, &info)
+
+    if info > 0:
+        raise LinAlgError("Reordering failed; problem is very ill-conditioned")
+
+    assert info == 0, "Argument error in ztrsen"
+
+    return filter_args((True, Q is not None, calc_ev), (T, Q, w))
+
+
+# Helper function for xTREVC and xTGEVC
+def txevc_postprocess(dtype, T, vreal, np.ndarray[l_logical] select):
+    cdef int N, M, i, m, indx
 
     N = T.shape[0]
     if select is None:
-        select = np.ones(N, dtype = logical_dtype)
+        select = np.ones(N, dtype = f_lapack.l_logical_dtype)
     selindx = select.nonzero()[0]
     M = selindx.size
 
@@ -653,37 +1096,989 @@ def txevc_postprocess(dtype, T, vreal, np.ndarray[l_logical] select):
     return v
 
 
-def trevc(np.ndarray[scalar, ndim=2] T,
-          np.ndarray[scalar, ndim=2] Q,
-          np.ndarray[l_logical] select,
-          left=False, right=True):
-    cdef l_int N, info, M, MM
-    cdef char *side
-    cdef char *howmny
+# Wrappers for xTREVC
+def strevc(np.ndarray[np.float32_t, ndim=2] T,
+           np.ndarray[np.float32_t, ndim=2] Q=None,
+           np.ndarray[l_logical] select=None,
+           left=False, right=True):
+    cdef l_int N, info, M, MM
+    cdef char *side
+    cdef char *howmny
+    cdef np.ndarray[np.float32_t, ndim=2] vl_r, vr_r
+    cdef float *vl_r_ptr
+    cdef float *vr_r_ptr
+    cdef np.ndarray[l_logical] select_cpy
+    cdef l_logical *select_ptr
+    cdef np.ndarray[np.float32_t] work
+
+    assert_fortran_mat(T, Q)
+
+    N = T.shape[0]
+    work = np.empty(4*N, dtype = np.float32)
+
+    if left and right:
+        side = "B"
+    elif left:
+        side = "L"
+    elif right:
+        side = "R"
+    else:
+        return
+
+    if select is not None:
+        howmny = "S"
+        MM = select.nonzero()[0].size
+        # Correct for possible additional storage if a single complex
+        # eigenvalue is selected.
+        # For that: Figure out the positions of the 2x2 blocks.
+        cmplxindx = np.diagonal(T, -1).nonzero()[0]
+        for i in cmplxindx:
+            if bool(select[i]) != bool(select[i+1]):
+                MM += 1
+
+        # Select is overwritten in strevc.
+        select_cpy = np.array(select, dtype = f_lapack.l_logical_dtype,
+                              order = 'F')
+        select_ptr = <l_logical *>select_cpy.data
+    else:
+        MM = N
+        select_ptr = NULL
+        if Q is not None:
+            howmny = "B"
+        else:
+            howmny = "A"
+
+    if left:
+        if Q is not None and select is None:
+            vl_r = np.asfortranarray(Q.copy())
+        else:
+            vl_r = np.empty((N, MM), dtype = np.float32, order='F')
+        vl_r_ptr = <float *>vl_r.data
+    else:
+        vl_r_ptr = NULL
+
+    if right:
+        if Q is not None and select is None:
+            vr_r = np.asfortranarray(Q.copy())
+        else:
+            vr_r = np.empty((N, MM), dtype = np.float32, order='F')
+        vr_r_ptr = <float *>vr_r.data
+    else:
+        vr_r_ptr = NULL
+
+    f_lapack.strevc_(side, howmny, select_ptr,
+                     &N, <float *>T.data, &N,
+                     vl_r_ptr, &N, vr_r_ptr, &N, &MM, &M,
+                     <float *>work.data, &info)
+
+    assert info == 0, "Argument error in strevc"
+    assert MM == M, "Unexpected number of eigenvectors returned in strevc"
+
+    if select is not None and Q is not None:
+        if left:
+            vl_r = np.asfortranarray(np.dot(Q, vl_r))
+        if right:
+            vr_r = np.asfortranarray(np.dot(Q, vr_r))
+
+    # If there are complex eigenvalues, we need to postprocess the
+    # eigenvectors.
+    if np.diagonal(T, -1).nonzero()[0].size:
+        if left:
+            vl = txevc_postprocess(np.complex64, T, vl_r, select)
+        if right:
+            vr = txevc_postprocess(np.complex64, T, vr_r, select)
+    else:
+        if left:
+            vl = vl_r
+        if right:
+            vr = vr_r
+
+    if left and right:
+        return (vl, vr)
+    elif left:
+        return vl
+    else:
+        return vr
+
+
+def dtrevc(np.ndarray[np.float64_t, ndim=2] T,
+           np.ndarray[np.float64_t, ndim=2] Q=None,
+           np.ndarray[l_logical] select=None,
+           left=False, right=True):
+    cdef l_int N, info, M, MM
+    cdef char *side
+    cdef char *howmny
+    cdef np.ndarray[np.float64_t, ndim=2] vl_r, vr_r
+    cdef double *vl_r_ptr
+    cdef double *vr_r_ptr
+    cdef np.ndarray[l_logical] select_cpy
+    cdef l_logical *select_ptr
+    cdef np.ndarray[np.float64_t] work
+
+    assert_fortran_mat(T, Q)
+
+    N = T.shape[0]
+    work = np.empty(4*N, dtype = np.float64)
+
+    if left and right:
+        side = "B"
+    elif left:
+        side = "L"
+    elif right:
+        side = "R"
+    else:
+        return
+
+    if select is not None:
+        howmny = "S"
+        MM = select.nonzero()[0].size
+        # Correct for possible additional storage if a single complex
+        # eigenvalue is selected.
+        # For that: Figure out the positions of the 2x2 blocks.
+        cmplxindx = np.diagonal(T, -1).nonzero()[0]
+        for i in cmplxindx:
+            if bool(select[i]) != bool(select[i+1]):
+                MM += 1
+
+        # Select is overwritten in dtrevc.
+        select_cpy = np.array(select, dtype = f_lapack.l_logical_dtype,
+                              order = 'F')
+        select_ptr = <l_logical *>select_cpy.data
+    else:
+        MM = N
+        select_ptr = NULL
+        if Q is not None:
+            howmny = "B"
+        else:
+            howmny = "A"
+
+    if left:
+        if Q is not None and select is None:
+            vl_r = np.asfortranarray(Q.copy())
+        else:
+            vl_r = np.empty((N, MM), dtype = np.float64, order='F')
+        vl_r_ptr = <double *>vl_r.data
+    else:
+        vl_r_ptr = NULL
+
+    if right:
+        if Q is not None and select is None:
+            vr_r = np.asfortranarray(Q.copy())
+        else:
+            vr_r = np.empty((N, MM), dtype = np.float64, order='F')
+        vr_r_ptr = <double *>vr_r.data
+    else:
+        vr_r_ptr = NULL
+
+    f_lapack.dtrevc_(side, howmny, select_ptr,
+                     &N, <double *>T.data, &N,
+                     vl_r_ptr, &N, vr_r_ptr, &N, &MM, &M,
+                     <double *>work.data, &info)
+
+    assert info == 0, "Argument error in dtrevc"
+    assert MM == M, "Unexpected number of eigenvectors returned in dtrevc"
+
+    if select is not None and Q is not None:
+        if left:
+            vl_r = np.asfortranarray(np.dot(Q, vl_r))
+        if right:
+            vr_r = np.asfortranarray(np.dot(Q, vr_r))
+
+    # If there are complex eigenvalues, we need to postprocess the eigenvectors
+    if np.diagonal(T, -1).nonzero()[0].size:
+        if left:
+            vl = txevc_postprocess(np.complex128, T, vl_r, select)
+        if right:
+            vr = txevc_postprocess(np.complex128, T, vr_r, select)
+    else:
+        if left:
+            vl = vl_r
+        if right:
+            vr = vr_r
+
+    if left and right:
+        return (vl, vr)
+    elif left:
+        return vl
+    else:
+        return vr
+
+
+def ctrevc(np.ndarray[np.complex64_t, ndim=2] T,
+           np.ndarray[np.complex64_t, ndim=2] Q=None,
+           np.ndarray[l_logical] select=None,
+           left=False, right=True):
+    cdef l_int N, info, M, MM
+    cdef char *side
+    cdef char *howmny
+    cdef np.ndarray[np.complex64_t, ndim=2] vl, vr
+    cdef float complex *vl_ptr
+    cdef float complex *vr_ptr
+    cdef l_logical *select_ptr
+    cdef np.ndarray[np.complex64_t] work
+    cdef np.ndarray[np.float32_t] rwork
+
+    assert_fortran_mat(T, Q)
+
+    N = T.shape[0]
+    work = np.empty(2*N, dtype = np.complex64)
+    rwork = np.empty(N, dtype = np.float32)
+
+    if left and right:
+        side = "B"
+    elif left:
+        side = "L"
+    elif right:
+        side = "R"
+    else:
+        return
+
+    if select is not None:
+        howmny = "S"
+        MM = select.nonzero()[0].size
+        select_ptr = <l_logical *>select.data
+    else:
+        MM = N
+        select_ptr = NULL
+        if Q is not None:
+            howmny = "B"
+        else:
+            howmny = "A"
+
+    if left:
+        if Q is not None and select is None:
+            vl = np.asfortranarray(Q.copy())
+        else:
+            vl = np.empty((N, MM), dtype = np.complex64, order='F')
+        vl_ptr = <float complex *>vl.data
+    else:
+        vl_ptr = NULL
+
+    if right:
+        if Q is not None and select is None:
+            vr = np.asfortranarray(Q.copy())
+        else:
+            vr = np.empty((N, MM), dtype = np.complex64, order='F')
+        vr_ptr = <float complex *>vr.data
+    else:
+        vr_ptr = NULL
+
+    f_lapack.ctrevc_(side, howmny, select_ptr,
+                     &N, <float complex *>T.data, &N,
+                     vl_ptr, &N, vr_ptr, &N, &MM, &M,
+                     <float complex *>work.data, <float *>rwork.data, &info)
+
+    assert info == 0, "Argument error in ctrevc"
+    assert MM == M, "Unexpected number of eigenvectors returned in ctrevc"
+
+    if select is not None and Q is not None:
+        if left:
+            vl = np.asfortranarray(np.dot(Q, vl))
+        if right:
+            vr = np.asfortranarray(np.dot(Q, vr))
+
+    if left and right:
+        return (vl, vr)
+    elif left:
+        return vl
+    else:
+        return vr
+
+
+def ztrevc(np.ndarray[np.complex128_t, ndim=2] T,
+           np.ndarray[np.complex128_t, ndim=2] Q=None,
+           np.ndarray[l_logical] select=None,
+           left=False, right=True):
+    cdef l_int N, info, M, MM
+    cdef char *side
+    cdef char *howmny
+    cdef np.ndarray[np.complex128_t, ndim=2] vl, vr
+    cdef double complex *vl_ptr
+    cdef double complex *vr_ptr
+    cdef l_logical *select_ptr
+    cdef np.ndarray[np.complex128_t] work
+    cdef np.ndarray[np.float64_t] rwork
+
+    assert_fortran_mat(T, Q)
+
+    N = T.shape[0]
+    work = np.empty(2*N, dtype = np.complex128)
+    rwork = np.empty(N, dtype = np.float64)
+
+    if left and right:
+        side = "B"
+    elif left:
+        side = "L"
+    elif right:
+        side = "R"
+    else:
+        return
+
+    if select is not None:
+        howmny = "S"
+        MM = select.nonzero()[0].size
+        select_ptr = <l_logical *>select.data
+    else:
+        MM = N
+        select_ptr = NULL
+        if Q is not None:
+            howmny = "B"
+        else:
+            howmny = "A"
+
+    if left:
+        if Q is not None and select is None:
+            vl = np.asfortranarray(Q.copy())
+        else:
+            vl = np.empty((N, MM), dtype = np.complex128, order='F')
+        vl_ptr = <double complex *>vl.data
+    else:
+        vl_ptr = NULL
+
+    if right:
+        if Q is not None and select is None:
+            vr = np.asfortranarray(Q.copy())
+        else:
+            vr = np.empty((N, MM), dtype = np.complex128, order='F')
+        vr_ptr = <double complex *>vr.data
+    else:
+        vr_ptr = NULL
+
+    f_lapack.ztrevc_(side, howmny, select_ptr,
+                     &N, <double complex *>T.data, &N,
+                     vl_ptr, &N, vr_ptr, &N, &MM, &M,
+                     <double complex *>work.data, <double *>rwork.data, &info)
+
+    assert info == 0, "Argument error in ztrevc"
+    assert MM == M, "Unexpected number of eigenvectors returned in ztrevc"
+
+    if select is not None and Q is not None:
+        if left:
+            vl = np.asfortranarray(np.dot(Q, vl))
+        if right:
+            vr = np.asfortranarray(np.dot(Q, vr))
+
+    if left and right:
+        return (vl, vr)
+    elif left:
+        return vl
+    else:
+        return vr
+
+
+# wrappers for xGGES
+def sgges(np.ndarray[np.float32_t, ndim=2] A,
+          np.ndarray[np.float32_t, ndim=2] B,
+          calc_q=True, calc_z=True, calc_ev=True):
+    cdef l_int N, lwork, sdim, info
+    cdef char *jobvsl
+    cdef char *jobvsr
+    cdef float *vsl_ptr
+    cdef float *vsr_ptr
+    cdef float qwork
+    cdef np.ndarray[np.float32_t, ndim=2] vsl, vsr
+    cdef np.ndarray[np.float32_t] alphar, alphai, beta, work
+
+    assert_fortran_mat(A, B)
+
+    N = A.shape[0]
+    alphar = np.empty(N, dtype = np.float32)
+    alphai = np.empty(N, dtype = np.float32)
+    beta = np.empty(N, dtype = np.float32)
+
+    if calc_q:
+        vsl = np.empty((N,N), dtype = np.float32, order='F')
+        vsl_ptr = <float *>vsl.data
+        jobvsl = "V"
+    else:
+        vsl = None
+        vsl_ptr = NULL
+        jobvsl = "N"
+
+    if calc_z:
+        vsr = np.empty((N,N), dtype = np.float32, order='F')
+        vsr_ptr = <float *>vsr.data
+        jobvsr = "V"
+    else:
+        vsr = None
+        vsr_ptr = NULL
+        jobvsr = "N"
+
+    # workspace query
+    lwork = -1
+    f_lapack.sgges_(jobvsl, jobvsr, "N", NULL,
+                    &N, <float *>A.data, &N,
+                    <float *>B.data, &N, &sdim,
+                    <float *>alphar.data, <float *>alphai.data,
+                    <float *>beta.data,
+                    vsl_ptr, &N, vsr_ptr, &N,
+                    &qwork, &lwork, NULL, &info)
+
+    assert info == 0, "Argument error in zgees"
+
+    lwork = <int>qwork
+    work = np.empty(lwork, dtype = np.float32)
+
+    # Now the real calculation
+    f_lapack.sgges_(jobvsl, jobvsr, "N", NULL,
+                    &N, <float *>A.data, &N,
+                    <float *>B.data, &N, &sdim,
+                    <float *>alphar.data, <float *>alphai.data,
+                    <float *>beta.data,
+                    vsl_ptr, &N, vsr_ptr, &N,
+                    <float *>work.data, &lwork, NULL, &info)
+
+    if info > 0:
+        raise LinAlgError("QZ iteration failed to converge in sgges")
+
+    assert info == 0, "Argument error in zgees"
+
+    if alphai.nonzero()[0].size:
+        alpha = alphar + 1j * alphai
+    else:
+        alpha = alphar
+
+    return filter_args((True, True, calc_q, calc_z, calc_ev, calc_ev),
+                       (A, B, vsl, vsr, alpha, beta))
+
+
+def dgges(np.ndarray[np.float64_t, ndim=2] A,
+          np.ndarray[np.float64_t, ndim=2] B,
+          calc_q=True, calc_z=True, calc_ev=True):
+    cdef l_int N, lwork, sdim, info
+    cdef char *jobvsl
+    cdef char *jobvsr
+    cdef double *vsl_ptr
+    cdef double *vsr_ptr
+    cdef double qwork
+    cdef np.ndarray[np.float64_t, ndim=2] vsl, vsr
+    cdef np.ndarray[np.float64_t] alphar, alphai, beta, work
+
+    assert_fortran_mat(A, B)
+
+    N = A.shape[0]
+    alphar = np.empty(N, dtype = np.float64)
+    alphai = np.empty(N, dtype = np.float64)
+    beta = np.empty(N, dtype = np.float64)
+
+    if calc_q:
+        vsl = np.empty((N,N), dtype = np.float64, order='F')
+        vsl_ptr = <double *>vsl.data
+        jobvsl = "V"
+    else:
+        vsl = None
+        vsl_ptr = NULL
+        jobvsl = "N"
+
+    if calc_z:
+        vsr = np.empty((N,N), dtype = np.float64, order='F')
+        vsr_ptr = <double *>vsr.data
+        jobvsr = "V"
+    else:
+        vsr = None
+        vsr_ptr = NULL
+        jobvsr = "N"
+
+    # workspace query
+    lwork = -1
+    f_lapack.dgges_(jobvsl, jobvsr, "N", NULL,
+                    &N, <double *>A.data, &N,
+                    <double *>B.data, &N, &sdim,
+                    <double *>alphar.data, <double *>alphai.data,
+                    <double *>beta.data,
+                    vsl_ptr, &N, vsr_ptr, &N,
+                    &qwork, &lwork, NULL, &info)
+
+    assert info == 0, "Argument error in zgees"
+
+    lwork = <int>qwork
+    work = np.empty(lwork, dtype = np.float64)
+
+    # Now the real calculation
+    f_lapack.dgges_(jobvsl, jobvsr, "N", NULL,
+                    &N, <double *>A.data, &N,
+                    <double *>B.data, &N, &sdim,
+                    <double *>alphar.data, <double *>alphai.data,
+                    <double *>beta.data,
+                    vsl_ptr, &N, vsr_ptr, &N,
+                    <double *>work.data, &lwork, NULL, &info)
+
+    if info > 0:
+        raise LinAlgError("QZ iteration failed to converge in dgges")
+
+    assert info == 0, "Argument error in zgees"
+
+    if alphai.nonzero()[0].size:
+        alpha = alphar + 1j * alphai
+    else:
+        alpha = alphar
+
+    return filter_args((True, True, calc_q, calc_z, calc_ev, calc_ev),
+                       (A, B, vsl, vsr, alpha, beta))
+
+
+def cgges(np.ndarray[np.complex64_t, ndim=2] A,
+          np.ndarray[np.complex64_t, ndim=2] B,
+          calc_q=True, calc_z=True, calc_ev=True):
+    cdef l_int N, lwork, sdim, info
+    cdef char *jobvsl
+    cdef char *jobvsr
+    cdef float complex *vsl_ptr
+    cdef float complex *vsr_ptr
+    cdef float complex qwork
+    cdef np.ndarray[np.complex64_t, ndim=2] vsl, vsr
+    cdef np.ndarray[np.complex64_t] alpha, beta, work
+    cdef np.ndarray[np.float32_t] rwork
+
+    assert_fortran_mat(A, B)
+
+    N = A.shape[0]
+    alpha = np.empty(N, dtype = np.complex64)
+    beta = np.empty(N, dtype = np.complex64)
+    rwork = np.empty(8*N, dtype = np.float32)
+
+    if calc_q:
+        vsl = np.empty((N,N), dtype = np.complex64, order='F')
+        vsl_ptr = <float complex *>vsl.data
+        jobvsl = "V"
+    else:
+        vsl = None
+        vsl_ptr = NULL
+        jobvsl = "N"
+
+    if calc_z:
+        vsr = np.empty((N,N), dtype = np.complex64, order='F')
+        vsr_ptr = <float complex *>vsr.data
+        jobvsr = "V"
+    else:
+        vsr = None
+        vsr_ptr = NULL
+        jobvsr = "N"
+
+    # workspace query
+    lwork = -1
+    f_lapack.cgges_(jobvsl, jobvsr, "N", NULL,
+                    &N, <float complex *>A.data, &N,
+                    <float complex *>B.data, &N, &sdim,
+                    <float complex *>alpha.data, <float complex *>beta.data,
+                    vsl_ptr, &N, vsr_ptr, &N,
+                    &qwork, &lwork, <float *>rwork.data, NULL, &info)
+
+    assert info == 0, "Argument error in zgees"
+
+    lwork = <int>qwork.real
+    work = np.empty(lwork, dtype = np.complex64)
+
+    # Now the real calculation
+    f_lapack.cgges_(jobvsl, jobvsr, "N", NULL,
+                    &N, <float complex *>A.data, &N,
+                    <float complex *>B.data, &N, &sdim,
+                    <float complex *>alpha.data, <float complex *>beta.data,
+                    vsl_ptr, &N, vsr_ptr, &N,
+                    <float complex *>work.data, &lwork,
+                    <float *>rwork.data, NULL, &info)
+
+    if info > 0:
+        raise LinAlgError("QZ iteration failed to converge in cgges")
+
+    assert info == 0, "Argument error in zgees"
+
+    return filter_args((True, True, calc_q, calc_z, calc_ev, calc_ev),
+                       (A, B, vsl, vsr, alpha, beta))
+
+
+def zgges(np.ndarray[np.complex128_t, ndim=2] A,
+          np.ndarray[np.complex128_t, ndim=2] B,
+          calc_q=True, calc_z=True, calc_ev=True):
+    cdef l_int N, lwork, sdim, info
+    cdef char *jobvsl
+    cdef char *jobvsr
+    cdef double complex *vsl_ptr
+    cdef double complex *vsr_ptr
+    cdef double complex qwork
+    cdef np.ndarray[np.complex128_t, ndim=2] vsl, vsr
+    cdef np.ndarray[np.complex128_t] alpha, beta, work
+    cdef np.ndarray[np.float64_t] rwork
+
+    assert_fortran_mat(A, B)
+
+    N = A.shape[0]
+    alpha = np.empty(N, dtype = np.complex128)
+    beta = np.empty(N, dtype = np.complex128)
+    rwork = np.empty(8*N, dtype = np.float64)
+
+    if calc_q:
+        vsl = np.empty((N,N), dtype = np.complex128, order='F')
+        vsl_ptr = <double complex *>vsl.data
+        jobvsl = "V"
+    else:
+        vsl = None
+        vsl_ptr = NULL
+        jobvsl = "N"
+
+    if calc_z:
+        vsr = np.empty((N,N), dtype = np.complex128, order='F')
+        vsr_ptr = <double complex *>vsr.data
+        jobvsr = "V"
+    else:
+        vsr = None
+        vsr_ptr = NULL
+        jobvsr = "N"
+
+    # workspace query
+    lwork = -1
+    f_lapack.zgges_(jobvsl, jobvsr, "N", NULL,
+                    &N, <double complex *>A.data, &N,
+                    <double complex *>B.data, &N, &sdim,
+                    <double complex *>alpha.data, <double complex *>beta.data,
+                    vsl_ptr, &N, vsr_ptr, &N,
+                    &qwork, &lwork, <double *>rwork.data, NULL, &info)
+
+    assert info == 0, "Argument error in zgees"
+
+    lwork = <int>qwork.real
+    work = np.empty(lwork, dtype = np.complex128)
+
+    # Now the real calculation
+    f_lapack.zgges_(jobvsl, jobvsr, "N", NULL,
+                    &N, <double complex *>A.data, &N,
+                    <double complex *>B.data, &N, &sdim,
+                    <double complex *>alpha.data, <double complex *>beta.data,
+                    vsl_ptr, &N, vsr_ptr, &N,
+                    <double complex *>work.data, &lwork,
+                    <double *>rwork.data, NULL, &info)
+
+    if info > 0:
+        raise LinAlgError("QZ iteration failed to converge in zgges")
+
+    assert info == 0, "Argument error in zgees"
+
+    return filter_args((True, True, calc_q, calc_z, calc_ev, calc_ev),
+                       (A, B, vsl, vsr, alpha, beta))
+
+
+# wrappers for xTGSEN
+def stgsen(np.ndarray[l_logical] select,
+           np.ndarray[np.float32_t, ndim=2] S,
+           np.ndarray[np.float32_t, ndim=2] T,
+           np.ndarray[np.float32_t, ndim=2] Q=None,
+           np.ndarray[np.float32_t, ndim=2] Z=None,
+           calc_ev=True):
+    cdef l_int N, M, lwork, liwork, qiwork, info, ijob
+    cdef l_logical wantq, wantz
+    cdef float qwork
+    cdef float *q_ptr
+    cdef float *z_ptr
+    cdef np.ndarray[np.float32_t] alphar, alphai, beta, work
+    cdef np.ndarray[l_int] iwork
+
+    assert_fortran_mat(S, T, Q, Z)
+
+    N = S.shape[0]
+    alphar = np.empty(N, dtype = np.float32)
+    alphai = np.empty(N, dtype = np.float32)
+    beta = np.empty(N, dtype = np.float32)
+    ijob = 0
+
+    if Q is not None:
+        wantq = 1
+        q_ptr = <float *>Q.data
+    else:
+        wantq = 0
+        q_ptr = NULL
+
+    if Z is not None:
+        wantz = 1
+        z_ptr = <float *>Z.data
+    else:
+        wantz = 0
+        z_ptr = NULL
+
+    # workspace query
+    lwork = -1
+    liwork = -1
+    f_lapack.stgsen_(&ijob, &wantq, &wantz, <l_logical *>select.data,
+                     &N, <float *>S.data, &N,
+                     <float *>T.data, &N,
+                     <float *>alphar.data, <float *>alphai.data,
+                     <float *>beta.data,
+                     q_ptr, &N, z_ptr, &N, &M, NULL, NULL, NULL,
+                     &qwork, &lwork, &qiwork, &liwork, &info)
+
+    assert info == 0, "Argument error in stgsen"
+
+    lwork = <int>qwork
+    work = np.empty(lwork, dtype = np.float32)
+    liwork = qiwork
+    iwork = np.empty(liwork, dtype = int_dtype)
+
+    # Now the real calculation
+    f_lapack.stgsen_(&ijob, &wantq, &wantz, <l_logical *>select.data,
+                     &N, <float *>S.data, &N,
+                     <float *>T.data, &N,
+                     <float *>alphar.data, <float *>alphai.data,
+                     <float *>beta.data,
+                     q_ptr, &N, z_ptr, &N, &M, NULL, NULL, NULL,
+                     <float *>work.data, &lwork,
+                     <l_int *>iwork.data, &liwork, &info)
+
+    if info > 0:
+        raise LinAlgError("Reordering failed; problem is very ill-conditioned")
+
+    assert info == 0, "Argument error in stgsen"
+
+    if alphai.nonzero()[0].size:
+        alpha = alphar + 1j * alphai
+    else:
+        alpha = alphar
+
+    return filter_args((True, True, Q is not None, Z is not None,
+                        calc_ev, calc_ev),
+                       (S, T, Q, Z, alpha, beta))
+
+
+def dtgsen(np.ndarray[l_logical] select,
+           np.ndarray[np.float64_t, ndim=2] S,
+           np.ndarray[np.float64_t, ndim=2] T,
+           np.ndarray[np.float64_t, ndim=2] Q=None,
+           np.ndarray[np.float64_t, ndim=2] Z=None,
+           calc_ev=True):
+    cdef l_int N, M, lwork, liwork, qiwork, info, ijob
+    cdef l_logical wantq, wantz
+    cdef double qwork
+    cdef double *q_ptr
+    cdef double *z_ptr
+    cdef np.ndarray[np.float64_t] alphar, alphai, beta, work
+    cdef np.ndarray[l_int] iwork
+
+    assert_fortran_mat(S, T, Q, Z)
+
+    N = S.shape[0]
+    alphar = np.empty(N, dtype = np.float64)
+    alphai = np.empty(N, dtype = np.float64)
+    beta = np.empty(N, dtype = np.float64)
+    ijob = 0
+
+    if Q is not None:
+        wantq = 1
+        q_ptr = <double *>Q.data
+    else:
+        wantq = 0
+        q_ptr = NULL
+
+    if Z is not None:
+        wantz = 1
+        z_ptr = <double *>Z.data
+    else:
+        wantz = 0
+        z_ptr = NULL
+
+    # workspace query
+    lwork = -1
+    liwork = -1
+    f_lapack.dtgsen_(&ijob, &wantq, &wantz, <l_logical *>select.data,
+                     &N, <double *>S.data, &N,
+                     <double *>T.data, &N,
+                     <double *>alphar.data, <double *>alphai.data,
+                     <double *>beta.data,
+                     q_ptr, &N, z_ptr, &N, &M, NULL, NULL, NULL,
+                     &qwork, &lwork, &qiwork, &liwork, &info)
+
+    assert info == 0, "Argument error in dtgsen"
+
+    lwork = <int>qwork
+    work = np.empty(lwork, dtype = np.float64)
+    liwork = qiwork
+    iwork = np.empty(liwork, dtype = int_dtype)
+
+    # Now the real calculation
+    f_lapack.dtgsen_(&ijob, &wantq, &wantz, <l_logical *>select.data,
+                     &N, <double *>S.data, &N,
+                     <double *>T.data, &N,
+                     <double *>alphar.data, <double *>alphai.data,
+                     <double *>beta.data,
+                     q_ptr, &N, z_ptr, &N, &M, NULL, NULL, NULL,
+                     <double *>work.data, &lwork,
+                     <l_int *>iwork.data, &liwork, &info)
+
+    if info > 0:
+        raise LinAlgError("Reordering failed; problem is very ill-conditioned")
+
+    assert info == 0, "Argument error in dtgsen"
+
+    if alphai.nonzero()[0].size:
+        alpha = alphar + 1j * alphai
+    else:
+        alpha = alphar
+
+    return filter_args((True, True, Q is not None, Z is not None,
+                        calc_ev, calc_ev),
+                       (S, T, Q, Z, alpha, beta))
+
+
+def ctgsen(np.ndarray[l_logical] select,
+           np.ndarray[np.complex64_t, ndim=2] S,
+           np.ndarray[np.complex64_t, ndim=2] T,
+           np.ndarray[np.complex64_t, ndim=2] Q=None,
+           np.ndarray[np.complex64_t, ndim=2] Z=None,
+           calc_ev=True):
+    cdef l_int N, M, lwork, liwork, qiwork, info, ijob
+    cdef l_logical wantq, wantz
+    cdef float complex qwork
+    cdef float complex *q_ptr
+    cdef float complex *z_ptr
+    cdef np.ndarray[np.complex64_t] alpha, beta, work
+    cdef np.ndarray[l_int] iwork
+
+    assert_fortran_mat(S, T, Q, Z)
+
+    N = S.shape[0]
+    alpha = np.empty(N, dtype = np.complex64)
+    beta = np.empty(N, dtype = np.complex64)
+    ijob = 0
+
+    if Q is not None:
+        wantq = 1
+        q_ptr = <float complex *>Q.data
+    else:
+        wantq = 0
+        q_ptr = NULL
+
+    if Z is not None:
+        wantz = 1
+        z_ptr = <float complex *>Z.data
+    else:
+        wantz = 0
+        z_ptr = NULL
+
+    # workspace query
+    lwork = -1
+    liwork = -1
+    f_lapack.ctgsen_(&ijob, &wantq, &wantz, <l_logical *>select.data,
+                     &N, <float complex *>S.data, &N,
+                     <float complex *>T.data, &N,
+                     <float complex *>alpha.data, <float complex *>beta.data,
+                     q_ptr, &N, z_ptr, &N, &M, NULL, NULL, NULL,
+                     &qwork, &lwork, &qiwork, &liwork, &info)
+
+    assert info == 0, "Argument error in ctgsen"
+
+    lwork = <int>qwork.real
+    work = np.empty(lwork, dtype = np.complex64)
+    liwork = qiwork
+    iwork = np.empty(liwork, dtype = int_dtype)
+
+    # Now the real calculation
+    f_lapack.ctgsen_(&ijob, &wantq, &wantz, <l_logical *>select.data,
+                     &N, <float complex *>S.data, &N,
+                     <float complex *>T.data, &N,
+                     <float complex *>alpha.data, <float complex *>beta.data,
+                     q_ptr, &N, z_ptr, &N, &M, NULL, NULL, NULL,
+                     <float complex *>work.data, &lwork,
+                     <l_int *>iwork.data, &liwork, &info)
+
+    if info > 0:
+        raise LinAlgError("Reordering failed; problem is very ill-conditioned")
+
+    assert info == 0, "Argument error in ctgsen"
+
+    return filter_args((True, True, Q is not None, Z is not None,
+                        calc_ev, calc_ev),
+                       (S, T, Q, Z, alpha, beta))
+
+
+def ztgsen(np.ndarray[l_logical] select,
+           np.ndarray[np.complex128_t, ndim=2] S,
+           np.ndarray[np.complex128_t, ndim=2] T,
+           np.ndarray[np.complex128_t, ndim=2] Q=None,
+           np.ndarray[np.complex128_t, ndim=2] Z=None,
+           calc_ev=True):
+    cdef l_int N, M, lwork, liwork, qiwork, info, ijob
+    cdef l_logical wantq, wantz
+    cdef double complex qwork
+    cdef double complex *q_ptr
+    cdef double complex *z_ptr
+    cdef np.ndarray[np.complex128_t] alpha, beta, work
+    cdef np.ndarray[l_int] iwork
+
+    assert_fortran_mat(S, T, Q, Z)
+
+    N = S.shape[0]
+    alpha = np.empty(N, dtype = np.complex128)
+    beta = np.empty(N, dtype = np.complex128)
+    ijob = 0
+
+    if Q is not None:
+        wantq = 1
+        q_ptr = <double complex *>Q.data
+    else:
+        wantq = 0
+        q_ptr = NULL
+
+    if Z is not None:
+        wantz = 1
+        z_ptr = <double complex *>Z.data
+    else:
+        wantz = 0
+        z_ptr = NULL
+
+    # workspace query
+    lwork = -1
+    liwork = -1
+    f_lapack.ztgsen_(&ijob, &wantq, &wantz, <l_logical *>select.data,
+                     &N, <double complex *>S.data, &N,
+                     <double complex *>T.data, &N,
+                     <double complex *>alpha.data, <double complex *>beta.data,
+                     q_ptr, &N, z_ptr, &N, &M, NULL, NULL, NULL,
+                     &qwork, &lwork, &qiwork, &liwork, &info)
+
+    assert info == 0, "Argument error in ztgsen"
+
+    lwork = <int>qwork.real
+    work = np.empty(lwork, dtype = np.complex128)
+    liwork = qiwork
+    iwork = np.empty(liwork, dtype = int_dtype)
+
+    # Now the real calculation
+    f_lapack.ztgsen_(&ijob, &wantq, &wantz, <l_logical *>select.data,
+                     &N, <double complex *>S.data, &N,
+                     <double complex *>T.data, &N,
+                     <double complex *>alpha.data, <double complex *>beta.data,
+                     q_ptr, &N, z_ptr, &N, &M, NULL, NULL, NULL,
+                     <double complex *>work.data, &lwork,
+                     <l_int *>iwork.data, &liwork, &info)
 
-    # Parameter checks
+    if info > 0:
+        raise LinAlgError("Reordering failed; problem is very ill-conditioned")
 
-    if (T.shape[0] != T.shape[1] or Q.shape[0] != Q.shape[1]
-        or T.shape[0] != Q.shape[0]):
-        raise ValueError("Invalid Schur decomposition as input")
+    assert info == 0, "Argument error in ztgsen"
 
-    assert_fortran_mat(T, Q)
+    return filter_args((True, True, Q is not None, Z is not None,
+                        calc_ev, calc_ev),
+                       (S, T, Q, Z, alpha, beta))
 
-    # Workspace allocation
 
-    N = T.shape[0]
+# xTGEVC
+def stgevc(np.ndarray[np.float32_t, ndim=2] S,
+           np.ndarray[np.float32_t, ndim=2] T,
+           np.ndarray[np.float32_t, ndim=2] Q=None,
+           np.ndarray[np.float32_t, ndim=2] Z=None,
+           np.ndarray[l_logical] select=None,
+           left=False, right=True):
+    cdef l_int N, info, M, MM
+    cdef char *side
+    cdef char *howmny
+    cdef np.ndarray[np.float32_t, ndim=2] vl_r, vr_r
+    cdef float *vl_r_ptr
+    cdef float *vr_r_ptr
+    cdef np.ndarray[l_logical] select_cpy
+    cdef l_logical *select_ptr
+    cdef np.ndarray[np.float32_t] work
 
-    cdef np.ndarray[scalar] work
-    if scalar in floating:
-        work = np.empty(4 * N, dtype=T.dtype)
-    else:
-        work = np.empty(2 * N, dtype=T.dtype)
+    assert_fortran_mat(S, T, Q, Z)
 
-    cdef np.ndarray rwork = None
-    if scalar is float_complex:
-        rwork = np.empty(N, dtype=np.float32)
-    elif scalar is double_complex:
-        rwork = np.empty(N, dtype=np.float64)
+    N = S.shape[0]
+    work = np.empty(6*N, dtype = np.float32)
 
     if left and right:
         side = "B"
@@ -694,102 +2089,78 @@ def trevc(np.ndarray[scalar, ndim=2] T,
     else:
         return
 
-    cdef np.ndarray[l_logical] select_cpy
-    cdef l_logical *select_ptr
+    backtr = False
+
     if select is not None:
         howmny = "S"
         MM = select.nonzero()[0].size
         # Correct for possible additional storage if a single complex
         # eigenvalue is selected.
         # For that: Figure out the positions of the 2x2 blocks.
-        cmplxindx = np.diagonal(T, -1).nonzero()[0]
+        cmplxindx = np.diagonal(S, -1).nonzero()[0]
         for i in cmplxindx:
             if bool(select[i]) != bool(select[i+1]):
                 MM += 1
 
-        # Select is overwritten in strevc.
-        select_cpy = np.array(select, dtype = logical_dtype,
+        # select is overwritten in stgevc
+        select_cpy = np.array(select, dtype = f_lapack.l_logical_dtype,
                               order = 'F')
         select_ptr = <l_logical *>select_cpy.data
     else:
         MM = N
         select_ptr = NULL
-        if Q is not None:
+        if ((left and right and Q is not None and Z is not None) or
+            (left and not right and Q is not None) or
+            (right and not left and Z is not None)):
             howmny = "B"
+            backtr = True
         else:
             howmny = "A"
 
-    cdef np.ndarray[scalar, ndim=2] vl_r = None
-    cdef scalar *vl_r_ptr
     if left:
-        if Q is not None and select is None:
-            vl_r = np.asfortranarray(Q.copy())
+        if backtr:
+            vl_r = Q
         else:
-            vl_r = np.empty((N, MM), dtype=T.dtype, order='F')
-        vl_r_ptr = <scalar *>vl_r.data
+            vl_r = np.empty((N, MM), dtype = np.float32, order='F')
+        vl_r_ptr = <float *>vl_r.data
     else:
         vl_r_ptr = NULL
 
-    cdef np.ndarray[scalar, ndim=2]  vr_r = None
-    cdef scalar *vr_r_ptr
     if right:
-        if Q is not None and select is None:
-            vr_r = np.asfortranarray(Q.copy())
+        if backtr:
+            vr_r = Z
         else:
-            vr_r = np.empty((N, MM), dtype=T.dtype, order='F')
-        vr_r_ptr = <scalar *>vr_r.data
+            vr_r = np.empty((N, MM), dtype = np.float32, order='F')
+        vr_r_ptr = <float *>vr_r.data
     else:
         vr_r_ptr = NULL
 
-    # The actual calculation
-
-    if scalar is float:
-        lapack.strevc(side, howmny, select_ptr,
-                      &N, <float *>T.data, &N,
-                      vl_r_ptr, &N, vr_r_ptr, &N, &MM, &M,
-                      <float *>work.data, &info)
-    elif scalar is double:
-        lapack.dtrevc(side, howmny, select_ptr,
-                      &N, <double *>T.data, &N,
-                      vl_r_ptr, &N, vr_r_ptr, &N, &MM, &M,
-                      <double *>work.data, &info)
-    elif scalar is float_complex:
-        lapack.ctrevc(side, howmny, select_ptr,
-                      &N, <float complex *>T.data, &N,
-                      vl_r_ptr, &N, vr_r_ptr, &N, &MM, &M,
-                      <float complex *>work.data, <float *>rwork.data, &info)
-    elif scalar is double_complex:
-        lapack.ztrevc(side, howmny, select_ptr,
-                      &N, <double complex *>T.data, &N,
-                      vl_r_ptr, &N, vr_r_ptr, &N, &MM, &M,
-                      <double complex *>work.data, <double *>rwork.data, &info)
-
-    assert info == 0, "Argument error in trevc"
-    assert MM == M, "Unexpected number of eigenvectors returned in strevc"
+    f_lapack.stgevc_(side, howmny, select_ptr,
+                     &N, <float *>S.data, &N,
+                     <float *>T.data, &N,
+                     vl_r_ptr, &N, vr_r_ptr, &N, &MM, &M,
+                     <float *>work.data, &info)
 
-    if select is not None and Q is not None:
+    assert info == 0, "Argument error in stgevc"
+    assert MM == M, "Unexpected number of eigenvectors returned in stgevc"
+
+    if not backtr:
         if left:
             vl_r = np.asfortranarray(np.dot(Q, vl_r))
         if right:
-            vr_r = np.asfortranarray(np.dot(Q, vr_r))
+            vr_r = np.asfortranarray(np.dot(Z, vr_r))
 
-    cdef np.ndarray vl, vr
-    if left:
-        vl = vl_r
-    if right:
-        vr = vr_r
-    if scalar in floating:
-        # If there are complex eigenvalues, we need to postprocess the
-        # eigenvectors.
-        if scalar is float:
-            dtype = np.complex64
-        else:
-            dtype = np.complex128
-        if np.diagonal(T, -1).nonzero()[0].size:
-            if left:
-                vl = txevc_postprocess(dtype, T, vl_r, select)
-            if right:
-                vr = txevc_postprocess(dtype, T, vr_r, select)
+    # If there are complex eigenvalues, we need to postprocess the eigenvectors
+    if np.diagonal(S, -1).nonzero()[0].size:
+        if left:
+            vl = txevc_postprocess(np.complex64, S, vl_r, select)
+        if right:
+            vr = txevc_postprocess(np.complex64, S, vr_r, select)
+    else:
+        if left:
+            vl = vl_r
+        if right:
+            vr = vr_r
 
     if left and right:
         return (vl, vr)
@@ -799,340 +2170,229 @@ def trevc(np.ndarray[scalar, ndim=2] T,
         return vr
 
 
-def gges(np.ndarray[scalar, ndim=2] A,
-          np.ndarray[scalar, ndim=2] B,
-          calc_q=True, calc_z=True, calc_ev=True):
-    cdef l_int N, sdim, info
-
-    # Check parameters
-
-    assert_fortran_mat(A, B)
-
-    if A.shape[0] != B.shape[1]:
-        raise ValueError("Expect square matrix A")
-
-    if A.shape[0] != B.shape[0] or A.shape[0] != B.shape[1]:
-        raise ValueError("Shape of B is incompatible with matrix A")
+def dtgevc(np.ndarray[np.float64_t, ndim=2] S,
+           np.ndarray[np.float64_t, ndim=2] T,
+           np.ndarray[np.float64_t, ndim=2] Q=None,
+           np.ndarray[np.float64_t, ndim=2] Z=None,
+           np.ndarray[l_logical] select=None,
+           left=False, right=True):
+    cdef l_int N, info, M, MM
+    cdef char *side
+    cdef char *howmny
+    cdef np.ndarray[np.float64_t, ndim=2] vl_r, vr_r
+    cdef double *vl_r_ptr
+    cdef double *vr_r_ptr
+    cdef np.ndarray[l_logical] select_cpy
+    cdef l_logical *select_ptr
+    cdef np.ndarray[np.float64_t] work
 
-    # Allocate workspaces
+    assert_fortran_mat(S, T, Q, Z)
 
-    N = A.shape[0]
+    N = S.shape[0]
+    work = np.empty(6*N, dtype = np.float64)
 
-    cdef np.ndarray[scalar] alphar, alphai
-    if scalar in cmplx:
-        alphar = np.empty(N, dtype=A.dtype)
-        alphai = None
+    if left and right:
+        side = "B"
+    elif left:
+        side = "L"
+    elif right:
+        side = "R"
     else:
-        alphar = np.empty(N, dtype=A.dtype)
-        alphai = np.empty(N, dtype=A.dtype)
+        return
 
-    cdef np.ndarray[scalar] beta = np.empty(N, dtype=A.dtype)
+    backtr = False
 
-    cdef np.ndarray rwork = None
-    if scalar is float_complex:
-        rwork = np.empty(8 * N, dtype=np.float32)
-    elif scalar is double_complex:
-        rwork = np.empty(8 * N, dtype=np.float64)
+    if select is not None:
+        howmny = "S"
+        MM = select.nonzero()[0].size
+        # Correct for possible additional storage if a single complex
+        # eigenvalue is selected.
+        # For that: Figure out the positions of the 2x2 blocks.
+        cmplxindx = np.diagonal(S, -1).nonzero()[0]
+        for i in cmplxindx:
+            if bool(select[i]) != bool(select[i+1]):
+                MM += 1
 
-    cdef char *jobvsl
-    cdef scalar *vsl_ptr
-    cdef np.ndarray[scalar, ndim=2] vsl
-    if calc_q:
-        vsl = np.empty((N,N), dtype=A.dtype, order='F')
-        vsl_ptr = <scalar *>vsl.data
-        jobvsl = "V"
+        # select is overwritten in dtgevc
+        select_cpy = np.array(select, dtype = f_lapack.l_logical_dtype,
+                              order = 'F')
+        select_ptr = <l_logical *>select_cpy.data
     else:
-        vsl = None
-        vsl_ptr = NULL
-        jobvsl = "N"
+        MM = N
+        select_ptr = NULL
+        if ((left and right and Q is not None and Z is not None) or
+            (left and not right and Q is not None) or
+            (right and not left and Z is not None)):
+            howmny = "B"
+            backtr = True
+        else:
+            howmny = "A"
 
-    cdef char *jobvsr
-    cdef scalar *vsr_ptr
-    cdef np.ndarray[scalar, ndim=2] vsr
-    if calc_z:
-        vsr = np.empty((N,N), dtype=A.dtype, order='F')
-        vsr_ptr = <scalar *>vsr.data
-        jobvsr = "V"
+    if left:
+        if backtr:
+            vl_r = Q
+        else:
+            vl_r = np.empty((N, MM), dtype = np.float64, order='F')
+        vl_r_ptr = <double *>vl_r.data
     else:
-        vsr = None
-        vsr_ptr = NULL
-        jobvsr = "N"
-
-    # Workspace query
-    # Xgges expects &qwork as a <scalar *> (even though it's an integer)
-    cdef l_int lwork = -1
-    cdef scalar qwork
-
-    if scalar is float:
-        lapack.sgges(jobvsl, jobvsr, "N", NULL,
-                     &N, <float *>A.data, &N,
-                     <float *>B.data, &N, &sdim,
-                     <float *>alphar.data, <float *>alphai.data,
-                     <float *>beta.data,
-                     vsl_ptr, &N, vsr_ptr, &N,
-                     &qwork, &lwork, NULL, &info)
-    elif scalar is double:
-        lapack.dgges(jobvsl, jobvsr, "N", NULL,
-                     &N, <double *>A.data, &N,
-                     <double *>B.data, &N, &sdim,
-                     <double *>alphar.data, <double *>alphai.data,
-                     <double *>beta.data,
-                     vsl_ptr, &N, vsr_ptr, &N,
-                     &qwork, &lwork, NULL, &info)
-    elif scalar is float_complex:
-        lapack.cgges(jobvsl, jobvsr, "N", NULL,
-                     &N, <float complex *>A.data, &N,
-                     <float complex *>B.data, &N, &sdim,
-                     <float complex *>alphar.data, <float complex *>beta.data,
-                     vsl_ptr, &N, vsr_ptr, &N,
-                     &qwork, &lwork, <float *>rwork.data, NULL, &info)
-    elif scalar is double_complex:
-        lapack.zgges(jobvsl, jobvsr, "N", NULL,
-                     &N, <double complex *>A.data, &N,
-                     <double complex *>B.data, &N, &sdim,
-                     <double complex *>alphar.data, <double complex *>beta.data,
-                     vsl_ptr, &N, vsr_ptr, &N,
-                     &qwork, &lwork, <double *>rwork.data, NULL, &info)
-
-    assert info == 0, "Argument error in gges"
-
-    lwork = lwork_from_qwork(qwork)
-    cdef np.ndarray[scalar] work = np.empty(lwork, dtype=A.dtype)
-
-    # The actual calculation
-
-    if scalar is float:
-        lapack.sgges(jobvsl, jobvsr, "N", NULL,
-                     &N, <float *>A.data, &N,
-                     <float *>B.data, &N, &sdim,
-                     <float *>alphar.data, <float *>alphai.data,
-                     <float *>beta.data,
-                     vsl_ptr, &N, vsr_ptr, &N,
-                     <float *>work.data, &lwork, NULL, &info)
-    elif scalar is double:
-        lapack.dgges(jobvsl, jobvsr, "N", NULL,
-                     &N, <double *>A.data, &N,
-                     <double *>B.data, &N, &sdim,
-                     <double *>alphar.data, <double *>alphai.data,
-                     <double *>beta.data,
-                     vsl_ptr, &N, vsr_ptr, &N,
-                     <double *>work.data, &lwork, NULL, &info)
-    elif scalar is float_complex:
-        lapack.cgges(jobvsl, jobvsr, "N", NULL,
-                     &N, <float complex *>A.data, &N,
-                     <float complex *>B.data, &N, &sdim,
-                     <float complex *>alphar.data, <float complex *>beta.data,
-                     vsl_ptr, &N, vsr_ptr, &N,
-                     <float complex *>work.data, &lwork,
-                     <float *>rwork.data, NULL, &info)
-    elif scalar is double_complex:
-        lapack.zgges(jobvsl, jobvsr, "N", NULL,
-                     &N, <double complex *>A.data, &N,
-                     <double complex *>B.data, &N, &sdim,
-                     <double complex *>alphar.data, <double complex *>beta.data,
-                     vsl_ptr, &N, vsr_ptr, &N,
-                     <double complex *>work.data, &lwork,
-                     <double *>rwork.data, NULL, &info)
+        vl_r_ptr = NULL
 
-    if info > 0:
-        raise LinAlgError("QZ iteration failed to converge in gges")
+    if right:
+        if backtr:
+            vr_r = Z
+        else:
+            vr_r = np.empty((N, MM), dtype = np.float64, order='F')
+        vr_r_ptr = <double *>vr_r.data
+    else:
+        vr_r_ptr = NULL
 
-    assert info == 0, "Argument error in gges"
+    f_lapack.dtgevc_(side, howmny, select_ptr,
+                     &N, <double *>S.data, &N,
+                     <double *>T.data, &N,
+                     vl_r_ptr, &N, vr_r_ptr, &N, &MM, &M,
+                     <double *>work.data, &info)
 
-    # Real inputs possibly produce complex output
-    cdef np.ndarray alpha = maybe_complex[scalar](0, alphar, alphai)
+    assert info == 0, "Argument error in dtgevc"
+    assert MM == M, "Unexpected number of eigenvectors returned in dtgevc"
 
-    return filter_args((True, True, calc_q, calc_z, calc_ev, calc_ev),
-                       (A, B, vsl, vsr, alpha, beta))
+    if not backtr:
+        if left:
+            vl_r = np.asfortranarray(np.dot(Q, vl_r))
+        if right:
+            vr_r = np.asfortranarray(np.dot(Z, vr_r))
 
+    # If there are complex eigenvalues, we need to postprocess the
+    # eigenvectors.
+    if np.diagonal(S, -1).nonzero()[0].size:
+        if left:
+            vl = txevc_postprocess(np.complex128, S, vl_r, select)
+        if right:
+            vr = txevc_postprocess(np.complex128, S, vr_r, select)
+    else:
+        if left:
+            vl = vl_r
+        if right:
+            vr = vr_r
 
-def tgsen(np.ndarray[l_logical] select,
-           np.ndarray[scalar, ndim=2] S,
-           np.ndarray[scalar, ndim=2] T,
-           np.ndarray[scalar, ndim=2] Q,
-           np.ndarray[scalar, ndim=2] Z,
-           calc_ev=True):
-    cdef l_int ijob = 0
-    cdef l_int N, M, lwork, liwork, info
+    if left and right:
+        return (vl, vr)
+    elif left:
+        return vl
+    else:
+        return vr
 
-    # Check parameters
 
-    if ((S.shape[0] != S.shape[1] or T.shape[0] != T.shape[1] or
-         S.shape[0] != T.shape[0]) or
-        (Q is not None and (Q.shape[0] != Q.shape[1] or
-                            S.shape[0] != Q.shape[0])) or
-        (Z is not None and (Z.shape[0] != Z.shape[1] or
-                            S.shape[0] != Z.shape[0]))):
-        raise ValueError("Invalid Schur decomposition as input")
+def ctgevc(np.ndarray[np.complex64_t, ndim=2] S,
+           np.ndarray[np.complex64_t, ndim=2] T,
+           np.ndarray[np.complex64_t, ndim=2] Q=None,
+           np.ndarray[np.complex64_t, ndim=2] Z=None,
+           np.ndarray[l_logical] select=None,
+           left=False, right=True):
+    cdef l_int N, info, M, MM
+    cdef char *side
+    cdef char *howmny
+    cdef np.ndarray[np.complex64_t, ndim=2] vl, vr
+    cdef float complex *vl_ptr
+    cdef float complex *vr_ptr
+    cdef l_logical *select_ptr
+    cdef np.ndarray[np.complex64_t] work
+    cdef np.ndarray[np.float32_t] rwork
 
     assert_fortran_mat(S, T, Q, Z)
 
-    # Allocate workspaces
-
     N = S.shape[0]
+    work = np.empty(2*N, dtype = np.complex64)
+    rwork = np.empty(2*N, dtype = np.float32)
 
-    cdef np.ndarray[scalar] alphar, alphai
-    if scalar in cmplx:
-        alphar = np.empty(N, dtype=S.dtype)
-        alphai = None
+    if left and right:
+        side = "B"
+    elif left:
+        side = "L"
+    elif right:
+        side = "R"
     else:
-        alphar = np.empty(N, dtype=S.dtype)
-        alphai = np.empty(N, dtype=S.dtype)
+        return
 
-    cdef np.ndarray[scalar] beta
-    beta = np.empty(N, dtype=S.dtype)
+    backtr = False
 
-    cdef l_logical wantq
-    cdef scalar *q_ptr
-    if Q is not None:
-        wantq = 1
-        q_ptr = <scalar *>Q.data
+    if select is not None:
+        howmny = "S"
+        MM = select.nonzero()[0].size
+        select_ptr = <l_logical *>select.data
     else:
-        wantq = 0
-        q_ptr = NULL
+        MM = N
+        select_ptr = NULL
+        if ((left and right and Q is not None and Z is not None) or
+            (left and not right and Q is not None) or
+            (right and not left and Z is not None)):
+            howmny = "B"
+            backtr = True
+        else:
+            howmny = "A"
 
-    cdef l_logical wantz
-    cdef scalar *z_ptr
-    if Z is not None:
-        wantz = 1
-        z_ptr = <scalar *>Z.data
+    if left:
+        if backtr:
+            vl = Q
+        else:
+            vl = np.empty((N, MM), dtype = np.complex64, order='F')
+        vl_ptr = <float complex *>vl.data
     else:
-        wantz = 0
-        z_ptr = NULL
-
-    # Workspace query
-    # Xtgsen expects &qwork as a <scalar *> (even though it's an integer)
-    lwork = -1
-    liwork = -1
-    cdef scalar qwork
-    cdef l_int qiwork
-
-    if scalar is float:
-        lapack.stgsen(&ijob, &wantq, &wantz, <l_logical *>select.data,
-                      &N, <float *>S.data, &N,
-                      <float *>T.data, &N,
-                      <float *>alphar.data, <float *>alphai.data,
-                      <float *>beta.data,
-                      q_ptr, &N, z_ptr, &N, &M, NULL, NULL, NULL,
-                      &qwork, &lwork, &qiwork, &liwork, &info)
-    elif scalar is double:
-        lapack.dtgsen(&ijob, &wantq, &wantz, <l_logical *>select.data,
-                      &N, <double *>S.data, &N,
-                      <double *>T.data, &N,
-                      <double *>alphar.data, <double *>alphai.data,
-                      <double *>beta.data,
-                      q_ptr, &N, z_ptr, &N, &M, NULL, NULL, NULL,
-                      &qwork, &lwork, &qiwork, &liwork, &info)
-    elif scalar is float_complex:
-        lapack.ctgsen(&ijob, &wantq, &wantz, <l_logical *>select.data,
-                      &N, <float complex *>S.data, &N,
-                      <float complex *>T.data, &N,
-                      <float complex *>alphar.data, <float complex *>beta.data,
-                      q_ptr, &N, z_ptr, &N, &M, NULL, NULL, NULL,
-                      &qwork, &lwork, &qiwork, &liwork, &info)
-    elif scalar is double_complex:
-        lapack.ztgsen(&ijob, &wantq, &wantz, <l_logical *>select.data,
-                      &N, <double complex *>S.data, &N,
-                      <double complex *>T.data, &N,
-                      <double complex *>alphar.data, <double complex *>beta.data,
-                      q_ptr, &N, z_ptr, &N, &M, NULL, NULL, NULL,
-                      &qwork, &lwork, &qiwork, &liwork, &info)
-
-    assert info == 0, "Argument error in tgsen"
-
-    lwork = lwork_from_qwork(qwork)
-    cdef np.ndarray[scalar] work = np.empty(lwork, dtype=S.dtype)
+        vl_ptr = NULL
 
-    liwork = qiwork
-    cdef np.ndarray[l_int] iwork = np.empty(liwork, dtype=int_dtype)
-
-    # The actual calculation
-
-    if scalar is float:
-        lapack.stgsen(&ijob, &wantq, &wantz, <l_logical *>select.data,
-                      &N, <float *>S.data, &N,
-                      <float *>T.data, &N,
-                      <float *>alphar.data, <float *>alphai.data,
-                      <float *>beta.data,
-                      q_ptr, &N, z_ptr, &N, &M, NULL, NULL, NULL,
-                      <float *>work.data, &lwork,
-                      <l_int *>iwork.data, &liwork, &info)
-    elif scalar is double:
-        lapack.dtgsen(&ijob, &wantq, &wantz, <l_logical *>select.data,
-                      &N, <double *>S.data, &N,
-                      <double *>T.data, &N,
-                      <double *>alphar.data, <double *>alphai.data,
-                      <double *>beta.data,
-                      q_ptr, &N, z_ptr, &N, &M, NULL, NULL, NULL,
-                      <double *>work.data, &lwork,
-                      <l_int *>iwork.data, &liwork, &info)
-    elif scalar is float_complex:
-        lapack.ctgsen(&ijob, &wantq, &wantz, <l_logical *>select.data,
-                      &N, <float complex *>S.data, &N,
-                      <float complex *>T.data, &N,
-                      <float complex *>alphar.data, <float complex *>beta.data,
-                      q_ptr, &N, z_ptr, &N, &M, NULL, NULL, NULL,
-                      <float complex *>work.data, &lwork,
-                      <l_int *>iwork.data, &liwork, &info)
-    elif scalar is double_complex:
-        lapack.ztgsen(&ijob, &wantq, &wantz, <l_logical *>select.data,
-                      &N, <double complex *>S.data, &N,
-                      <double complex *>T.data, &N,
-                      <double complex *>alphar.data, <double complex *>beta.data,
-                      q_ptr, &N, z_ptr, &N, &M, NULL, NULL, NULL,
-                      <double complex *>work.data, &lwork,
-                      <l_int *>iwork.data, &liwork, &info)
+    if right:
+        if backtr:
+            vr = Z
+        else:
+            vr = np.empty((N, MM), dtype = np.complex64, order='F')
+        vr_ptr = <float complex *>vr.data
+    else:
+        vr_ptr = NULL
 
-    if info > 0:
-        raise LinAlgError("Reordering failed; problem is very ill-conditioned")
+    f_lapack.ctgevc_(side, howmny, select_ptr,
+                     &N, <float complex *>S.data, &N,
+                     <float complex *>T.data, &N,
+                     vl_ptr, &N, vr_ptr, &N, &MM, &M,
+                     <float complex *>work.data, <float *>rwork.data, &info)
 
-    assert info == 0, "Argument error in tgsen"
+    assert info == 0, "Argument error in ctgevc"
+    assert MM == M, "Unexpected number of eigenvectors returned in ctgevc"
 
-    # Real inputs possibly produce complex output
-    cdef np.ndarray alpha = maybe_complex[scalar](0, alphar, alphai)
+    if not backtr:
+        if left:
+            vl = np.asfortranarray(np.dot(Q, vl))
+        if right:
+            vr = np.asfortranarray(np.dot(Z, vr))
 
-    return filter_args((True, True, Q is not None, Z is not None,
-                        calc_ev, calc_ev),
-                       (S, T, Q, Z, alpha, beta))
+    if left and right:
+        return (vl, vr)
+    elif left:
+        return vl
+    else:
+        return vr
 
 
-def tgevc(np.ndarray[scalar, ndim=2] S,
-          np.ndarray[scalar, ndim=2] T,
-          np.ndarray[scalar, ndim=2] Q,
-          np.ndarray[scalar, ndim=2] Z,
-          np.ndarray[l_logical] select,
-          left=False, right=True):
+def ztgevc(np.ndarray[np.complex128_t, ndim=2] S,
+           np.ndarray[np.complex128_t, ndim=2] T,
+           np.ndarray[np.complex128_t, ndim=2] Q=None,
+           np.ndarray[np.complex128_t, ndim=2] Z=None,
+           np.ndarray[l_logical] select=None,
+           left=False, right=True):
     cdef l_int N, info, M, MM
-
-    # Check parameters
-
-    if ((S.shape[0] != S.shape[1] or T.shape[0] != T.shape[1] or
-         S.shape[0] != T.shape[0]) or
-        (Q is not None and (Q.shape[0] != Q.shape[1] or
-                            S.shape[0] != Q.shape[0])) or
-        (Z is not None and (Z.shape[0] != Z.shape[1] or
-                            S.shape[0] != Z.shape[0]))):
-        raise ValueError("Invalid Schur decomposition as input")
+    cdef char *side
+    cdef char *howmny
+    cdef np.ndarray[np.complex128_t, ndim=2] vl, vr
+    cdef double complex *vl_ptr
+    cdef double complex *vr_ptr
+    cdef l_logical *select_ptr
+    cdef np.ndarray[np.complex128_t] work
+    cdef np.ndarray[np.float64_t] rwork
 
     assert_fortran_mat(S, T, Q, Z)
 
-    # Allocate workspaces
-
     N = S.shape[0]
+    work = np.empty(2*N, dtype = np.complex128)
+    rwork = np.empty(2*N, dtype = np.float64)
 
-    cdef np.ndarray[scalar] work
-    if scalar in floating:
-        work = np.empty(6 * N, dtype=S.dtype)
-    else:
-        work = np.empty(2 * N, dtype=S.dtype)
-
-    cdef np.ndarray rwork = None
-    if scalar is float_complex:
-        rwork = np.empty(2 * N, dtype=np.float32)
-    elif scalar is double_complex:
-        rwork = np.empty(2 * N, dtype=np.float64)
-
-    cdef char *side
     if left and right:
         side = "B"
     elif left:
@@ -1142,26 +2402,12 @@ def tgevc(np.ndarray[scalar, ndim=2] S,
     else:
         return
 
-    cdef l_logical backtr = False
+    backtr = False
 
-    cdef char *howmny
-    cdef np.ndarray[l_logical] select_cpy = None
-    cdef l_logical *select_ptr
     if select is not None:
         howmny = "S"
         MM = select.nonzero()[0].size
-        # Correct for possible additional storage if a single complex
-        # eigenvalue is selected.
-        # For that: Figure out the positions of the 2x2 blocks.
-        cmplxindx = np.diagonal(S, -1).nonzero()[0]
-        for i in cmplxindx:
-            if bool(select[i]) != bool(select[i+1]):
-                MM += 1
-
-        # select is overwritten in tgevc
-        select_cpy = np.array(select, dtype=logical_dtype,
-                              order = 'F')
-        select_ptr = <l_logical *>select_cpy.data
+        select_ptr = <l_logical *>select.data
     else:
         MM = N
         select_ptr = NULL
@@ -1173,78 +2419,38 @@ def tgevc(np.ndarray[scalar, ndim=2] S,
         else:
             howmny = "A"
 
-    cdef np.ndarray[scalar, ndim=2] vl_r
-    cdef scalar *vl_r_ptr
     if left:
         if backtr:
-            vl_r = Q
+            vl = Q
         else:
-            vl_r = np.empty((N, MM), dtype=S.dtype, order='F')
-        vl_r_ptr = <scalar *>vl_r.data
+            vl = np.empty((N, MM), dtype = np.complex128, order='F')
+        vl_ptr = <double complex *>vl.data
     else:
-        vl_r_ptr = NULL
+        vl_ptr = NULL
 
-    cdef np.ndarray[scalar, ndim=2] vr_r
-    cdef scalar *vr_r_ptr
     if right:
         if backtr:
-            vr_r = Z
+            vr = Z
         else:
-            vr_r = np.empty((N, MM), dtype=S.dtype, order='F')
-        vr_r_ptr = <scalar *>vr_r.data
+            vr = np.empty((N, MM), dtype = np.complex128, order='F')
+        vr_ptr = <double complex *>vr.data
     else:
-        vr_r_ptr = NULL
+        vr_ptr = NULL
 
-    if scalar is float:
-        lapack.stgevc(side, howmny, select_ptr,
-                      &N, <float *>S.data, &N,
-                      <float *>T.data, &N,
-                      vl_r_ptr, &N, vr_r_ptr, &N, &MM, &M,
-                      <float *>work.data, &info)
-    elif scalar is double:
-        lapack.dtgevc(side, howmny, select_ptr,
-                      &N, <double *>S.data, &N,
-                      <double *>T.data, &N,
-                      vl_r_ptr, &N, vr_r_ptr, &N, &MM, &M,
-                      <double *>work.data, &info)
-    elif scalar is float_complex:
-        lapack.ctgevc(side, howmny, select_ptr,
-                      &N, <float complex *>S.data, &N,
-                      <float complex *>T.data, &N,
-                      vl_r_ptr, &N, vr_r_ptr, &N, &MM, &M,
-                      <float complex *>work.data, <float *>rwork.data, &info)
-    elif scalar is double_complex:
-        lapack.ztgevc(side, howmny, select_ptr,
-                      &N, <double complex *>S.data, &N,
-                      <double complex *>T.data, &N,
-                      vl_r_ptr, &N, vr_r_ptr, &N, &MM, &M,
-                      <double complex *>work.data, <double *>rwork.data, &info)
-
-    assert info == 0, "Argument error in tgevc"
-    assert MM == M, "Unexpected number of eigenvectors returned in tgevc"
+    f_lapack.ztgevc_(side, howmny, select_ptr,
+                     &N, <double complex *>S.data, &N,
+                     <double complex *>T.data, &N,
+                     vl_ptr, &N, vr_ptr, &N, &MM, &M,
+                     <double complex *>work.data, <double *>rwork.data, &info)
+
+    assert info == 0, "Argument error in ztgevc"
+    assert MM == M, "Unexpected number of eigenvectors returned in ztgevc"
 
     if not backtr:
         if left:
-            vl_r = np.asfortranarray(np.dot(Q, vl_r))
+            vl = np.asfortranarray(np.dot(Q, vl))
         if right:
-            vr_r = np.asfortranarray(np.dot(Z, vr_r))
-
-    # If there are complex eigenvalues, we need to postprocess the eigenvectors
-    cdef np.ndarray vl, vr
-    if left:
-        vl = vl_r
-    if right:
-        vr = vr_r
-    if scalar in floating:
-        if scalar is float:
-            dtype = np.complex64
-        else:
-            dtype = np.complex128
-        if np.diagonal(S, -1).nonzero()[0].size:
-            if left:
-                vl = txevc_postprocess(dtype, S, vl_r, select)
-            if right:
-                vr = txevc_postprocess(dtype, S, vr_r, select)
+            vr = np.asfortranarray(np.dot(Z, vr))
 
     if left and right:
         return (vl, vr)
@@ -1274,7 +2480,8 @@ def prepare_for_lapack(overwrite, *args):
     If an argument is ``None``, it is just passed through and not used to
     determine the proper LAPACK type.
 
-    Returns a list of properly converted arrays.
+    `prepare_for_lapack` returns a character indicating the proper LAPACK data
+    type ('s', 'd', 'c', 'z') and a list of properly converted arrays.
     """
 
     # Make sure we have NumPy arrays
@@ -1295,10 +2502,18 @@ def prepare_for_lapack(overwrite, *args):
     #       kind.
     dtype = np.common_type(*[arr for arr, ovwrt in mats if arr is not None])
 
-    if dtype not in (np.float32, np.float64, np.complex64, np.complex128):
+    if dtype == np.float32:
+        lapacktype = 's'
+    elif dtype == np.float64:
+        lapacktype = 'd'
+    elif dtype == np.complex64:
+        lapacktype = 'c'
+    elif dtype == np.complex128:
+        lapacktype = 'z'
+    else:
         raise AssertionError("Unexpected data type from common_type")
 
-    ret = []
+    ret = [ lapacktype ]
     for npmat, ovwrt in mats:
         # Now make sure that the array is contiguous, and copy if necessary.
         if npmat is not None:
@@ -1323,7 +2538,4 @@ def prepare_for_lapack(overwrite, *args):
 
         ret.append(npmat)
 
-    if len(ret) == 1:
-        return ret[0]
-    else:
-        return tuple(ret)
+    return tuple(ret)
diff --git a/setup.py b/setup.py
index 04d92ea7..197505db 100755
--- a/setup.py
+++ b/setup.py
@@ -372,9 +372,8 @@ def search_mumps():
         # Conda (via conda-forge).
         # TODO: remove dependency libs (scotch, metis...) when conda-forge
         # packaged mumps/scotch are built as properly linked shared libs
-        # 'openblas' provides Lapack and BLAS symbols
         ['zmumps', 'mumps_common', 'metis', 'esmumps', 'scotch',
-         'scotcherr', 'mpiseq', 'openblas'],
+         'scotcherr', 'mpiseq'],
     ]
     common_libs = ['pord', 'gfortran']
 
@@ -385,7 +384,34 @@ def search_mumps():
     return []
 
 
+def search_lapack():
+    """Return the BLAS variant that is installed."""
+    lib_sets = [
+        # Debian
+        ['blas', 'lapack'],
+        # Conda (via conda-forge). Openblas contains lapack symbols
+        ['openblas', 'gfortran'],
+    ]
+
+    for libs in lib_sets:
+        found_libs = search_libs(libs)
+        if found_libs:
+            return found_libs
+
+    print('Error: BLAS/LAPACK are required but were not found.',
+          file=sys.stderr)
+    sys.exit(1)
+
+
 def configure_special_extensions(exts, build_summary):
+    #### Special config for LAPACK.
+    lapack = exts['kwant.linalg.lapack']
+    if 'libraries' in lapack:
+        build_summary.append('User-configured LAPACK and BLAS')
+    else:
+        lapack['libraries'] = search_lapack()
+        build_summary.append('Default LAPACK and BLAS')
+
     #### Special config for MUMPS.
     mumps = exts['kwant.linalg._mumps']
     if 'libraries' in mumps:
@@ -400,6 +426,12 @@ def configure_special_extensions(exts, build_summary):
             del exts['kwant.linalg._mumps']
             build_summary.append('No MUMPS support')
 
+    if mumps:
+        # Copy config from LAPACK.
+        for key, value in lapack.items():
+            if key not in ['sources', 'depends']:
+                mumps.setdefault(key, []).extend(value)
+
     return exts
 
 
@@ -518,7 +550,8 @@ def main():
                        'kwant/graph/c_slicer/partitioner.h',
                        'kwant/graph/c_slicer/slicer.h'])),
         ('kwant.linalg.lapack',
-         dict(sources=['kwant/linalg/lapack.pyx'])),
+         dict(sources=['kwant/linalg/lapack.pyx'],
+              depends=['kwant/linalg/f_lapack.pxd'])),
         ('kwant.linalg._mumps',
          dict(sources=['kwant/linalg/_mumps.pyx'],
               depends=['kwant/linalg/cmumps.pxd']))])
@@ -534,7 +567,8 @@ def main():
         for ext in exts.values():
             ext.setdefault('include_dirs', []).append(numpy_include)
 
-    aliases = [('mumps', 'kwant.linalg._mumps')]
+    aliases = [('lapack', 'kwant.linalg.lapack'),
+               ('mumps', 'kwant.linalg._mumps')]
 
     init_cython()
 
-- 
GitLab