diff --git a/kwant/linalg/decomp_ev.py b/kwant/linalg/decomp_ev.py
index 8e7b95d68b0eae6701f3fe38a624653a5aa67f68..bd92ce9335d2ba31edc4535f0a8a12a5828e2943 100644
--- a/kwant/linalg/decomp_ev.py
+++ b/kwant/linalg/decomp_ev.py
@@ -50,5 +50,18 @@ def gen_eig(a, b, left=False, right=True, overwrite_ab=False):
         The right eigenvector corresponding to the eigenvalue
         ``alpha[i]/beta[i]`` is the column ``vr[:,i]``.
     """
-    a, b = lapack.prepare_for_lapack(overwrite_ab, a, b)
-    return lapack.ggev(a, b, left, right)
+
+    ltype, a, b = lapack.prepare_for_lapack(overwrite_ab, a, b)
+
+    if a.ndim != 2 or b.ndim != 2:
+        raise ValueError("gen_eig requires both a and be to be matrices")
+
+    if a.shape[0] != a.shape[1]:
+        raise ValueError("gen_eig requires square matrix input")
+
+    if b.shape[0] != a.shape[0] or b.shape[1] != a.shape[1]:
+        raise ValueError("gen_eig requires a and be to have the same shape")
+
+    ggev = getattr(lapack, ltype + "ggev")
+
+    return ggev(a, b, left, right)
diff --git a/kwant/linalg/decomp_lu.py b/kwant/linalg/decomp_lu.py
index 639b547bfdb082eb0e45df3180bb31ceeb67937f..79accc254feaa53206a9ea2f4a9d977a0d904138 100644
--- a/kwant/linalg/decomp_lu.py
+++ b/kwant/linalg/decomp_lu.py
@@ -45,8 +45,20 @@ def lu_factor(a, overwrite_a=False):
     singular : boolean
         Whether the matrix a is singular (up to machine precision)
     """
-    a = lapack.prepare_for_lapack(overwrite_a, a)
-    return lapack.getrf(a)
+
+    ltype, a = lapack.prepare_for_lapack(overwrite_a, a)
+
+    if a.ndim != 2:
+        raise ValueError("lu_factor expects a matrix")
+
+    if ltype == 'd':
+        return lapack.dgetrf(a)
+    elif ltype == 'z':
+        return lapack.zgetrf(a)
+    elif ltype == 's':
+        return lapack.sgetrf(a)
+    else:
+        return lapack.cgetrf(a)
 
 
 def lu_solve(matrix_factorization, b):
@@ -71,9 +83,23 @@ def lu_solve(matrix_factorization, b):
                              "a singular matrix. Result of solve step "
                              "are probably unreliable")
 
-    lu, b = lapack.prepare_for_lapack(False, lu, b)
+    ltype, lu, b = lapack.prepare_for_lapack(False, lu, b)
     ipiv = np.ascontiguousarray(np.asanyarray(ipiv), dtype=lapack.int_dtype)
-    return lapack.getrs(lu, ipiv, b)
+
+    if b.ndim > 2:
+        raise ValueError("lu_solve: b must be a vector or matrix")
+
+    if lu.shape[0] != b.shape[0]:
+        raise ValueError("lu_solve: incompatible dimensions of b")
+
+    if ltype == 'd':
+        return lapack.dgetrs(lu, ipiv, b)
+    elif ltype == 'z':
+        return lapack.zgetrs(lu, ipiv, b)
+    elif ltype == 's':
+        return lapack.sgetrs(lu, ipiv, b)
+    else:
+        return lapack.cgetrs(lu, ipiv, b)
 
 
 def rcond_from_lu(matrix_factorization, norm_a, norm="1"):
@@ -101,6 +127,17 @@ def rcond_from_lu(matrix_factorization, norm_a, norm="1"):
         norm specified in norm
     """
     (lu, ipiv, singular) = matrix_factorization
+    if not norm in ("1", "I"):
+        raise ValueError("norm in rcond_from_lu must be either '1' or 'I'")
     norm = norm.encode('utf8')  # lapack expects bytes
-    lu = lapack.prepare_for_lapack(False, lu)
-    return lapack.gecon(lu, norm_a, norm)
+
+    ltype, lu = lapack.prepare_for_lapack(False, lu)
+
+    if ltype == 'd':
+        return lapack.dgecon(lu, norm_a, norm)
+    elif ltype == 'z':
+        return lapack.zgecon(lu, norm_a, norm)
+    elif ltype == 's':
+        return lapack.sgecon(lu, norm_a, norm)
+    else:
+        return lapack.cgecon(lu, norm_a, norm)
diff --git a/kwant/linalg/decomp_schur.py b/kwant/linalg/decomp_schur.py
index d65432132efd3084e26a25643dbc36b82f48b39c..4accc17ff0f1643c9532ada85a29dfa08a2b67fa 100644
--- a/kwant/linalg/decomp_schur.py
+++ b/kwant/linalg/decomp_schur.py
@@ -62,8 +62,18 @@ def schur(a, calc_q=True, calc_ev=True, overwrite_a=False):
     LinAlgError
         If the underlying QR iteration fails to converge.
     """
-    a = lapack.prepare_for_lapack(overwrite_a, a)
-    return lapack.gees(a, calc_q, calc_ev)
+
+    ltype, a = lapack.prepare_for_lapack(overwrite_a, a)
+
+    if a.ndim != 2:
+        raise ValueError("Expect matrix as input")
+
+    if a.shape[0] != a.shape[1]:
+        raise ValueError("Expect square matrix")
+
+    gees = getattr(lapack, ltype + "gees")
+
+    return gees(a, calc_q, calc_ev)
 
 
 def convert_r2c_schur(t, q):
@@ -182,7 +192,9 @@ def order_schur(select, t, q, calc_ev=True, overwrite_tq=False):
         ``calc_ev == True``
     """
 
-    t, q = lapack.prepare_for_lapack(overwrite_tq, t, q)
+    ltype, t, q = lapack.prepare_for_lapack(overwrite_tq, t, q)
+
+    trsen = getattr(lapack, ltype + "trsen")
 
     # Figure out if select is a function or array.
     isfun = isarray = True
@@ -211,7 +223,7 @@ def order_schur(select, t, q, calc_ev=True, overwrite_tq=False):
             t, q = convert_r2c_schur(t, q)
             return order_schur(select, t, q, calc_ev, True)
 
-    return lapack.trsen(select, t, q, calc_ev)
+    return trsen(select, t, q, calc_ev)
 
 
 def evecs_from_schur(t, q, select=None, left=False, right=True,
@@ -255,7 +267,13 @@ def evecs_from_schur(t, q, select=None, left=False, right=True,
         ``right == True``.
     """
 
-    t, q = lapack.prepare_for_lapack(overwrite_tq, t, q)
+    ltype, t, q = lapack.prepare_for_lapack(overwrite_tq, t, q)
+
+    if (t.shape[0] != t.shape[1] or q.shape[0] != q.shape[1]
+        or t.shape[0] != q.shape[0]):
+        raise ValueError("Invalid Schur decomposition as input")
+
+    trevc = getattr(lapack, ltype + "trevc")
 
     # check if select is a function or an array
     if select is not None:
@@ -282,7 +300,7 @@ def evecs_from_schur(t, q, select=None, left=False, right=True,
     else:
         selectarr = None
 
-    return lapack.trevc(t, q, selectarr, left, right)
+    return trevc(t, q, selectarr, left, right)
 
 
 def gen_schur(a, b, calc_q=True, calc_z=True, calc_ev=True,
@@ -346,8 +364,21 @@ def gen_schur(a, b, calc_q=True, calc_z=True, calc_ev=True,
     LinAlError
         If the underlying QZ iteration fails to converge.
     """
-    a, b = lapack.prepare_for_lapack(overwrite_ab, a, b)
-    return lapack.gges(a, b, calc_q, calc_z, calc_ev)
+
+    ltype, a, b = lapack.prepare_for_lapack(overwrite_ab, a, b)
+
+    if a.ndim != 2 or b.ndim != 2:
+        raise ValueError("Expect matrices as input")
+
+    if a.shape[0] != a.shape[1]:
+        raise ValueError("Expect square matrix a")
+
+    if a.shape[0] != b.shape[0] or a.shape[0] != b.shape[1]:
+        raise ValueError("Shape of b is incompatible to matrix a")
+
+    gges = getattr(lapack, ltype + "gges")
+
+    return gges(a, b, calc_q, calc_z, calc_ev)
 
 
 def order_gen_schur(select, s, t, q=None, z=None, calc_ev=True,
@@ -408,8 +439,22 @@ def order_gen_schur(select, s, t, q=None, z=None, calc_ev=True,
     LinAlError
         If the problem is too ill-conditioned.
     """
-    s, t, q, z = lapack.prepare_for_lapack(overwrite_stqz, s, t, q, z)
+    ltype, s, t, q, z = lapack.prepare_for_lapack(overwrite_stqz, s, t, q, z)
 
+    if (s.ndim != 2 or t.ndim != 2 or
+        (q is not None and q.ndim != 2) or
+        (z is not None and z.ndim != 2)):
+        raise ValueError("Expect matrices as input")
+
+    if ((s.shape[0] != s.shape[1] or t.shape[0] != t.shape[1] or
+         s.shape[0] != t.shape[0]) or
+        (q is not None and (q.shape[0] != q.shape[1] or
+                            s.shape[0] != q.shape[0])) or
+        (z is not None and (z.shape[0] != z.shape[1] or
+                            s.shape[0] != z.shape[0]))):
+        raise ValueError("Invalid Schur decomposition as input")
+
+    tgsen = getattr(lapack, ltype + "tgsen")
 
     # Figure out if select is a function or array.
     isfun = isarray = True
@@ -447,7 +492,7 @@ def order_gen_schur(select, s, t, q=None, z=None, calc_ev=True,
 
             return order_gen_schur(select, s, t, q, z, calc_ev, True)
 
-    return lapack.tgsen(select, s, t, q, z, calc_ev)
+    return tgsen(select, s, t, q, z, calc_ev)
 
 
 def convert_r2c_gen_schur(s, t, q=None, z=None):
@@ -491,7 +536,7 @@ def convert_r2c_gen_schur(s, t, q=None, z=None):
         If it fails to convert a 2x2 block into complex form (unlikely).
     """
 
-    s, t, q, z = lapack.prepare_for_lapack(True, s, t, q, z)
+    ltype, s, t, q, z = lapack.prepare_for_lapack(True, s, t, q, z)
     # Note: overwrite=True does not mean much here, the arrays are all copied
 
     if (s.ndim != 2 or t.ndim != 2 or
@@ -611,7 +656,20 @@ def evecs_from_gen_schur(s, t, q=None, z=None, select=None,
 
     """
 
-    s, t, q, z = lapack.prepare_for_lapack(overwrite_qz, s, t, q, z)
+    ltype, s, t, q, z = lapack.prepare_for_lapack(overwrite_qz, s, t, q, z)
+
+    if (s.ndim != 2 or t.ndim != 2 or
+        (q is not None and q.ndim != 2) or
+        (z is not None and z.ndim != 2)):
+        raise ValueError("Expect matrices as input")
+
+    if ((s.shape[0] != s.shape[1] or t.shape[0] != t.shape[1] or
+         s.shape[0] != t.shape[0]) or
+        (q is not None and (q.shape[0] != q.shape[1] or
+                            s.shape[0] != q.shape[0])) or
+        (z is not None and (z.shape[0] != z.shape[1] or
+                            s.shape[0] != z.shape[0]))):
+        raise ValueError("Invalid Schur decomposition as input")
 
     if left and q is None:
         raise ValueError("Matrix q must be provided for left eigenvectors")
@@ -619,6 +677,8 @@ def evecs_from_gen_schur(s, t, q=None, z=None, select=None,
     if right and z is None:
         raise ValueError("Matrix z must be provided for right eigenvectors")
 
+    tgevc = getattr(lapack, ltype + "tgevc")
+
     # Check if select is a function or an array.
     if select is not None:
         isfun = isarray = True
@@ -644,4 +704,4 @@ def evecs_from_gen_schur(s, t, q=None, z=None, select=None,
     else:
         selectarr = None
 
-    return lapack.tgevc(s, t, q, z, selectarr, left, right)
+    return tgevc(s, t, q, z, selectarr, left, right)
diff --git a/kwant/linalg/f_lapack.pxd b/kwant/linalg/f_lapack.pxd
new file mode 100644
index 0000000000000000000000000000000000000000..1a0304af871c6bb75994eb0668389e0d6ad2b54d
--- /dev/null
+++ b/kwant/linalg/f_lapack.pxd
@@ -0,0 +1,217 @@
+# Copyright 2011-2013 Kwant authors.
+#
+# This file is part of Kwant.  It is subject to the license terms in the file
+# LICENSE.rst found in the top-level directory of this distribution and at
+# http://kwant-project.org/license.  A list of Kwant authors can be found in
+# the file AUTHORS.rst at the top-level directory of this distribution and at
+# http://kwant-project.org/authors.
+
+ctypedef int l_int
+ctypedef int l_logical
+
+cdef extern:
+    void sgetrf_(l_int *, l_int *, float *, l_int *, l_int *, l_int *)
+    void dgetrf_(l_int *, l_int *, double *, l_int *, l_int *, l_int *)
+    void cgetrf_(l_int *, l_int *, float complex *, l_int *, l_int *,
+                 l_int *)
+    void zgetrf_(l_int *, l_int *, double complex *, l_int *, l_int *,
+                 l_int *)
+
+    void sgetrs_(char *, l_int *, l_int *, float *, l_int *, l_int *,
+                 float *, l_int *, l_int *)
+    void dgetrs_(char *, l_int *, l_int *, double *, l_int *, l_int *,
+                 double *, l_int *, l_int *)
+    void cgetrs_(char *, l_int *, l_int *, float complex *, l_int *,
+                 l_int *, float complex *, l_int *, l_int *)
+    void zgetrs_(char *, l_int *, l_int *, double complex *, l_int *,
+                 l_int *, double complex *, l_int *, l_int *)
+
+    void sgecon_(char *, l_int *, float *, l_int *, float *, float *,
+                 float *, l_int *, l_int *)
+    void dgecon_(char *, l_int *, double *, l_int *, double *, double *,
+                 double *, l_int *, l_int *)
+    void cgecon_(char *, l_int *, float complex *, l_int *, float *,
+                 float *, float complex *, float *, l_int *)
+    void zgecon_(char *, l_int *, double complex *, l_int *, double *,
+                 double *, double complex *, double *, l_int *)
+
+    void sggev_(char *, char *, l_int *, float *, l_int *, float *, l_int *,
+                float *, float *, float *, float *, l_int *, float *, l_int *,
+                float *, l_int *, l_int *)
+    void dggev_(char *, char *, l_int *, double *, l_int *, double *, l_int *,
+                double *, double *, double *, double *, l_int *,
+                double *, l_int *, double *, l_int *, l_int *)
+    void cggev_(char *, char *, l_int *, float complex *, l_int *,
+                float complex *, l_int *, float complex *, float complex *,
+                float complex *, l_int *, float complex *, l_int *,
+                float complex *, l_int *, float *, l_int *)
+    void zggev_(char *, char *, l_int *, double complex *, l_int *,
+                double complex *, l_int *, double complex *,
+                double complex *, double complex *, l_int *,
+                double complex *, l_int *, double complex *, l_int *,
+                double *, l_int *)
+
+    void sgees_(char *, char *, l_logical (*)(float *, float *),
+                l_int *, float *, l_int *, l_int *,
+                float *, float *, float *, l_int *,
+                float *, l_int *, l_logical *, l_int *)
+    void dgees_(char *, char *, l_logical (*)(double *, double *),
+                l_int *, double *, l_int *, l_int *,
+                double *, double *, double *, l_int *,
+                double *, l_int *, l_logical *, l_int *)
+    void cgees_(char *, char *,
+                l_logical (*)(float complex *),
+                l_int *, float complex *,
+                l_int *, l_int *, float complex *,
+                float complex *, l_int *,
+                float complex *, l_int *, float *,
+                l_logical *, l_int *)
+    void zgees_(char *, char *,
+                l_logical (*)(double complex *),
+                l_int *, double complex *,
+                l_int *, l_int *, double complex *,
+                double complex *, l_int *,
+                double complex *, l_int *,
+                double *, l_logical *, l_int *)
+
+    void strsen_(char *, char *, l_logical *, l_int *,
+                 float *, l_int *, float *,
+                 l_int *, float *, float *, l_int *,
+                 float *, float *, float *, l_int *,
+                 l_int *, l_int *, l_int *)
+    void dtrsen_(char *, char *, l_logical *,
+                 l_int *, double *, l_int *,
+                 double *, l_int *, double *, double *,
+                 l_int *, double *, double *, double *,
+                 l_int *, l_int *, l_int *, l_int *)
+    void ctrsen_(char *, char *, l_logical *,
+                 l_int *, float complex *,
+                 l_int *, float complex *,
+                 l_int *, float complex *, l_int *,
+                 float *, float *, float complex *,
+                 l_int *, l_int *)
+    void ztrsen_(char *, char *, l_logical *,
+                 l_int *, double complex *,
+                 l_int *, double complex *,
+                 l_int *, double complex *, l_int *,
+                 double *, double *, double complex *,
+                 l_int *, l_int *)
+
+    void strevc_(char *, char *, l_logical *,
+                 l_int *, float *, l_int *,
+                 float *, l_int *, float *, l_int *,
+                 l_int *, l_int *, float *, l_int *)
+    void dtrevc_(char *, char *, l_logical *,
+                 l_int *, double *, l_int *,
+                 double *, l_int *, double *,
+                 l_int *, l_int *, l_int *, double *,
+                 l_int *)
+    void ctrevc_(char *, char *, l_logical *,
+                 l_int *, float complex *,
+                 l_int *, float complex *,
+                 l_int *, float complex *,
+                 l_int *, l_int *, l_int *,
+                 float complex *, float *, l_int *)
+    void ztrevc_(char *, char *, l_logical *,
+                 l_int *, double complex *,
+                 l_int *, double complex *,
+                 l_int *, double complex *,
+                 l_int *, l_int *, l_int *,
+                 double complex *, double *, l_int *)
+
+    void sgges_(char *, char *, char *,
+                l_logical (*)(float *, float *, float *),
+                l_int *, float *, l_int *, float *,
+                l_int *, l_int *, float *, float *,
+                float *, float *, l_int *, float *,
+                l_int *, float *, l_int *, l_logical *,
+                l_int *)
+    void dgges_(char *, char *, char *,
+                l_logical (*)(double *, double *, double *),
+                l_int *, double *, l_int *, double *,
+                l_int *, l_int *, double *, double *,
+                double *, double *, l_int *, double *,
+                l_int *, double *, l_int *,
+                l_logical *, l_int *)
+    void cgges_(char *, char *, char *,
+                l_logical (*)(float complex *, float complex *),
+                l_int *, float complex *,
+                l_int *, float complex *,
+                l_int *, l_int *, float complex *,
+                float complex *, float complex *,
+                l_int *, float complex *,
+                l_int *, float complex *,
+                l_int *, float *, l_logical *, l_int *)
+    void zgges_(char *, char *, char *,
+                l_logical (*)(double complex *, double complex *),
+                l_int *, double complex *,
+                l_int *, double complex *,
+                l_int *, l_int *, double complex *,
+                double complex *,
+                double complex *, l_int *,
+                double complex *, l_int *,
+                double complex *, l_int *,
+                double *, l_logical *, l_int *)
+
+    void stgsen_(l_int *, l_logical *,
+                 l_logical *, l_logical *,
+                 l_int *, float *, l_int *, float *,
+                 l_int *, float *, float *, float *,
+                 float *, l_int *, float *, l_int *,
+                 l_int *, float *, float *, float *, float *,
+                 l_int *, l_int *, l_int *, l_int *)
+    void dtgsen_(l_int *, l_logical *,
+                 l_logical *, l_logical *,
+                 l_int *, double *, l_int *,
+                 double *, l_int *, double *, double *,
+                 double *, double *, l_int *, double *,
+                 l_int *, l_int *, double *, double *,
+                 double *, double *, l_int *, l_int *,
+                 l_int *, l_int *)
+    void ctgsen_(l_int *, l_logical *,
+                 l_logical *, l_logical *,
+                 l_int *, float complex *,
+                 l_int *, float complex *,
+                 l_int *, float complex *,
+                 float complex *,
+                 float complex *, l_int *,
+                 float complex *, l_int *, l_int *,
+                 float *, float *, float *,
+                 float complex *, l_int *, l_int *,
+                 l_int *, l_int *)
+    void ztgsen_(l_int *, l_logical *,
+                 l_logical *, l_logical *,
+                 l_int *, double complex *,
+                 l_int *, double complex *,
+                 l_int *, double complex *,
+                 double complex *,
+                 double complex *, l_int *,
+                 double complex *, l_int *, l_int *,
+                 double *, double *, double *,
+                 double complex *, l_int *, l_int *,
+                 l_int *, l_int *)
+
+    void stgevc_(char *, char *, l_logical *,
+                 l_int *, float *, l_int *,
+                 float *, l_int *, float *,
+                 l_int *, float *, l_int *,
+                 l_int *, l_int *, float *, l_int *)
+    void dtgevc_(char *, char *, l_logical *,
+                 l_int *, double *, l_int *,
+                 double *, l_int *, double *,
+                 l_int *, double *, l_int *,
+                 l_int *, l_int *, double *, l_int *)
+    void ctgevc_(char *, char *, l_logical *,
+                 l_int *, float complex *,
+                 l_int *, float complex *,
+                 l_int *, float complex *,
+                 l_int *, float complex *,
+                 l_int *, l_int *, l_int *,
+                 float complex *, float *, l_int *)
+    void ztgevc_(char *, char *, l_logical *,
+                 l_int *, double complex *,
+                 l_int *, double complex *,
+                 l_int *, double complex *,
+                 l_int *, double complex *,
+                 l_int *, l_int *, l_int *,
+                 double complex *, double *, l_int *)
diff --git a/kwant/linalg/f_lapack.py b/kwant/linalg/f_lapack.py
new file mode 100644
index 0000000000000000000000000000000000000000..39b702b5394dafb773744dfa1d0faa05c17a419c
--- /dev/null
+++ b/kwant/linalg/f_lapack.py
@@ -0,0 +1,14 @@
+# Copyright 2011-2013 Kwant authors.
+#
+# This file is part of Kwant.  It is subject to the license terms in the file
+# LICENSE.rst found in the top-level directory of this distribution and at
+# http://kwant-project.org/license.  A list of Kwant authors can be found in
+# the file AUTHORS.rst at the top-level directory of this distribution and at
+# http://kwant-project.org/authors.
+
+__all__ = ['l_int_dtype', 'l_logical_dtype']
+
+import numpy as np
+
+l_int_dtype = np.int32
+l_logical_dtype = np.int32
diff --git a/kwant/linalg/lapack.pyx b/kwant/linalg/lapack.pyx
index e173bc0ce28e860f93ee58d36c725d020fd95c72..d6699a0e11ea912506d417d1e1ab59270c3270a3 100644
--- a/kwant/linalg/lapack.pyx
+++ b/kwant/linalg/lapack.pyx
@@ -1,4 +1,4 @@
-# Copyright 2011-2017 Kwant authors.
+# Copyright 2011-2013 Kwant authors.
 #
 # This file is part of Kwant.  It is subject to the license terms in the file
 # LICENSE.rst found in the top-level directory of this distribution and at
@@ -8,53 +8,29 @@
 
 """Low-level access to LAPACK functions. """
 
-__all__ = ['getrf',
-           'getrs',
-           'gecon',
-           'ggev',
-           'gees',
-           'trsen',
-           'trevc',
-           'gges',
-           'tgsen',
-           'tgevc',
+__all__ = ['sgetrf', 'dgetrf', 'cgetrf', 'zgetrf',
+           'sgetrs', 'dgetrs', 'cgetrs', 'zgetrs',
+           'sgecon', 'dgecon', 'cgecon', 'zgecon',
+           'sggev', 'dggev', 'cggev', 'zggev',
+           'sgees', 'dgees', 'cgees', 'zgees',
+           'strsen', 'dtrsen', 'ctrsen', 'ztrsen',
+           'strevc', 'dtrevc', 'ctrevc', 'ztrevc',
+           'sgges', 'dgges', 'cgges', 'zgges',
+           'stgsen', 'dtgsen', 'ctgsen', 'ztgsen',
+           'stgevc', 'dtgevc', 'ctgevc', 'ztgevc',
            'prepare_for_lapack']
 
 import numpy as np
 cimport numpy as np
 
-cimport scipy.linalg.cython_lapack as lapack
+# Strangely absolute imports from kwant.linalg.f_lapack don't work here.  So
+# continue using traditional implicit relative imports.
+from . import f_lapack
+from . cimport f_lapack
+from .f_lapack cimport l_int, l_logical
 
-ctypedef int l_int
-ctypedef bint l_logical
-
-int_dtype = np.int32
-logical_dtype = np.int32
-
-ctypedef float complex float_complex
-ctypedef double complex double_complex
-
-ctypedef fused scalar:
-    float
-    double
-    float complex
-    double complex
-
-ctypedef fused single_precision:
-    float
-    float complex
-
-ctypedef fused double_precision:
-    double
-    double complex
-
-ctypedef fused cmplx:
-    float complex
-    double complex
-
-ctypedef fused floating:
-    float
-    double
+int_dtype = f_lapack.l_int_dtype
+logical_dtype = f_lapack.l_logical_dtype
 
 # exceptions
 
@@ -75,161 +51,284 @@ def assert_fortran_mat(*mats):
             raise ValueError("Input matrix must be Fortran contiguous")
 
 
-cdef np.ndarray maybe_complex(scalar selector,
-                              np.ndarray real, np.ndarray imag):
-    cdef np.ndarray r
-    r = real
-    if scalar in floating:
-        if imag.nonzero()[0].size:
-            r = real + 1j * imag
-    return r
+# Wrappers for xGETRF
+def sgetrf(np.ndarray[np.float32_t, ndim=2] A):
+    cdef l_int M, N, info
+    cdef np.ndarray[l_int, ndim=1] ipiv
+
+    assert_fortran_mat(A)
+
+    M = A.shape[0]
+    N = A.shape[1]
+    ipiv = np.empty(min(M,N), dtype = f_lapack.l_int_dtype)
+
+    f_lapack.sgetrf_(&M, &N, <float *>A.data, &M,
+                     <l_int *>ipiv.data, &info)
 
+    assert info >= 0, "Argument error in sgetrf"
 
-cdef l_int lwork_from_qwork(scalar qwork):
-    if scalar in floating:
-        return <l_int>qwork
-    else:
-        return <l_int>qwork.real
+    return (A, ipiv, info > 0 or M != N)
+
+def dgetrf(np.ndarray[np.float64_t, ndim=2] A):
+    cdef l_int M, N, info
+    cdef np.ndarray[l_int, ndim=1] ipiv
+
+    assert_fortran_mat(A)
+
+    M = A.shape[0]
+    N = A.shape[1]
+    ipiv = np.empty(min(M,N), dtype = f_lapack.l_int_dtype)
+
+    f_lapack.dgetrf_(&M, &N, <double *>A.data, &M,
+                     <l_int *>ipiv.data, &info)
+
+    assert info >= 0, "Argument error in dgetrf"
+
+    return (A, ipiv, info > 0 or M != N)
+
+def cgetrf(np.ndarray[np.complex64_t, ndim=2] A):
+    cdef l_int M, N, info
+    cdef np.ndarray[l_int, ndim=1] ipiv
+
+    assert_fortran_mat(A)
+
+    M = A.shape[0]
+    N = A.shape[1]
+    ipiv = np.empty(min(M,N), dtype = f_lapack.l_int_dtype)
+
+    f_lapack.cgetrf_(&M, &N, <float complex *>A.data, &M,
+                     <l_int *>ipiv.data, &info)
+
+    assert info >= 0, "Argument error in cgetrf"
 
+    return (A, ipiv, info > 0 or M != N)
 
-def getrf(np.ndarray[scalar, ndim=2] A):
+def zgetrf(np.ndarray[np.complex128_t, ndim=2] A):
     cdef l_int M, N, info
-    cdef np.ndarray[l_int] ipiv
+    cdef np.ndarray[l_int, ndim=1] ipiv
 
     assert_fortran_mat(A)
 
     M = A.shape[0]
     N = A.shape[1]
-    ipiv = np.empty(min(M,N), dtype = int_dtype)
-
-    if scalar is float:
-        lapack.sgetrf(&M, &N, <float *>A.data, &M,
-                      <l_int *>ipiv.data, &info)
-    elif scalar is double:
-        lapack.dgetrf(&M, &N, <double *>A.data, &M,
-                      <l_int *>ipiv.data, &info)
-    elif scalar is float_complex:
-        lapack.cgetrf(&M, &N, <float complex *>A.data, &M,
-                      <l_int *>ipiv.data, &info)
-    elif scalar is double_complex:
-        lapack.zgetrf(&M, &N, <double complex *>A.data, &M,
-                      <l_int *>ipiv.data, &info)
-
-    assert info >= 0, "Argument error in getrf"
+    ipiv = np.empty(min(M,N), dtype = f_lapack.l_int_dtype)
+
+    f_lapack.zgetrf_(&M, &N, <double complex *>A.data, &M,
+                     <l_int *>ipiv.data, &info)
+
+    assert info >= 0, "Argument error in zgetrf"
 
     return (A, ipiv, info > 0 or M != N)
 
+# Wrappers for xGETRS
 
-def getrs(np.ndarray[scalar, ndim=2] LU, np.ndarray[l_int] IPIV,
-          np.ndarray B):
+def sgetrs(np.ndarray[np.float32_t, ndim=2] LU,
+           np.ndarray[l_int, ndim=1] IPIV, B):
     cdef l_int N, NRHS, info
+    cdef np.ndarray b
 
     assert_fortran_mat(LU)
 
-    # Consistency checks for LU and B
+    # again: workaround for 1x1-Fortran bug in NumPy < v2.0
+    if (not isinstance(B, np.ndarray) or
+        (B.ndim == 2 and (B.shape[0] > 1 or B.shape[1] > 1) and
+         not B.flags["F_CONTIGUOUS"])):
+        raise ValueError("In dgetrs: B must be a Fortran ordered NumPy array")
+
+    b = B
+    N = LU.shape[0]
+    if b.ndim == 1:
+        NRHS = 1
+    elif b.ndim == 2:
+        NRHS = B.shape[1]
+    else:
+        raise ValueError("In sgetrs: B must be a vector or matrix")
+
+    f_lapack.sgetrs_("N", &N, &NRHS, <float *>LU.data, &N,
+                     <l_int *>IPIV.data, <float *>b.data, &N,
+                     &info)
+
+    assert info == 0, "Argument error in sgetrs"
+
+    return b
 
-    if B.descr.type_num != LU.descr.type_num:
-        raise TypeError('B must have same dtype as LU')
+def dgetrs(np.ndarray[np.float64_t, ndim=2] LU,
+           np.ndarray[l_int, ndim=1] IPIV, B):
+    cdef l_int N, NRHS, info
+    cdef np.ndarray b
+
+    assert_fortran_mat(LU)
 
-    # Workaround for 1x1-Fortran bug in NumPy < v2.0
-    if ((B.ndim == 2 and (B.shape[0] > 1 or B.shape[1] > 1) and
+    # again: workaround for 1x1-Fortran bug in NumPy < v2.0
+    if (not isinstance(B, np.ndarray) or
+        (B.ndim == 2 and (B.shape[0] > 1 or B.shape[1] > 1) and
          not B.flags["F_CONTIGUOUS"])):
-        raise ValueError("B must be Fortran ordered")
+        raise ValueError("In dgetrs: B must be a Fortran ordered NumPy array")
+
+    b = B
+    N = LU.shape[0]
+    if b.ndim == 1:
+        NRHS = 1
+    elif b.ndim == 2:
+        NRHS = b.shape[1]
+    else:
+        raise ValueError("In dgetrs: B must be a vector or matrix")
+
+    f_lapack.dgetrs_("N", &N, &NRHS, <double *>LU.data, &N,
+                     <l_int *>IPIV.data, <double *>b.data, &N,
+                     &info)
 
-    if B.ndim > 2:
-        raise ValueError("B must be a vector or matrix")
+    assert info == 0, "Argument error in dgetrs"
 
-    if LU.shape[0] != B.shape[0]:
-        raise ValueError('LU and B have incompatible shapes')
+    return b
 
+def cgetrs(np.ndarray[np.complex64_t, ndim=2] LU,
+           np.ndarray[l_int, ndim=1] IPIV, B):
+    cdef l_int N, NRHS, info
+    cdef np.ndarray b
+
+    assert_fortran_mat(LU)
+
+    # again: workaround for 1x1-Fortran bug in NumPy < v2.0
+    if (not isinstance(B, np.ndarray) or
+        (B.ndim == 2 and (B.shape[0] > 1 or B.shape[1] > 1) and
+         not B.flags["F_CONTIGUOUS"])):
+        raise ValueError("In dgetrs: B must be a Fortran ordered NumPy array")
+
+    b = B
     N = LU.shape[0]
+    if b.ndim == 1:
+        NRHS = 1
+    elif b.ndim == 2:
+        NRHS = b.shape[1]
+    else:
+        raise ValueError("In cgetrs: B must be a vector or matrix")
 
-    if B.ndim == 1:
+    f_lapack.cgetrs_("N", &N, &NRHS, <float complex *>LU.data, &N,
+                     <l_int *>IPIV.data, <float complex *>b.data, &N,
+                     &info)
+
+    assert info == 0, "Argument error in cgetrs"
+
+    return b
+
+def zgetrs(np.ndarray[np.complex128_t, ndim=2] LU,
+           np.ndarray[l_int, ndim=1] IPIV, B):
+    cdef l_int N, NRHS, info
+    cdef np.ndarray b
+
+    assert_fortran_mat(LU)
+
+    # again: workaround for 1x1-Fortran bug in NumPy < v2.0
+    if (not isinstance(B, np.ndarray) or
+        (B.ndim == 2 and (B.shape[0] > 1 or B.shape[1] > 1) and
+         not B.flags["F_CONTIGUOUS"])):
+        raise ValueError("In dgetrs: B must be a Fortran ordered NumPy array")
+
+    b = B
+    N = LU.shape[0]
+    if b.ndim == 1:
         NRHS = 1
-    elif B.ndim == 2:
-        NRHS = B.shape[1]
+    elif b.ndim == 2:
+        NRHS = b.shape[1]
+    else:
+        raise ValueError("In zgetrs: B must be a vector or matrix")
+
+    f_lapack.zgetrs_("N", &N, &NRHS, <double complex *>LU.data, &N,
+                     <l_int *>IPIV.data, <double complex *>b.data, &N,
+                     &info)
+
+    assert info == 0, "Argument error in zgetrs"
+
+    return b
+
+# Wrappers for xGECON
 
-    if scalar is float:
-        lapack.sgetrs("N", &N, &NRHS, <float *>LU.data, &N,
-                      <l_int *>IPIV.data, <float *>B.data, &N,
-                      &info)
-    elif scalar is double:
-        lapack.dgetrs("N", &N, &NRHS, <double *>LU.data, &N,
-                      <l_int *>IPIV.data, <double *>B.data, &N,
-                      &info)
-    elif scalar is float_complex:
-        lapack.cgetrs("N", &N, &NRHS, <float complex *>LU.data, &N,
-                      <l_int *>IPIV.data, <float complex *>B.data, &N,
-                      &info)
-    elif scalar is double_complex:
-        lapack.zgetrs("N", &N, &NRHS, <double complex *>LU.data, &N,
-                      <l_int *>IPIV.data, <double complex *>B.data, &N,
-                      &info)
-
-    assert info == 0, "Argument error in getrs"
-
-    return B
-
-
-def gecon(np.ndarray[scalar, ndim=2] LU, double normA, char *norm = b"1"):
+def sgecon(np.ndarray[np.float32_t, ndim=2] LU,
+            float normA, char *norm = b"1"):
     cdef l_int N, info
-    cdef float srcond, snormA
-    cdef double drcond
+    cdef float rcond
+    cdef np.ndarray[np.float32_t, ndim=1] work
+    cdef np.ndarray[l_int, ndim=1] iwork
 
-    # Parameter checks
+    assert_fortran_mat(LU)
+
+    N = LU.shape[0]
+    work = np.empty(4*N, dtype = np.float32)
+    iwork = np.empty(N, dtype = f_lapack.l_int_dtype)
+
+    f_lapack.sgecon_(norm, &N, <float *>LU.data, &N, &normA,
+                     &rcond, <float *>work.data,
+                     <l_int *>iwork.data, &info)
+
+    assert info == 0, "Argument error in sgecon"
+
+    return rcond
+
+def dgecon(np.ndarray[np.float64_t, ndim=2] LU,
+            double normA, char *norm = b"1"):
+    cdef l_int N, info
+    cdef double rcond
+    cdef np.ndarray[np.float64_t, ndim=1] work
+    cdef np.ndarray[l_int, ndim=1] iwork
 
     assert_fortran_mat(LU)
-    if norm[0] != b"1" and norm[0] != b"I":
-        raise ValueError("'norm' must be either '1' or 'I'")
-    if scalar in single_precision:
-        snormA = normA
 
-    # Allocate workspaces
+    N = LU.shape[0]
+    work = np.empty(4*N, dtype = np.float64)
+    iwork = np.empty(N, dtype = f_lapack.l_int_dtype)
+
+    f_lapack.dgecon_(norm, &N, <double *>LU.data, &N, &normA,
+                     &rcond, <double *>work.data,
+                     <l_int *>iwork.data, &info)
+
+    assert info == 0, "Argument error in dgecon"
+
+    return rcond
+
+def cgecon(np.ndarray[np.complex64_t, ndim=2] LU,
+            float normA, char *norm = b"1"):
+    cdef l_int N, info
+    cdef float rcond
+    cdef np.ndarray[np.complex64_t, ndim=1] work
+    cdef np.ndarray[np.float32_t, ndim=1] rwork
+
+    assert_fortran_mat(LU)
 
     N = LU.shape[0]
+    work = np.empty(2*N, dtype = np.complex64)
+    rwork = np.empty(2*N, dtype = np.float32)
 
-    cdef np.ndarray[l_int] iwork
-    if scalar in floating:
-        iwork = np.empty(N, dtype=int_dtype)
+    f_lapack.cgecon_(norm, &N, <float complex *>LU.data, &N, &normA,
+                     &rcond, <float complex *>work.data,
+                     <float *>rwork.data, &info)
 
-    cdef np.ndarray[scalar] work
-    if scalar in floating:
-        work = np.empty(4 * N, dtype=LU.dtype)
-    else:
-        work = np.empty(2 * N, dtype=LU.dtype)
+    assert info == 0, "Argument error in cgecon"
 
-    cdef np.ndarray rwork
-    if scalar is float_complex:
-        rwork = np.empty(2 * N, dtype=np.float32)
-    elif scalar is double_complex:
-        rwork = np.empty(2 * N, dtype=np.float64)
+    return rcond
 
-    # The actual calculation
+def zgecon(np.ndarray[np.complex128_t, ndim=2] LU,
+           double normA, char *norm = b"1"):
+    cdef l_int N, info
+    cdef double rcond
+    cdef np.ndarray[np.complex128_t, ndim=1] work
+    cdef np.ndarray[np.float64_t, ndim=1] rwork
 
-    if scalar is float:
-        lapack.sgecon(norm, &N, <float *>LU.data, &N, &snormA,
-                      &srcond, <float *>work.data,
-                      <l_int *>iwork.data, &info)
-    elif scalar is double:
-        lapack.dgecon(norm, &N, <double *>LU.data, &N, &normA,
-                      &drcond, <double *>work.data,
-                      <l_int *>iwork.data, &info)
-    elif scalar is float_complex:
-        lapack.cgecon(norm, &N, <float complex *>LU.data, &N, &snormA,
-                      &srcond, <float complex *>work.data,
-                      <float *>rwork.data, &info)
-    elif scalar is double_complex:
-        lapack.zgecon(norm, &N, <double complex *>LU.data, &N, &normA,
-                      &drcond, <double complex *>work.data,
-                      <double *>rwork.data, &info)
+    assert_fortran_mat(LU)
 
-    assert info == 0, "Argument error in gecon"
+    N = LU.shape[0]
+    work = np.empty(2*N, dtype = np.complex128)
+    rwork = np.empty(2*N, dtype = np.float64)
 
-    if scalar in single_precision:
-        return srcond
-    else:
-        return drcond
+    f_lapack.zgecon_(norm, &N, <double complex *>LU.data, &N, &normA,
+                     &rcond, <double complex *>work.data,
+                     <double *>rwork.data, &info)
 
+    assert info == 0, "Argument error in zgecon"
+
+    return rcond
+
+# Wrappers for xGGEV
 
 # Helper function for xGGEV
 def ggev_postprocess(dtype, alphar, alphai, vl_r=None, vr_r=None):
@@ -264,363 +363,707 @@ def ggev_postprocess(dtype, alphar, alphai, vl_r=None, vr_r=None):
     return (alpha, vl, vr)
 
 
-def ggev(np.ndarray[scalar, ndim=2] A, np.ndarray[scalar, ndim=2] B,
-         left=False, right=True):
+def sggev(np.ndarray[np.float32_t, ndim=2] A,
+          np.ndarray[np.float32_t, ndim=2] B,
+          left=False, right=True):
     cdef l_int N, info, lwork
-
-    # Parameter checks
+    cdef char *jobvl
+    cdef char *jobvr
+    cdef np.ndarray[np.float32_t, ndim=2] vl_r, vr_r
+    cdef float *vl_ptr
+    cdef float *vr_ptr
+    cdef float qwork
+    cdef np.ndarray[np.float32_t, ndim=1] work, alphar, alphai, beta
 
     assert_fortran_mat(A, B)
 
-    if A.ndim != 2 or A.ndim != 2:
-        raise ValueError("gen_eig requires both a and be to be matrices")
+    N = A.shape[0]
+    alphar = np.empty(N, dtype = np.float32)
+    alphai = np.empty(N, dtype = np.float32)
+    beta = np.empty(N, dtype = np.float32)
+
+    if left:
+        vl_r = np.empty((N,N), dtype = np.float32, order='F')
+        vl_ptr = <float *>vl_r.data
+        jobvl = "V"
+    else:
+        vl_r = None
+        vl_ptr = NULL
+        jobvl = "N"
+
+    if right:
+        vr_r = np.empty((N,N), dtype = np.float32, order='F')
+        vr_ptr = <float *>vr_r.data
+        jobvr = "V"
+    else:
+        vr_r = None
+        vr_ptr = NULL
+        jobvr = "N"
+
+    # workspace query
+    lwork = -1
+
+    f_lapack.sggev_(jobvl, jobvr, &N, <float *>A.data, &N,
+                    <float *>B.data, &N,
+                    <float *>alphar.data, <float *> alphai.data,
+                    <float *>beta.data,
+                    vl_ptr, &N, vr_ptr, &N,
+                    &qwork, &lwork, &info)
+
+    assert info == 0, "Argument error in sggev"
+
+    lwork = <l_int>qwork
+    work = np.empty(lwork, dtype = np.float32)
 
-    if A.shape[0] != A.shape[1]:
-        raise ValueError("gen_eig requires square matrix input")
+    # Now the real calculation
+    f_lapack.sggev_(jobvl, jobvr, &N, <float *>A.data, &N,
+                    <float *>B.data, &N,
+                    <float *>alphar.data, <float *> alphai.data,
+                    <float *>beta.data,
+                    vl_ptr, &N, vr_ptr, &N,
+                    <float *>work.data, &lwork, &info)
 
-    if A.shape[0] != B.shape[0] or A.shape[1] != B.shape[1]:
-        raise ValueError("A and B do not have the same shape")
+    if info > 0:
+        raise LinAlgError("QZ iteration failed to converge in sggev")
+
+    assert info == 0, "Argument error in sggev"
+
+    alpha, vl, vr = ggev_postprocess(np.complex64, alphar, alphai, vl_r, vr_r)
+
+    return filter_args((True, True, left, right), (alpha, beta, vl, vr))
+
+
+def dggev(np.ndarray[np.float64_t, ndim=2] A,
+          np.ndarray[np.float64_t, ndim=2] B,
+          left=False, right=True):
+    cdef l_int N, info, lwork
+    cdef char *jobvl
+    cdef char *jobvr
+    cdef np.ndarray[np.float64_t, ndim=2] vl_r, vr_r
+    cdef double *vl_ptr
+    cdef double *vr_ptr
+    cdef double qwork
+    cdef np.ndarray[np.float64_t, ndim=1] work, alphar, alphai, beta
 
-    # Allocate workspaces
+    assert_fortran_mat(A, B)
 
     N = A.shape[0]
+    alphar = np.empty(N, dtype = np.float64)
+    alphai = np.empty(N, dtype = np.float64)
+    beta = np.empty(N, dtype = np.float64)
+
+    if left:
+        vl_r = np.empty((N,N), dtype = np.float64, order='F')
+        vl_ptr = <double *>vl_r.data
+        jobvl = "V"
+    else:
+        vl_r = None
+        vl_ptr = NULL
+        jobvl = "N"
 
-    cdef np.ndarray[scalar] alphar, alphai
-    if scalar in cmplx:
-        alphar = np.empty(N, dtype=A.dtype)
-        alphai = None
+    if right:
+        vr_r = np.empty((N,N), dtype = np.float64, order='F')
+        vr_ptr = <double *>vr_r.data
+        jobvr = "V"
     else:
-        alphar = np.empty(N, dtype=A.dtype)
-        alphai = np.empty(N, dtype=A.dtype)
+        vr_r = None
+        vr_ptr = NULL
+        jobvr = "N"
+
+    # workspace query
+    lwork = -1
+
+    f_lapack.dggev_(jobvl, jobvr, &N, <double *>A.data, &N,
+                    <double *>B.data, &N,
+                    <double *>alphar.data, <double *> alphai.data,
+                    <double *>beta.data,
+                    vl_ptr, &N, vr_ptr, &N,
+                    &qwork, &lwork, &info)
+
+    assert info == 0, "Argument error in dggev"
+
+    lwork = <l_int>qwork
+    work = np.empty(lwork, dtype = np.float64)
+
+    # Now the real calculation
+    f_lapack.dggev_(jobvl, jobvr, &N, <double *>A.data, &N,
+                    <double *>B.data, &N,
+                    <double *>alphar.data, <double *> alphai.data,
+                    <double *>beta.data,
+                    vl_ptr, &N, vr_ptr, &N,
+                    <double *>work.data, &lwork, &info)
+
+    if info > 0:
+        raise LinAlgError("QZ iteration failed to converge in dggev")
+
+    assert info == 0, "Argument error in dggev"
 
-    cdef np.ndarray[scalar] beta = np.empty(N, dtype=A.dtype)
+    alpha, vl, vr = ggev_postprocess(np.complex128, alphar, alphai, vl_r, vr_r)
+
+    return filter_args((True, True, left, right), (alpha, beta, vl, vr))
 
-    cdef np.ndarray rwork = None
-    if scalar is float_complex:
-        rwork = np.empty(8 * N, dtype=np.float32)
-    elif scalar is double_complex:
-        rwork = np.empty(8 * N, dtype=np.float64)
 
-    cdef np.ndarray vl
-    cdef scalar *vl_ptr
+def cggev(np.ndarray[np.complex64_t, ndim=2] A,
+          np.ndarray[np.complex64_t, ndim=2] B,
+          left=False, right=True):
+    cdef l_int N, info, lwork
     cdef char *jobvl
+    cdef char *jobvr
+    cdef np.ndarray[np.complex64_t, ndim=2] vl, vr
+    cdef float complex *vl_ptr
+    cdef float complex *vr_ptr
+    cdef float complex qwork
+    cdef np.ndarray[np.complex64_t, ndim=1] work, alpha, beta
+    cdef np.ndarray[np.float32_t, ndim=1] rwork
+
+    assert_fortran_mat(A, B)
+
+    N = A.shape[0]
+    alpha = np.empty(N, dtype = np.complex64)
+    beta = np.empty(N, dtype = np.complex64)
+
     if left:
-        vl = np.empty((N,N), dtype=A.dtype, order='F')
-        vl_ptr = <scalar *>vl.data
+        vl = np.empty((N,N), dtype = np.complex64, order='F')
+        vl_ptr = <float complex *>vl.data
         jobvl = "V"
     else:
-        vl = None
         vl_ptr = NULL
         jobvl = "N"
 
-    cdef np.ndarray vr
-    cdef scalar *vr_ptr
-    cdef char *jobvr
     if right:
-        vr = np.empty((N,N), dtype=A.dtype, order='F')
-        vr_ptr = <scalar *>vr.data
+        vr = np.empty((N,N), dtype = np.complex64, order='F')
+        vr_ptr = <float complex *>vr.data
         jobvr = "V"
     else:
-        vr = None
         vr_ptr = NULL
         jobvr = "N"
 
-    # Workspace query
-    # Xggev expects &qwork as a <scalar *> (even though it's an integer)
+    rwork = np.empty(8 * N, dtype = np.float32)
+
+    # workspace query
     lwork = -1
-    cdef scalar qwork
+    work = np.empty(1, dtype = np.complex64)
 
-    if scalar is float:
-        lapack.sggev(jobvl, jobvr, &N, <float *>A.data, &N,
-                     <float *>B.data, &N,
-                     <float *>alphar.data, <float *> alphai.data,
-                     <float *>beta.data,
-                     vl_ptr, &N, vr_ptr, &N,
-                     &qwork, &lwork, &info)
-    elif scalar is double:
-        lapack.dggev(jobvl, jobvr, &N, <double *>A.data, &N,
-                     <double *>B.data, &N,
-                     <double *>alphar.data, <double *> alphai.data,
-                     <double *>beta.data,
-                     vl_ptr, &N, vr_ptr, &N,
-                     &qwork, &lwork, &info)
-    elif scalar is float_complex:
-        lapack.cggev(jobvl, jobvr, &N, <float complex *>A.data, &N,
-                     <float complex *>B.data, &N,
-                     <float complex *>alphar.data, <float complex *>beta.data,
-                     vl_ptr, &N, vr_ptr, &N,
-                     &qwork, &lwork,
-                     <float *>rwork.data, &info)
-    elif scalar is double_complex:
-        lapack.zggev(jobvl, jobvr, &N, <double complex *>A.data, &N,
-                     <double complex *>B.data, &N,
-                     <double complex *>alphar.data, <double complex *>beta.data,
-                     vl_ptr, &N, vr_ptr, &N,
-                     &qwork, &lwork,
-                     <double *>rwork.data, &info)
+    f_lapack.cggev_(jobvl, jobvr, &N, <float complex *>A.data, &N,
+                    <float complex *>B.data, &N,
+                    <float complex *>alpha.data, <float complex *>beta.data,
+                    vl_ptr, &N, vr_ptr, &N,
+                    &qwork, &lwork,
+                    <float *>rwork.data, &info)
 
-    assert info == 0, "Argument error in ggev"
+    assert info == 0, "Argument error in cggev"
 
-    lwork = lwork_from_qwork(qwork)
-    cdef np.ndarray[scalar] work = np.empty(lwork, dtype=A.dtype)
+    lwork = <l_int>qwork.real
 
-    # The actual calculation
+    work = np.empty(lwork, dtype = np.complex64)
 
-    if scalar is float:
-        lapack.sggev(jobvl, jobvr, &N, <float *>A.data, &N,
-                     <float *>B.data, &N,
-                     <float *>alphar.data, <float *> alphai.data,
-                     <float *>beta.data,
-                     vl_ptr, &N, vr_ptr, &N,
-                     <float *>work.data, &lwork, &info)
-    elif scalar is double:
-        lapack.dggev(jobvl, jobvr, &N, <double *>A.data, &N,
-                     <double *>B.data, &N,
-                     <double *>alphar.data, <double *> alphai.data,
-                     <double *>beta.data,
-                     vl_ptr, &N, vr_ptr, &N,
-                     <double *>work.data, &lwork, &info)
-    elif scalar is float_complex:
-        lapack.cggev(jobvl, jobvr, &N, <float complex *>A.data, &N,
-                     <float complex *>B.data, &N,
-                     <float complex *>alphar.data, <float complex *>beta.data,
-                     vl_ptr, &N, vr_ptr, &N,
-                     <float complex *>work.data, &lwork,
-                     <float *>rwork.data, &info)
-    elif scalar is double_complex:
-        lapack.zggev(jobvl, jobvr, &N, <double complex *>A.data, &N,
-                     <double complex *>B.data, &N,
-                     <double complex *>alphar.data, <double complex *>beta.data,
-                     vl_ptr, &N, vr_ptr, &N,
-                     <double complex *>work.data, &lwork,
-                     <double *>rwork.data, &info)
+    # Now the real calculation
+    f_lapack.cggev_(jobvl, jobvr, &N, <float complex *>A.data, &N,
+                    <float complex *>B.data, &N,
+                    <float complex *>alpha.data, <float complex *>beta.data,
+                    vl_ptr, &N, vr_ptr, &N,
+                    <float complex *>work.data, &lwork,
+                    <float *>rwork.data, &info)
 
     if info > 0:
-        raise LinAlgError("QZ iteration failed to converge in sggev")
+        raise LinAlgError("QZ iteration failed to converge in cggev")
 
-    assert info == 0, "Argument error in ggev"
+    assert info == 0, "Argument error in cggev"
 
-    if scalar is float:
-        post_dtype = np.complex64
-    elif scalar is double:
-        post_dtype = np.complex128
+    return filter_args((True, True, left, right), (alpha, beta, vl, vr))
 
-    cdef np.ndarray alpha
-    alpha = alphar
-    if scalar in floating:
-        alpha, vl, vr = ggev_postprocess(post_dtype, alphar, alphai, vl, vr)
 
-    return filter_args((True, True, left, right), (alpha, beta, vl, vr))
+def zggev(np.ndarray[np.complex128_t, ndim=2] A,
+          np.ndarray[np.complex128_t, ndim=2] B,
+          left=False, right=True):
+    cdef l_int N, info, lwork
+    cdef char *jobvl
+    cdef char *jobvr
+    cdef np.ndarray[np.complex128_t, ndim=2] vl, vr
+    cdef double complex *vl_ptr
+    cdef double complex *vr_ptr
+    cdef double complex qwork
+    cdef np.ndarray[np.complex128_t, ndim=1] work, alpha, beta
+    cdef np.ndarray[np.float64_t, ndim=1] rwork
+
+    assert_fortran_mat(A, B)
 
+    N = A.shape[0]
+    alpha = np.empty(N, dtype = np.complex128)
+    beta = np.empty(N, dtype = np.complex128)
 
-def gees(np.ndarray[scalar, ndim=2] A, calc_q=True, calc_ev=True):
-    cdef l_int N, lwork, sdim, info
+    if left:
+        vl = np.empty((N,N), dtype = np.complex128, order='F')
+        vl_ptr = <double complex *>vl.data
+        jobvl = "V"
+    else:
+        vl_ptr = NULL
+        jobvl = "N"
 
-    assert_fortran_mat(A)
+    if right:
+        vr = np.empty((N,N), dtype = np.complex128, order='F')
+        vr_ptr = <double complex *>vr.data
+        jobvr = "V"
+    else:
+        vr_ptr = NULL
+        jobvr = "N"
 
-    if A.ndim != 2:
-        raise ValueError("Expect matrix as input")
+    rwork = np.empty(8 * N, dtype = np.float64)
 
-    if A.shape[0] != A.shape[1]:
-        raise ValueError("Expect square matrix")
+    # workspace query
+    lwork = -1
+    work = np.empty(1, dtype = np.complex128)
 
-    # Allocate workspaces
+    f_lapack.zggev_(jobvl, jobvr, &N, <double complex *>A.data, &N,
+                    <double complex *>B.data, &N,
+                    <double complex *>alpha.data, <double complex *>beta.data,
+                    vl_ptr, &N, vr_ptr, &N,
+                    &qwork, &lwork,
+                    <double *>rwork.data, &info)
 
-    N = A.shape[0]
+    assert info == 0, "Argument error in zggev"
 
-    cdef np.ndarray[scalar] wr, wi
-    if scalar in cmplx:
-        wr = np.empty(N, dtype=A.dtype)
-        wi = None
-    else:
-        wr = np.empty(N, dtype=A.dtype)
-        wi = np.empty(N, dtype=A.dtype)
+    lwork = <l_int>qwork.real
+    work = np.empty(lwork, dtype = np.complex128)
+
+    # Now the real calculation
+    f_lapack.zggev_(jobvl, jobvr, &N, <double complex *>A.data, &N,
+                    <double complex *>B.data, &N,
+                    <double complex *>alpha.data, <double complex *>beta.data,
+                    vl_ptr, &N, vr_ptr, &N,
+                    <double complex *>work.data, &lwork,
+                    <double *>rwork.data, &info)
+
+    if info > 0:
+        raise LinAlgError("QZ iteration failed to converge in zggev")
 
-    cdef np.ndarray rwork
-    if scalar is float_complex:
-        rwork = np.empty(N, dtype=np.float32)
-    elif scalar is double_complex:
-        rwork = np.empty(N, dtype=np.float64)
+    assert info == 0, "Argument error in zggev"
 
+    return filter_args((True, True, left, right), (alpha, beta, vl, vr))
+
+
+# Wrapper for xGEES
+def sgees(np.ndarray[np.float32_t, ndim=2] A,
+          calc_q=True, calc_ev=True):
+    cdef l_int N, lwork, sdim, info
     cdef char *jobvs
-    cdef scalar *vs_ptr
-    cdef np.ndarray[scalar, ndim=2] vs
+    cdef float *vs_ptr
+    cdef float qwork
+    cdef np.ndarray[np.float32_t, ndim=2] vs
+    cdef np.ndarray[np.float32_t] wr, wi, work
+
+    assert_fortran_mat(A)
+
+    N = A.shape[0]
+    wr = np.empty(N, dtype = np.float32)
+    wi = np.empty(N, dtype = np.float32)
+
     if calc_q:
-        vs = np.empty((N,N), dtype=A.dtype, order='F')
-        vs_ptr = <scalar *>vs.data
+        vs = np.empty((N,N), dtype = np.float32, order='F')
+        vs_ptr = <float *>vs.data
         jobvs = "V"
     else:
-        vs = None
         vs_ptr = NULL
         jobvs = "N"
 
-    # Workspace query
-    # Xgees expects &qwork as a <scalar *> (even though it's an integer)
+    # workspace query
     lwork = -1
-    cdef scalar qwork
-
-    if scalar is float:
-        lapack.sgees(jobvs, "N", NULL, &N, <float *>A.data, &N,
-                     &sdim, <float *>wr.data, <float *>wi.data, vs_ptr, &N,
-                     &qwork, &lwork, NULL, &info)
-    elif scalar is double:
-        lapack.dgees(jobvs, "N", NULL, &N, <double *>A.data, &N,
-                     &sdim, <double *>wr.data, <double *>wi.data, vs_ptr, &N,
-                     &qwork, &lwork, NULL, &info)
-    elif scalar is float_complex:
-        lapack.cgees(jobvs, "N", NULL, &N, <float complex *>A.data, &N,
-                     &sdim, <float complex *>wr.data, vs_ptr, &N,
-                     &qwork, &lwork, <float *>rwork.data, NULL, &info)
-    elif scalar is double_complex:
-        lapack.zgees(jobvs, "N", NULL, &N, <double complex *>A.data, &N,
-                     &sdim, <double complex *>wr.data, vs_ptr, &N,
-                     &qwork, &lwork, <double *>rwork.data, NULL, &info)
+    f_lapack.sgees_(jobvs, "N", NULL, &N, <float *>A.data, &N,
+                    &sdim, <float *>wr.data, <float *>wi.data, vs_ptr, &N,
+                    &qwork, &lwork, NULL, &info)
 
     assert info == 0, "Argument error in sgees"
 
-    lwork = lwork_from_qwork(qwork)
-    cdef np.ndarray[scalar] work = np.empty(lwork, dtype=A.dtype)
-
-    # The actual calculation
-
-    if scalar is float:
-        lapack.sgees(jobvs, "N", NULL, &N, <float *>A.data, &N,
-                     &sdim, <float *>wr.data, <float *>wi.data, vs_ptr, &N,
-                     <float *>work.data, &lwork, NULL, &info)
-    elif scalar is double:
-        lapack.dgees(jobvs, "N", NULL, &N, <double *>A.data, &N,
-                     &sdim, <double *>wr.data, <double *>wi.data, vs_ptr, &N,
-                     <double *>work.data, &lwork, NULL, &info)
-    elif scalar is float_complex:
-        lapack.cgees(jobvs, "N", NULL, &N, <float complex *>A.data, &N,
-                     &sdim, <float complex *>wr.data, vs_ptr, &N,
-                     <float complex *>work.data, &lwork,
-                     <float *>rwork.data, NULL, &info)
-    elif scalar is double_complex:
-        lapack.zgees(jobvs, "N", NULL, &N, <double complex *>A.data, &N,
-                     &sdim, <double complex *>wr.data, vs_ptr, &N,
-                     <double complex *>work.data, &lwork,
-                     <double *>rwork.data, NULL, &info)
+    lwork = <int>qwork
+    work = np.empty(lwork, dtype = np.float32)
+
+    # Now the real calculation
+    f_lapack.sgees_(jobvs, "N", NULL, &N, <float *>A.data, &N,
+                    &sdim, <float *>wr.data, <float *>wi.data, vs_ptr, &N,
+                    <float *>work.data, &lwork, NULL, &info)
 
     if info > 0:
-        raise LinAlgError("QR iteration failed to converge in gees")
+        raise LinAlgError("QR iteration failed to converge in sgees")
 
-    assert info == 0, "Argument error in gees"
+    assert info == 0, "Argument error in sgees"
 
-    # Real inputs possibly produce complex output
-    cdef np.ndarray w = maybe_complex[scalar](0, wr, wi)
+    if wi.nonzero()[0].size:
+        w = wr + 1j * wi
+    else:
+        w = wr
 
     return filter_args((True, calc_q, calc_ev), (A, vs, w))
 
 
-def trsen(np.ndarray[l_logical] select,
-          np.ndarray[scalar, ndim=2] T,
-          np.ndarray[scalar, ndim=2] Q,
-          calc_ev=True):
-    cdef l_int N, M, lwork, liwork, qiwork, info
-
-    assert_fortran_mat(T, Q)
+def dgees(np.ndarray[np.float64_t, ndim=2] A,
+          calc_q=True, calc_ev=True):
+    cdef l_int N, lwork, sdim, info
+    cdef char *jobvs
+    cdef double *vs_ptr
+    cdef double qwork
+    cdef np.ndarray[np.float64_t, ndim=2] vs
+    cdef np.ndarray[np.float64_t] wr, wi, work
 
-    # Allocate workspaces
+    assert_fortran_mat(A)
 
-    N = T.shape[0]
+    N = A.shape[0]
+    wr = np.empty(N, dtype = np.float64)
+    wi = np.empty(N, dtype = np.float64)
 
-    cdef np.ndarray[scalar] wr, wi
-    if scalar in cmplx:
-        wr = np.empty(N, dtype=T.dtype)
-        wi = None
+    if calc_q:
+        vs = np.empty((N,N), dtype = np.float64, order='F')
+        vs_ptr = <double *>vs.data
+        jobvs = "V"
     else:
-        wr = np.empty(N, dtype=T.dtype)
-        wi = np.empty(N, dtype=T.dtype)
+        vs_ptr = NULL
+        jobvs = "N"
 
-    cdef char *compq
-    cdef scalar *q_ptr
-    if Q is not None:
-        compq = "V"
-        q_ptr = <scalar *>Q.data
-    else:
-        compq = "N"
-        q_ptr = NULL
+    # workspace query
+    lwork = -1
+    f_lapack.dgees_(jobvs, "N", NULL, &N, <double *>A.data, &N,
+                    &sdim, <double *>wr.data, <double *>wi.data, vs_ptr, &N,
+                    &qwork, &lwork, NULL, &info)
 
-    # Workspace query
-    # Xtrsen expects &qwork as a <scalar *> (even though it's an integer)
-    cdef scalar qwork
-    lwork = liwork = -1
+    assert info == 0, "Argument error in dgees"
 
-    if scalar is float:
-        lapack.strsen("N", compq, <l_logical *>select.data,
-                      &N, <float *>T.data, &N, q_ptr, &N,
-                      <float *>wr.data, <float *>wi.data, &M, NULL, NULL,
-                      &qwork, &lwork, &qiwork, &liwork, &info)
-    elif scalar is double:
-        lapack.dtrsen("N", compq, <l_logical *>select.data,
-                      &N, <double *>T.data, &N, q_ptr, &N,
-                      <double *>wr.data, <double *>wi.data, &M, NULL, NULL,
-                      &qwork, &lwork, &qiwork, &liwork, &info)
-    elif scalar is float_complex:
-        lapack.ctrsen("N", compq, <l_logical *>select.data,
-                      &N, <float complex *>T.data, &N, q_ptr, &N,
-                      <float complex *>wr.data, &M, NULL, NULL,
-                      &qwork, &lwork, &info)
-    elif scalar is double_complex:
-        lapack.ztrsen("N", compq, <l_logical *>select.data,
-                      &N, <double complex *>T.data, &N, q_ptr, &N,
-                      <double complex *>wr.data, &M, NULL, NULL,
-                      &qwork, &lwork, &info)
-
-    assert info == 0, "Argument error in trsen"
-
-    lwork = lwork_from_qwork(qwork)
-    cdef np.ndarray[scalar] work = np.empty(lwork, dtype=T.dtype)
-
-    cdef np.ndarray[l_int] iwork = None
-    if scalar in floating:
-        liwork = qiwork
-        iwork = np.empty(liwork, dtype=int_dtype)
-
-    # Tha actual calculation
-
-    if scalar is float:
-        lapack.strsen("N", compq, <l_logical *>select.data,
-                      &N, <float *>T.data, &N, q_ptr, &N,
-                      <float *>wr.data, <float *>wi.data, &M, NULL, NULL,
-                      <float *>work.data, &lwork,
-                      <l_int *>iwork.data, &liwork, &info)
-    elif scalar is double:
-        lapack.dtrsen("N", compq, <l_logical *>select.data,
-                      &N, <double *>T.data, &N, q_ptr, &N,
-                      <double *>wr.data, <double *>wi.data, &M, NULL, NULL,
-                      <double *>work.data, &lwork,
-                      <l_int *>iwork.data, &liwork, &info)
-    elif scalar is float_complex:
-        lapack.ctrsen("N", compq, <l_logical *>select.data,
-                      &N, <float complex *>T.data, &N, q_ptr, &N,
-                      <float complex *>wr.data, &M, NULL, NULL,
-                      <float complex *>work.data, &lwork, &info)
-    elif scalar is double_complex:
-        lapack.ztrsen("N", compq, <l_logical *>select.data,
-                      &N, <double complex *>T.data, &N, q_ptr, &N,
-                      <double complex *>wr.data, &M, NULL, NULL,
-                      <double complex *>work.data, &lwork, &info)
+    lwork = <int>qwork
+    work = np.empty(lwork, dtype = np.float64)
+
+    # Now the real calculation
+    f_lapack.dgees_(jobvs, "N", NULL, &N, <double *>A.data, &N,
+                    &sdim, <double *>wr.data, <double *>wi.data, vs_ptr, &N,
+                    <double *>work.data, &lwork, NULL, &info)
 
     if info > 0:
-        raise LinAlgError("Reordering failed; problem is very ill-conditioned")
+        raise LinAlgError("QR iteration failed to converge in dgees")
 
-    assert info == 0, "Argument error in trsen"
+    assert info == 0, "Argument error in dgees"
 
-    # Real inputs possibly produce complex output
-    cdef np.ndarray w = maybe_complex[scalar](0, wr, wi)
+    if wi.nonzero()[0].size:
+        w = wr + 1j * wi
+    else:
+        w = wr
 
-    return filter_args((True, Q is not None, calc_ev), (T, Q, w))
+    return filter_args((True, calc_q, calc_ev), (A, vs, w))
 
 
-# Helper function for xTREVC and xTGEVC
-def txevc_postprocess(dtype, T, vreal, np.ndarray[l_logical] select):
-    cdef int N, M, i, m, indx
+def cgees(np.ndarray[np.complex64_t, ndim=2] A,
+          calc_q=True, calc_ev=True):
+    cdef l_int N, lwork, sdim, info
+    cdef char *jobvs
+    cdef float complex *vs_ptr
+    cdef float complex qwork
+    cdef np.ndarray[np.complex64_t, ndim=2] vs
+    cdef np.ndarray[np.complex64_t] w, work
+    cdef np.ndarray[np.float32_t] rwork
+
+    assert_fortran_mat(A)
+
+    N = A.shape[0]
+    w = np.empty(N, dtype = np.complex64)
+    rwork = np.empty(N, dtype = np.float32)
+
+    if calc_q:
+        vs = np.empty((N,N), dtype = np.complex64, order='F')
+        vs_ptr = <float complex *>vs.data
+        jobvs = "V"
+    else:
+        vs_ptr = NULL
+        jobvs = "N"
+
+    # workspace query
+    lwork = -1
+    f_lapack.cgees_(jobvs, "N", NULL, &N, <float complex *>A.data, &N,
+                    &sdim, <float complex *>w.data, vs_ptr, &N,
+                    &qwork, &lwork, <float *>rwork.data, NULL, &info)
+
+    assert info == 0, "Argument error in cgees"
+
+    lwork = <int>qwork.real
+    work = np.empty(lwork, dtype = np.complex64)
+
+    # Now the real calculation
+    f_lapack.cgees_(jobvs, "N", NULL, &N, <float complex *>A.data, &N,
+                    &sdim, <float complex *>w.data, vs_ptr, &N,
+                    <float complex *>work.data, &lwork,
+                    <float *>rwork.data, NULL, &info)
+
+    if info > 0:
+        raise LinAlgError("QR iteration failed to converge in cgees")
+
+    assert info == 0, "Argument error in cgees"
+
+    return filter_args((True, calc_q, calc_ev), (A, vs, w))
+
+
+def zgees(np.ndarray[np.complex128_t, ndim=2] A,
+          calc_q=True, calc_ev=True):
+    cdef l_int N, lwork, sdim, info
+    cdef char *jobvs
+    cdef double complex *vs_ptr
+    cdef double complex qwork
+    cdef np.ndarray[np.complex128_t, ndim=2] vs
+    cdef np.ndarray[np.complex128_t] w, work
+    cdef np.ndarray[np.float64_t] rwork
+
+    assert_fortran_mat(A)
+
+    N = A.shape[0]
+    w = np.empty(N, dtype = np.complex128)
+    rwork = np.empty(N, dtype = np.float64)
+
+    if calc_q:
+        vs = np.empty((N,N), dtype = np.complex128, order='F')
+        vs_ptr = <double complex *>vs.data
+        jobvs = "V"
+    else:
+        vs_ptr = NULL
+        jobvs = "N"
+
+    # workspace query
+    lwork = -1
+    f_lapack.zgees_(jobvs, "N", NULL, &N, <double complex *>A.data, &N,
+                    &sdim, <double complex *>w.data, vs_ptr, &N,
+                    &qwork, &lwork, <double *>rwork.data, NULL, &info)
+
+    assert info == 0, "Argument error in zgees"
+
+    lwork = <int>qwork.real
+    work = np.empty(lwork, dtype = np.complex128)
+
+    # Now the real calculation
+    f_lapack.zgees_(jobvs, "N", NULL, &N, <double complex *>A.data, &N,
+                    &sdim, <double complex *>w.data, vs_ptr, &N,
+                    <double complex *>work.data, &lwork,
+                    <double *>rwork.data, NULL, &info)
+
+    if info > 0:
+        raise LinAlgError("QR iteration failed to converge in zgees")
+
+    assert info == 0, "Argument error in zgees"
+
+    return filter_args((True, calc_q, calc_ev), (A, vs, w))
+
+# Wrapper for xTRSEN
+def strsen(np.ndarray[l_logical] select,
+           np.ndarray[np.float32_t, ndim=2] T,
+           np.ndarray[np.float32_t, ndim=2] Q=None,
+           calc_ev=True):
+    cdef l_int N, M, lwork, liwork, qiwork, info
+    cdef char *compq
+    cdef float qwork
+    cdef float *q_ptr
+    cdef np.ndarray[np.float32_t] wr, wi, work
+    cdef np.ndarray[l_int] iwork
+
+    assert_fortran_mat(T, Q)
+
+    N = T.shape[0]
+    wr = np.empty(N, dtype = np.float32)
+    wi = np.empty(N, dtype = np.float32)
+
+    if Q is not None:
+        compq = "V"
+        q_ptr = <float *>Q.data
+    else:
+        compq = "N"
+        q_ptr = NULL
+
+    # workspace query
+    lwork = liwork = -1
+    f_lapack.strsen_("N", compq, <l_logical *>select.data,
+                     &N, <float *>T.data, &N, q_ptr, &N,
+                     <float *>wr.data, <float *>wi.data, &M, NULL, NULL,
+                     &qwork, &lwork, &qiwork, &liwork, &info)
+
+    assert info == 0, "Argument error in strsen"
+
+    lwork = <int>qwork
+    work = np.empty(lwork, dtype = np.float32)
+    liwork = qiwork
+    iwork = np.empty(liwork, dtype = f_lapack.l_int_dtype)
+
+    # Now the real calculation
+    f_lapack.strsen_("N", compq, <l_logical *>select.data,
+                     &N, <float *>T.data, &N, q_ptr, &N,
+                     <float *>wr.data, <float *>wi.data, &M, NULL, NULL,
+                     <float *>work.data, &lwork,
+                     <int *>iwork.data, &liwork, &info)
+
+    if info > 0:
+        raise LinAlgError("Reordering failed; problem is very ill-conditioned")
+
+    assert info == 0, "Argument error in strsen"
+
+    if wi.nonzero()[0].size:
+        w = wr + 1j * wi
+    else:
+        w = wr
+
+    return filter_args((True, Q is not None, calc_ev), (T, Q, w))
+
+
+def dtrsen(np.ndarray[l_logical] select,
+           np.ndarray[np.float64_t, ndim=2] T,
+           np.ndarray[np.float64_t, ndim=2] Q=None,
+           calc_ev=True):
+    cdef l_int N, M, lwork, liwork, qiwork, info
+    cdef char *compq
+    cdef double qwork
+    cdef double *q_ptr
+    cdef np.ndarray[np.float64_t] wr, wi, work
+    cdef np.ndarray[l_int] iwork
+
+    assert_fortran_mat(T, Q)
+
+    N = T.shape[0]
+    wr = np.empty(N, dtype = np.float64)
+    wi = np.empty(N, dtype = np.float64)
+
+    if Q is not None:
+        compq = "V"
+        q_ptr = <double *>Q.data
+    else:
+        compq = "N"
+        q_ptr = NULL
+
+    # workspace query
+    lwork = liwork = -1
+    f_lapack.dtrsen_("N", compq, <l_logical *>select.data,
+                     &N, <double *>T.data, &N, q_ptr, &N,
+                     <double *>wr.data, <double *>wi.data, &M, NULL, NULL,
+                     &qwork, &lwork, &qiwork, &liwork, &info)
+
+    assert info == 0, "Argument error in dtrsen"
+
+    lwork = <int>qwork
+    work = np.empty(lwork, dtype = np.float64)
+    liwork = qiwork
+    iwork = np.empty(liwork, dtype = f_lapack.l_int_dtype)
+
+    # Now the real calculation
+    f_lapack.dtrsen_("N", compq, <l_logical *>select.data,
+                     &N, <double *>T.data, &N, q_ptr, &N,
+                     <double *>wr.data, <double *>wi.data, &M, NULL, NULL,
+                     <double *>work.data, &lwork,
+                     <int *>iwork.data, &liwork, &info)
+
+    if info > 0:
+        raise LinAlgError("Reordering failed; problem is very ill-conditioned")
+
+    assert info == 0, "Argument error in dtrsen"
+
+    if wi.nonzero()[0].size:
+        w = wr + 1j * wi
+    else:
+        w = wr
+
+    return filter_args((True, Q is not None, calc_ev), (T, Q, w))
+
+
+def ctrsen(np.ndarray[l_logical] select,
+           np.ndarray[np.complex64_t, ndim=2] T,
+           np.ndarray[np.complex64_t, ndim=2] Q=None,
+           calc_ev=True):
+    cdef l_int N, M, lwork, info
+    cdef char *compq
+    cdef float complex qwork
+    cdef float complex *q_ptr
+    cdef np.ndarray[np.complex64_t] w, work
+
+    assert_fortran_mat(T, Q)
+
+    N = T.shape[0]
+    w = np.empty(N, dtype = np.complex64)
+
+    if Q is not None:
+        compq = "V"
+        q_ptr = <float complex *>Q.data
+    else:
+        compq = "N"
+        q_ptr = NULL
+
+    # workspace query
+    lwork = -1
+    f_lapack.ctrsen_("N", compq, <l_logical *>select.data,
+                     &N, <float complex *>T.data, &N, q_ptr, &N,
+                     <float complex *>w.data, &M, NULL, NULL,
+                     &qwork, &lwork, &info)
+
+    assert info == 0, "Argument error in ctrsen"
+
+    lwork = <int>qwork.real
+    work = np.empty(lwork, dtype = np.complex64)
+
+    # Now the real calculation
+    f_lapack.ctrsen_("N", compq, <l_logical *>select.data,
+                     &N, <float complex *>T.data, &N, q_ptr, &N,
+                     <float complex *>w.data, &M, NULL, NULL,
+                     <float complex *>work.data, &lwork, &info)
+
+    if info > 0:
+        raise LinAlgError("Reordering failed; problem is very ill-conditioned")
+
+    assert info == 0, "Argument error in ctrsen"
+
+    return filter_args((True, Q is not None, calc_ev), (T, Q, w))
+
+
+def ztrsen(np.ndarray[l_logical] select,
+           np.ndarray[np.complex128_t, ndim=2] T,
+           np.ndarray[np.complex128_t, ndim=2] Q=None,
+           calc_ev=True):
+    cdef l_int N, MM, M, lwork, info
+    cdef char *compq
+    cdef double complex qwork
+    cdef double complex *q_ptr
+    cdef np.ndarray[np.complex128_t] w, work
+
+    assert_fortran_mat(T, Q)
+
+    N = T.shape[0]
+    w = np.empty(N, dtype = np.complex128)
+
+    if Q is not None:
+        compq = "V"
+        q_ptr = <double complex *>Q.data
+    else:
+        compq = "N"
+        q_ptr = NULL
+
+    # workspace query
+    lwork = -1
+    f_lapack.ztrsen_("N", compq, <l_logical *>select.data,
+                     &N, <double complex *>T.data, &N, q_ptr, &N,
+                     <double complex *>w.data, &M, NULL, NULL,
+                     &qwork, &lwork, &info)
+
+    assert info == 0, "Argument error in ztrsen"
+
+    lwork = <int>qwork.real
+    work = np.empty(lwork, dtype = np.complex128)
+
+    # Now the real calculation
+    f_lapack.ztrsen_("N", compq, <l_logical *>select.data,
+                     &N, <double complex *>T.data, &N, q_ptr, &N,
+                     <double complex *>w.data, &M, NULL, NULL,
+                     <double complex *>work.data, &lwork, &info)
+
+    if info > 0:
+        raise LinAlgError("Reordering failed; problem is very ill-conditioned")
+
+    assert info == 0, "Argument error in ztrsen"
+
+    return filter_args((True, Q is not None, calc_ev), (T, Q, w))
+
+
+# Helper function for xTREVC and xTGEVC
+def txevc_postprocess(dtype, T, vreal, np.ndarray[l_logical] select):
+    cdef int N, M, i, m, indx
 
     N = T.shape[0]
     if select is None:
-        select = np.ones(N, dtype = logical_dtype)
+        select = np.ones(N, dtype = f_lapack.l_logical_dtype)
     selindx = select.nonzero()[0]
     M = selindx.size
 
@@ -653,37 +1096,989 @@ def txevc_postprocess(dtype, T, vreal, np.ndarray[l_logical] select):
     return v
 
 
-def trevc(np.ndarray[scalar, ndim=2] T,
-          np.ndarray[scalar, ndim=2] Q,
-          np.ndarray[l_logical] select,
-          left=False, right=True):
-    cdef l_int N, info, M, MM
-    cdef char *side
-    cdef char *howmny
+# Wrappers for xTREVC
+def strevc(np.ndarray[np.float32_t, ndim=2] T,
+           np.ndarray[np.float32_t, ndim=2] Q=None,
+           np.ndarray[l_logical] select=None,
+           left=False, right=True):
+    cdef l_int N, info, M, MM
+    cdef char *side
+    cdef char *howmny
+    cdef np.ndarray[np.float32_t, ndim=2] vl_r, vr_r
+    cdef float *vl_r_ptr
+    cdef float *vr_r_ptr
+    cdef np.ndarray[l_logical] select_cpy
+    cdef l_logical *select_ptr
+    cdef np.ndarray[np.float32_t] work
+
+    assert_fortran_mat(T, Q)
+
+    N = T.shape[0]
+    work = np.empty(4*N, dtype = np.float32)
+
+    if left and right:
+        side = "B"
+    elif left:
+        side = "L"
+    elif right:
+        side = "R"
+    else:
+        return
+
+    if select is not None:
+        howmny = "S"
+        MM = select.nonzero()[0].size
+        # Correct for possible additional storage if a single complex
+        # eigenvalue is selected.
+        # For that: Figure out the positions of the 2x2 blocks.
+        cmplxindx = np.diagonal(T, -1).nonzero()[0]
+        for i in cmplxindx:
+            if bool(select[i]) != bool(select[i+1]):
+                MM += 1
+
+        # Select is overwritten in strevc.
+        select_cpy = np.array(select, dtype = f_lapack.l_logical_dtype,
+                              order = 'F')
+        select_ptr = <l_logical *>select_cpy.data
+    else:
+        MM = N
+        select_ptr = NULL
+        if Q is not None:
+            howmny = "B"
+        else:
+            howmny = "A"
+
+    if left:
+        if Q is not None and select is None:
+            vl_r = np.asfortranarray(Q.copy())
+        else:
+            vl_r = np.empty((N, MM), dtype = np.float32, order='F')
+        vl_r_ptr = <float *>vl_r.data
+    else:
+        vl_r_ptr = NULL
+
+    if right:
+        if Q is not None and select is None:
+            vr_r = np.asfortranarray(Q.copy())
+        else:
+            vr_r = np.empty((N, MM), dtype = np.float32, order='F')
+        vr_r_ptr = <float *>vr_r.data
+    else:
+        vr_r_ptr = NULL
+
+    f_lapack.strevc_(side, howmny, select_ptr,
+                     &N, <float *>T.data, &N,
+                     vl_r_ptr, &N, vr_r_ptr, &N, &MM, &M,
+                     <float *>work.data, &info)
+
+    assert info == 0, "Argument error in strevc"
+    assert MM == M, "Unexpected number of eigenvectors returned in strevc"
+
+    if select is not None and Q is not None:
+        if left:
+            vl_r = np.asfortranarray(np.dot(Q, vl_r))
+        if right:
+            vr_r = np.asfortranarray(np.dot(Q, vr_r))
+
+    # If there are complex eigenvalues, we need to postprocess the
+    # eigenvectors.
+    if np.diagonal(T, -1).nonzero()[0].size:
+        if left:
+            vl = txevc_postprocess(np.complex64, T, vl_r, select)
+        if right:
+            vr = txevc_postprocess(np.complex64, T, vr_r, select)
+    else:
+        if left:
+            vl = vl_r
+        if right:
+            vr = vr_r
+
+    if left and right:
+        return (vl, vr)
+    elif left:
+        return vl
+    else:
+        return vr
+
+
+def dtrevc(np.ndarray[np.float64_t, ndim=2] T,
+           np.ndarray[np.float64_t, ndim=2] Q=None,
+           np.ndarray[l_logical] select=None,
+           left=False, right=True):
+    cdef l_int N, info, M, MM
+    cdef char *side
+    cdef char *howmny
+    cdef np.ndarray[np.float64_t, ndim=2] vl_r, vr_r
+    cdef double *vl_r_ptr
+    cdef double *vr_r_ptr
+    cdef np.ndarray[l_logical] select_cpy
+    cdef l_logical *select_ptr
+    cdef np.ndarray[np.float64_t] work
+
+    assert_fortran_mat(T, Q)
+
+    N = T.shape[0]
+    work = np.empty(4*N, dtype = np.float64)
+
+    if left and right:
+        side = "B"
+    elif left:
+        side = "L"
+    elif right:
+        side = "R"
+    else:
+        return
+
+    if select is not None:
+        howmny = "S"
+        MM = select.nonzero()[0].size
+        # Correct for possible additional storage if a single complex
+        # eigenvalue is selected.
+        # For that: Figure out the positions of the 2x2 blocks.
+        cmplxindx = np.diagonal(T, -1).nonzero()[0]
+        for i in cmplxindx:
+            if bool(select[i]) != bool(select[i+1]):
+                MM += 1
+
+        # Select is overwritten in dtrevc.
+        select_cpy = np.array(select, dtype = f_lapack.l_logical_dtype,
+                              order = 'F')
+        select_ptr = <l_logical *>select_cpy.data
+    else:
+        MM = N
+        select_ptr = NULL
+        if Q is not None:
+            howmny = "B"
+        else:
+            howmny = "A"
+
+    if left:
+        if Q is not None and select is None:
+            vl_r = np.asfortranarray(Q.copy())
+        else:
+            vl_r = np.empty((N, MM), dtype = np.float64, order='F')
+        vl_r_ptr = <double *>vl_r.data
+    else:
+        vl_r_ptr = NULL
+
+    if right:
+        if Q is not None and select is None:
+            vr_r = np.asfortranarray(Q.copy())
+        else:
+            vr_r = np.empty((N, MM), dtype = np.float64, order='F')
+        vr_r_ptr = <double *>vr_r.data
+    else:
+        vr_r_ptr = NULL
+
+    f_lapack.dtrevc_(side, howmny, select_ptr,
+                     &N, <double *>T.data, &N,
+                     vl_r_ptr, &N, vr_r_ptr, &N, &MM, &M,
+                     <double *>work.data, &info)
+
+    assert info == 0, "Argument error in dtrevc"
+    assert MM == M, "Unexpected number of eigenvectors returned in dtrevc"
+
+    if select is not None and Q is not None:
+        if left:
+            vl_r = np.asfortranarray(np.dot(Q, vl_r))
+        if right:
+            vr_r = np.asfortranarray(np.dot(Q, vr_r))
+
+    # If there are complex eigenvalues, we need to postprocess the eigenvectors
+    if np.diagonal(T, -1).nonzero()[0].size:
+        if left:
+            vl = txevc_postprocess(np.complex128, T, vl_r, select)
+        if right:
+            vr = txevc_postprocess(np.complex128, T, vr_r, select)
+    else:
+        if left:
+            vl = vl_r
+        if right:
+            vr = vr_r
+
+    if left and right:
+        return (vl, vr)
+    elif left:
+        return vl
+    else:
+        return vr
+
+
+def ctrevc(np.ndarray[np.complex64_t, ndim=2] T,
+           np.ndarray[np.complex64_t, ndim=2] Q=None,
+           np.ndarray[l_logical] select=None,
+           left=False, right=True):
+    cdef l_int N, info, M, MM
+    cdef char *side
+    cdef char *howmny
+    cdef np.ndarray[np.complex64_t, ndim=2] vl, vr
+    cdef float complex *vl_ptr
+    cdef float complex *vr_ptr
+    cdef l_logical *select_ptr
+    cdef np.ndarray[np.complex64_t] work
+    cdef np.ndarray[np.float32_t] rwork
+
+    assert_fortran_mat(T, Q)
+
+    N = T.shape[0]
+    work = np.empty(2*N, dtype = np.complex64)
+    rwork = np.empty(N, dtype = np.float32)
+
+    if left and right:
+        side = "B"
+    elif left:
+        side = "L"
+    elif right:
+        side = "R"
+    else:
+        return
+
+    if select is not None:
+        howmny = "S"
+        MM = select.nonzero()[0].size
+        select_ptr = <l_logical *>select.data
+    else:
+        MM = N
+        select_ptr = NULL
+        if Q is not None:
+            howmny = "B"
+        else:
+            howmny = "A"
+
+    if left:
+        if Q is not None and select is None:
+            vl = np.asfortranarray(Q.copy())
+        else:
+            vl = np.empty((N, MM), dtype = np.complex64, order='F')
+        vl_ptr = <float complex *>vl.data
+    else:
+        vl_ptr = NULL
+
+    if right:
+        if Q is not None and select is None:
+            vr = np.asfortranarray(Q.copy())
+        else:
+            vr = np.empty((N, MM), dtype = np.complex64, order='F')
+        vr_ptr = <float complex *>vr.data
+    else:
+        vr_ptr = NULL
+
+    f_lapack.ctrevc_(side, howmny, select_ptr,
+                     &N, <float complex *>T.data, &N,
+                     vl_ptr, &N, vr_ptr, &N, &MM, &M,
+                     <float complex *>work.data, <float *>rwork.data, &info)
+
+    assert info == 0, "Argument error in ctrevc"
+    assert MM == M, "Unexpected number of eigenvectors returned in ctrevc"
+
+    if select is not None and Q is not None:
+        if left:
+            vl = np.asfortranarray(np.dot(Q, vl))
+        if right:
+            vr = np.asfortranarray(np.dot(Q, vr))
+
+    if left and right:
+        return (vl, vr)
+    elif left:
+        return vl
+    else:
+        return vr
+
+
+def ztrevc(np.ndarray[np.complex128_t, ndim=2] T,
+           np.ndarray[np.complex128_t, ndim=2] Q=None,
+           np.ndarray[l_logical] select=None,
+           left=False, right=True):
+    cdef l_int N, info, M, MM
+    cdef char *side
+    cdef char *howmny
+    cdef np.ndarray[np.complex128_t, ndim=2] vl, vr
+    cdef double complex *vl_ptr
+    cdef double complex *vr_ptr
+    cdef l_logical *select_ptr
+    cdef np.ndarray[np.complex128_t] work
+    cdef np.ndarray[np.float64_t] rwork
+
+    assert_fortran_mat(T, Q)
+
+    N = T.shape[0]
+    work = np.empty(2*N, dtype = np.complex128)
+    rwork = np.empty(N, dtype = np.float64)
+
+    if left and right:
+        side = "B"
+    elif left:
+        side = "L"
+    elif right:
+        side = "R"
+    else:
+        return
+
+    if select is not None:
+        howmny = "S"
+        MM = select.nonzero()[0].size
+        select_ptr = <l_logical *>select.data
+    else:
+        MM = N
+        select_ptr = NULL
+        if Q is not None:
+            howmny = "B"
+        else:
+            howmny = "A"
+
+    if left:
+        if Q is not None and select is None:
+            vl = np.asfortranarray(Q.copy())
+        else:
+            vl = np.empty((N, MM), dtype = np.complex128, order='F')
+        vl_ptr = <double complex *>vl.data
+    else:
+        vl_ptr = NULL
+
+    if right:
+        if Q is not None and select is None:
+            vr = np.asfortranarray(Q.copy())
+        else:
+            vr = np.empty((N, MM), dtype = np.complex128, order='F')
+        vr_ptr = <double complex *>vr.data
+    else:
+        vr_ptr = NULL
+
+    f_lapack.ztrevc_(side, howmny, select_ptr,
+                     &N, <double complex *>T.data, &N,
+                     vl_ptr, &N, vr_ptr, &N, &MM, &M,
+                     <double complex *>work.data, <double *>rwork.data, &info)
+
+    assert info == 0, "Argument error in ztrevc"
+    assert MM == M, "Unexpected number of eigenvectors returned in ztrevc"
+
+    if select is not None and Q is not None:
+        if left:
+            vl = np.asfortranarray(np.dot(Q, vl))
+        if right:
+            vr = np.asfortranarray(np.dot(Q, vr))
+
+    if left and right:
+        return (vl, vr)
+    elif left:
+        return vl
+    else:
+        return vr
+
+
+# wrappers for xGGES
+def sgges(np.ndarray[np.float32_t, ndim=2] A,
+          np.ndarray[np.float32_t, ndim=2] B,
+          calc_q=True, calc_z=True, calc_ev=True):
+    cdef l_int N, lwork, sdim, info
+    cdef char *jobvsl
+    cdef char *jobvsr
+    cdef float *vsl_ptr
+    cdef float *vsr_ptr
+    cdef float qwork
+    cdef np.ndarray[np.float32_t, ndim=2] vsl, vsr
+    cdef np.ndarray[np.float32_t] alphar, alphai, beta, work
+
+    assert_fortran_mat(A, B)
+
+    N = A.shape[0]
+    alphar = np.empty(N, dtype = np.float32)
+    alphai = np.empty(N, dtype = np.float32)
+    beta = np.empty(N, dtype = np.float32)
+
+    if calc_q:
+        vsl = np.empty((N,N), dtype = np.float32, order='F')
+        vsl_ptr = <float *>vsl.data
+        jobvsl = "V"
+    else:
+        vsl = None
+        vsl_ptr = NULL
+        jobvsl = "N"
+
+    if calc_z:
+        vsr = np.empty((N,N), dtype = np.float32, order='F')
+        vsr_ptr = <float *>vsr.data
+        jobvsr = "V"
+    else:
+        vsr = None
+        vsr_ptr = NULL
+        jobvsr = "N"
+
+    # workspace query
+    lwork = -1
+    f_lapack.sgges_(jobvsl, jobvsr, "N", NULL,
+                    &N, <float *>A.data, &N,
+                    <float *>B.data, &N, &sdim,
+                    <float *>alphar.data, <float *>alphai.data,
+                    <float *>beta.data,
+                    vsl_ptr, &N, vsr_ptr, &N,
+                    &qwork, &lwork, NULL, &info)
+
+    assert info == 0, "Argument error in zgees"
+
+    lwork = <int>qwork
+    work = np.empty(lwork, dtype = np.float32)
+
+    # Now the real calculation
+    f_lapack.sgges_(jobvsl, jobvsr, "N", NULL,
+                    &N, <float *>A.data, &N,
+                    <float *>B.data, &N, &sdim,
+                    <float *>alphar.data, <float *>alphai.data,
+                    <float *>beta.data,
+                    vsl_ptr, &N, vsr_ptr, &N,
+                    <float *>work.data, &lwork, NULL, &info)
+
+    if info > 0:
+        raise LinAlgError("QZ iteration failed to converge in sgges")
+
+    assert info == 0, "Argument error in zgees"
+
+    if alphai.nonzero()[0].size:
+        alpha = alphar + 1j * alphai
+    else:
+        alpha = alphar
+
+    return filter_args((True, True, calc_q, calc_z, calc_ev, calc_ev),
+                       (A, B, vsl, vsr, alpha, beta))
+
+
+def dgges(np.ndarray[np.float64_t, ndim=2] A,
+          np.ndarray[np.float64_t, ndim=2] B,
+          calc_q=True, calc_z=True, calc_ev=True):
+    cdef l_int N, lwork, sdim, info
+    cdef char *jobvsl
+    cdef char *jobvsr
+    cdef double *vsl_ptr
+    cdef double *vsr_ptr
+    cdef double qwork
+    cdef np.ndarray[np.float64_t, ndim=2] vsl, vsr
+    cdef np.ndarray[np.float64_t] alphar, alphai, beta, work
+
+    assert_fortran_mat(A, B)
+
+    N = A.shape[0]
+    alphar = np.empty(N, dtype = np.float64)
+    alphai = np.empty(N, dtype = np.float64)
+    beta = np.empty(N, dtype = np.float64)
+
+    if calc_q:
+        vsl = np.empty((N,N), dtype = np.float64, order='F')
+        vsl_ptr = <double *>vsl.data
+        jobvsl = "V"
+    else:
+        vsl = None
+        vsl_ptr = NULL
+        jobvsl = "N"
+
+    if calc_z:
+        vsr = np.empty((N,N), dtype = np.float64, order='F')
+        vsr_ptr = <double *>vsr.data
+        jobvsr = "V"
+    else:
+        vsr = None
+        vsr_ptr = NULL
+        jobvsr = "N"
+
+    # workspace query
+    lwork = -1
+    f_lapack.dgges_(jobvsl, jobvsr, "N", NULL,
+                    &N, <double *>A.data, &N,
+                    <double *>B.data, &N, &sdim,
+                    <double *>alphar.data, <double *>alphai.data,
+                    <double *>beta.data,
+                    vsl_ptr, &N, vsr_ptr, &N,
+                    &qwork, &lwork, NULL, &info)
+
+    assert info == 0, "Argument error in zgees"
+
+    lwork = <int>qwork
+    work = np.empty(lwork, dtype = np.float64)
+
+    # Now the real calculation
+    f_lapack.dgges_(jobvsl, jobvsr, "N", NULL,
+                    &N, <double *>A.data, &N,
+                    <double *>B.data, &N, &sdim,
+                    <double *>alphar.data, <double *>alphai.data,
+                    <double *>beta.data,
+                    vsl_ptr, &N, vsr_ptr, &N,
+                    <double *>work.data, &lwork, NULL, &info)
+
+    if info > 0:
+        raise LinAlgError("QZ iteration failed to converge in dgges")
+
+    assert info == 0, "Argument error in zgees"
+
+    if alphai.nonzero()[0].size:
+        alpha = alphar + 1j * alphai
+    else:
+        alpha = alphar
+
+    return filter_args((True, True, calc_q, calc_z, calc_ev, calc_ev),
+                       (A, B, vsl, vsr, alpha, beta))
+
+
+def cgges(np.ndarray[np.complex64_t, ndim=2] A,
+          np.ndarray[np.complex64_t, ndim=2] B,
+          calc_q=True, calc_z=True, calc_ev=True):
+    cdef l_int N, lwork, sdim, info
+    cdef char *jobvsl
+    cdef char *jobvsr
+    cdef float complex *vsl_ptr
+    cdef float complex *vsr_ptr
+    cdef float complex qwork
+    cdef np.ndarray[np.complex64_t, ndim=2] vsl, vsr
+    cdef np.ndarray[np.complex64_t] alpha, beta, work
+    cdef np.ndarray[np.float32_t] rwork
+
+    assert_fortran_mat(A, B)
+
+    N = A.shape[0]
+    alpha = np.empty(N, dtype = np.complex64)
+    beta = np.empty(N, dtype = np.complex64)
+    rwork = np.empty(8*N, dtype = np.float32)
+
+    if calc_q:
+        vsl = np.empty((N,N), dtype = np.complex64, order='F')
+        vsl_ptr = <float complex *>vsl.data
+        jobvsl = "V"
+    else:
+        vsl = None
+        vsl_ptr = NULL
+        jobvsl = "N"
+
+    if calc_z:
+        vsr = np.empty((N,N), dtype = np.complex64, order='F')
+        vsr_ptr = <float complex *>vsr.data
+        jobvsr = "V"
+    else:
+        vsr = None
+        vsr_ptr = NULL
+        jobvsr = "N"
+
+    # workspace query
+    lwork = -1
+    f_lapack.cgges_(jobvsl, jobvsr, "N", NULL,
+                    &N, <float complex *>A.data, &N,
+                    <float complex *>B.data, &N, &sdim,
+                    <float complex *>alpha.data, <float complex *>beta.data,
+                    vsl_ptr, &N, vsr_ptr, &N,
+                    &qwork, &lwork, <float *>rwork.data, NULL, &info)
+
+    assert info == 0, "Argument error in zgees"
+
+    lwork = <int>qwork.real
+    work = np.empty(lwork, dtype = np.complex64)
+
+    # Now the real calculation
+    f_lapack.cgges_(jobvsl, jobvsr, "N", NULL,
+                    &N, <float complex *>A.data, &N,
+                    <float complex *>B.data, &N, &sdim,
+                    <float complex *>alpha.data, <float complex *>beta.data,
+                    vsl_ptr, &N, vsr_ptr, &N,
+                    <float complex *>work.data, &lwork,
+                    <float *>rwork.data, NULL, &info)
+
+    if info > 0:
+        raise LinAlgError("QZ iteration failed to converge in cgges")
+
+    assert info == 0, "Argument error in zgees"
+
+    return filter_args((True, True, calc_q, calc_z, calc_ev, calc_ev),
+                       (A, B, vsl, vsr, alpha, beta))
+
+
+def zgges(np.ndarray[np.complex128_t, ndim=2] A,
+          np.ndarray[np.complex128_t, ndim=2] B,
+          calc_q=True, calc_z=True, calc_ev=True):
+    cdef l_int N, lwork, sdim, info
+    cdef char *jobvsl
+    cdef char *jobvsr
+    cdef double complex *vsl_ptr
+    cdef double complex *vsr_ptr
+    cdef double complex qwork
+    cdef np.ndarray[np.complex128_t, ndim=2] vsl, vsr
+    cdef np.ndarray[np.complex128_t] alpha, beta, work
+    cdef np.ndarray[np.float64_t] rwork
+
+    assert_fortran_mat(A, B)
+
+    N = A.shape[0]
+    alpha = np.empty(N, dtype = np.complex128)
+    beta = np.empty(N, dtype = np.complex128)
+    rwork = np.empty(8*N, dtype = np.float64)
+
+    if calc_q:
+        vsl = np.empty((N,N), dtype = np.complex128, order='F')
+        vsl_ptr = <double complex *>vsl.data
+        jobvsl = "V"
+    else:
+        vsl = None
+        vsl_ptr = NULL
+        jobvsl = "N"
+
+    if calc_z:
+        vsr = np.empty((N,N), dtype = np.complex128, order='F')
+        vsr_ptr = <double complex *>vsr.data
+        jobvsr = "V"
+    else:
+        vsr = None
+        vsr_ptr = NULL
+        jobvsr = "N"
+
+    # workspace query
+    lwork = -1
+    f_lapack.zgges_(jobvsl, jobvsr, "N", NULL,
+                    &N, <double complex *>A.data, &N,
+                    <double complex *>B.data, &N, &sdim,
+                    <double complex *>alpha.data, <double complex *>beta.data,
+                    vsl_ptr, &N, vsr_ptr, &N,
+                    &qwork, &lwork, <double *>rwork.data, NULL, &info)
+
+    assert info == 0, "Argument error in zgees"
+
+    lwork = <int>qwork.real
+    work = np.empty(lwork, dtype = np.complex128)
+
+    # Now the real calculation
+    f_lapack.zgges_(jobvsl, jobvsr, "N", NULL,
+                    &N, <double complex *>A.data, &N,
+                    <double complex *>B.data, &N, &sdim,
+                    <double complex *>alpha.data, <double complex *>beta.data,
+                    vsl_ptr, &N, vsr_ptr, &N,
+                    <double complex *>work.data, &lwork,
+                    <double *>rwork.data, NULL, &info)
+
+    if info > 0:
+        raise LinAlgError("QZ iteration failed to converge in zgges")
+
+    assert info == 0, "Argument error in zgees"
+
+    return filter_args((True, True, calc_q, calc_z, calc_ev, calc_ev),
+                       (A, B, vsl, vsr, alpha, beta))
+
+
+# wrappers for xTGSEN
+def stgsen(np.ndarray[l_logical] select,
+           np.ndarray[np.float32_t, ndim=2] S,
+           np.ndarray[np.float32_t, ndim=2] T,
+           np.ndarray[np.float32_t, ndim=2] Q=None,
+           np.ndarray[np.float32_t, ndim=2] Z=None,
+           calc_ev=True):
+    cdef l_int N, M, lwork, liwork, qiwork, info, ijob
+    cdef l_logical wantq, wantz
+    cdef float qwork
+    cdef float *q_ptr
+    cdef float *z_ptr
+    cdef np.ndarray[np.float32_t] alphar, alphai, beta, work
+    cdef np.ndarray[l_int] iwork
+
+    assert_fortran_mat(S, T, Q, Z)
+
+    N = S.shape[0]
+    alphar = np.empty(N, dtype = np.float32)
+    alphai = np.empty(N, dtype = np.float32)
+    beta = np.empty(N, dtype = np.float32)
+    ijob = 0
+
+    if Q is not None:
+        wantq = 1
+        q_ptr = <float *>Q.data
+    else:
+        wantq = 0
+        q_ptr = NULL
+
+    if Z is not None:
+        wantz = 1
+        z_ptr = <float *>Z.data
+    else:
+        wantz = 0
+        z_ptr = NULL
+
+    # workspace query
+    lwork = -1
+    liwork = -1
+    f_lapack.stgsen_(&ijob, &wantq, &wantz, <l_logical *>select.data,
+                     &N, <float *>S.data, &N,
+                     <float *>T.data, &N,
+                     <float *>alphar.data, <float *>alphai.data,
+                     <float *>beta.data,
+                     q_ptr, &N, z_ptr, &N, &M, NULL, NULL, NULL,
+                     &qwork, &lwork, &qiwork, &liwork, &info)
+
+    assert info == 0, "Argument error in stgsen"
+
+    lwork = <int>qwork
+    work = np.empty(lwork, dtype = np.float32)
+    liwork = qiwork
+    iwork = np.empty(liwork, dtype = int_dtype)
+
+    # Now the real calculation
+    f_lapack.stgsen_(&ijob, &wantq, &wantz, <l_logical *>select.data,
+                     &N, <float *>S.data, &N,
+                     <float *>T.data, &N,
+                     <float *>alphar.data, <float *>alphai.data,
+                     <float *>beta.data,
+                     q_ptr, &N, z_ptr, &N, &M, NULL, NULL, NULL,
+                     <float *>work.data, &lwork,
+                     <l_int *>iwork.data, &liwork, &info)
+
+    if info > 0:
+        raise LinAlgError("Reordering failed; problem is very ill-conditioned")
+
+    assert info == 0, "Argument error in stgsen"
+
+    if alphai.nonzero()[0].size:
+        alpha = alphar + 1j * alphai
+    else:
+        alpha = alphar
+
+    return filter_args((True, True, Q is not None, Z is not None,
+                        calc_ev, calc_ev),
+                       (S, T, Q, Z, alpha, beta))
+
+
+def dtgsen(np.ndarray[l_logical] select,
+           np.ndarray[np.float64_t, ndim=2] S,
+           np.ndarray[np.float64_t, ndim=2] T,
+           np.ndarray[np.float64_t, ndim=2] Q=None,
+           np.ndarray[np.float64_t, ndim=2] Z=None,
+           calc_ev=True):
+    cdef l_int N, M, lwork, liwork, qiwork, info, ijob
+    cdef l_logical wantq, wantz
+    cdef double qwork
+    cdef double *q_ptr
+    cdef double *z_ptr
+    cdef np.ndarray[np.float64_t] alphar, alphai, beta, work
+    cdef np.ndarray[l_int] iwork
+
+    assert_fortran_mat(S, T, Q, Z)
+
+    N = S.shape[0]
+    alphar = np.empty(N, dtype = np.float64)
+    alphai = np.empty(N, dtype = np.float64)
+    beta = np.empty(N, dtype = np.float64)
+    ijob = 0
+
+    if Q is not None:
+        wantq = 1
+        q_ptr = <double *>Q.data
+    else:
+        wantq = 0
+        q_ptr = NULL
+
+    if Z is not None:
+        wantz = 1
+        z_ptr = <double *>Z.data
+    else:
+        wantz = 0
+        z_ptr = NULL
+
+    # workspace query
+    lwork = -1
+    liwork = -1
+    f_lapack.dtgsen_(&ijob, &wantq, &wantz, <l_logical *>select.data,
+                     &N, <double *>S.data, &N,
+                     <double *>T.data, &N,
+                     <double *>alphar.data, <double *>alphai.data,
+                     <double *>beta.data,
+                     q_ptr, &N, z_ptr, &N, &M, NULL, NULL, NULL,
+                     &qwork, &lwork, &qiwork, &liwork, &info)
+
+    assert info == 0, "Argument error in dtgsen"
+
+    lwork = <int>qwork
+    work = np.empty(lwork, dtype = np.float64)
+    liwork = qiwork
+    iwork = np.empty(liwork, dtype = int_dtype)
+
+    # Now the real calculation
+    f_lapack.dtgsen_(&ijob, &wantq, &wantz, <l_logical *>select.data,
+                     &N, <double *>S.data, &N,
+                     <double *>T.data, &N,
+                     <double *>alphar.data, <double *>alphai.data,
+                     <double *>beta.data,
+                     q_ptr, &N, z_ptr, &N, &M, NULL, NULL, NULL,
+                     <double *>work.data, &lwork,
+                     <l_int *>iwork.data, &liwork, &info)
+
+    if info > 0:
+        raise LinAlgError("Reordering failed; problem is very ill-conditioned")
+
+    assert info == 0, "Argument error in dtgsen"
+
+    if alphai.nonzero()[0].size:
+        alpha = alphar + 1j * alphai
+    else:
+        alpha = alphar
+
+    return filter_args((True, True, Q is not None, Z is not None,
+                        calc_ev, calc_ev),
+                       (S, T, Q, Z, alpha, beta))
+
+
+def ctgsen(np.ndarray[l_logical] select,
+           np.ndarray[np.complex64_t, ndim=2] S,
+           np.ndarray[np.complex64_t, ndim=2] T,
+           np.ndarray[np.complex64_t, ndim=2] Q=None,
+           np.ndarray[np.complex64_t, ndim=2] Z=None,
+           calc_ev=True):
+    cdef l_int N, M, lwork, liwork, qiwork, info, ijob
+    cdef l_logical wantq, wantz
+    cdef float complex qwork
+    cdef float complex *q_ptr
+    cdef float complex *z_ptr
+    cdef np.ndarray[np.complex64_t] alpha, beta, work
+    cdef np.ndarray[l_int] iwork
+
+    assert_fortran_mat(S, T, Q, Z)
+
+    N = S.shape[0]
+    alpha = np.empty(N, dtype = np.complex64)
+    beta = np.empty(N, dtype = np.complex64)
+    ijob = 0
+
+    if Q is not None:
+        wantq = 1
+        q_ptr = <float complex *>Q.data
+    else:
+        wantq = 0
+        q_ptr = NULL
+
+    if Z is not None:
+        wantz = 1
+        z_ptr = <float complex *>Z.data
+    else:
+        wantz = 0
+        z_ptr = NULL
+
+    # workspace query
+    lwork = -1
+    liwork = -1
+    f_lapack.ctgsen_(&ijob, &wantq, &wantz, <l_logical *>select.data,
+                     &N, <float complex *>S.data, &N,
+                     <float complex *>T.data, &N,
+                     <float complex *>alpha.data, <float complex *>beta.data,
+                     q_ptr, &N, z_ptr, &N, &M, NULL, NULL, NULL,
+                     &qwork, &lwork, &qiwork, &liwork, &info)
+
+    assert info == 0, "Argument error in ctgsen"
+
+    lwork = <int>qwork.real
+    work = np.empty(lwork, dtype = np.complex64)
+    liwork = qiwork
+    iwork = np.empty(liwork, dtype = int_dtype)
+
+    # Now the real calculation
+    f_lapack.ctgsen_(&ijob, &wantq, &wantz, <l_logical *>select.data,
+                     &N, <float complex *>S.data, &N,
+                     <float complex *>T.data, &N,
+                     <float complex *>alpha.data, <float complex *>beta.data,
+                     q_ptr, &N, z_ptr, &N, &M, NULL, NULL, NULL,
+                     <float complex *>work.data, &lwork,
+                     <l_int *>iwork.data, &liwork, &info)
+
+    if info > 0:
+        raise LinAlgError("Reordering failed; problem is very ill-conditioned")
+
+    assert info == 0, "Argument error in ctgsen"
+
+    return filter_args((True, True, Q is not None, Z is not None,
+                        calc_ev, calc_ev),
+                       (S, T, Q, Z, alpha, beta))
+
+
+def ztgsen(np.ndarray[l_logical] select,
+           np.ndarray[np.complex128_t, ndim=2] S,
+           np.ndarray[np.complex128_t, ndim=2] T,
+           np.ndarray[np.complex128_t, ndim=2] Q=None,
+           np.ndarray[np.complex128_t, ndim=2] Z=None,
+           calc_ev=True):
+    cdef l_int N, M, lwork, liwork, qiwork, info, ijob
+    cdef l_logical wantq, wantz
+    cdef double complex qwork
+    cdef double complex *q_ptr
+    cdef double complex *z_ptr
+    cdef np.ndarray[np.complex128_t] alpha, beta, work
+    cdef np.ndarray[l_int] iwork
+
+    assert_fortran_mat(S, T, Q, Z)
+
+    N = S.shape[0]
+    alpha = np.empty(N, dtype = np.complex128)
+    beta = np.empty(N, dtype = np.complex128)
+    ijob = 0
+
+    if Q is not None:
+        wantq = 1
+        q_ptr = <double complex *>Q.data
+    else:
+        wantq = 0
+        q_ptr = NULL
+
+    if Z is not None:
+        wantz = 1
+        z_ptr = <double complex *>Z.data
+    else:
+        wantz = 0
+        z_ptr = NULL
+
+    # workspace query
+    lwork = -1
+    liwork = -1
+    f_lapack.ztgsen_(&ijob, &wantq, &wantz, <l_logical *>select.data,
+                     &N, <double complex *>S.data, &N,
+                     <double complex *>T.data, &N,
+                     <double complex *>alpha.data, <double complex *>beta.data,
+                     q_ptr, &N, z_ptr, &N, &M, NULL, NULL, NULL,
+                     &qwork, &lwork, &qiwork, &liwork, &info)
+
+    assert info == 0, "Argument error in ztgsen"
+
+    lwork = <int>qwork.real
+    work = np.empty(lwork, dtype = np.complex128)
+    liwork = qiwork
+    iwork = np.empty(liwork, dtype = int_dtype)
+
+    # Now the real calculation
+    f_lapack.ztgsen_(&ijob, &wantq, &wantz, <l_logical *>select.data,
+                     &N, <double complex *>S.data, &N,
+                     <double complex *>T.data, &N,
+                     <double complex *>alpha.data, <double complex *>beta.data,
+                     q_ptr, &N, z_ptr, &N, &M, NULL, NULL, NULL,
+                     <double complex *>work.data, &lwork,
+                     <l_int *>iwork.data, &liwork, &info)
 
-    # Parameter checks
+    if info > 0:
+        raise LinAlgError("Reordering failed; problem is very ill-conditioned")
 
-    if (T.shape[0] != T.shape[1] or Q.shape[0] != Q.shape[1]
-        or T.shape[0] != Q.shape[0]):
-        raise ValueError("Invalid Schur decomposition as input")
+    assert info == 0, "Argument error in ztgsen"
 
-    assert_fortran_mat(T, Q)
+    return filter_args((True, True, Q is not None, Z is not None,
+                        calc_ev, calc_ev),
+                       (S, T, Q, Z, alpha, beta))
 
-    # Workspace allocation
 
-    N = T.shape[0]
+# xTGEVC
+def stgevc(np.ndarray[np.float32_t, ndim=2] S,
+           np.ndarray[np.float32_t, ndim=2] T,
+           np.ndarray[np.float32_t, ndim=2] Q=None,
+           np.ndarray[np.float32_t, ndim=2] Z=None,
+           np.ndarray[l_logical] select=None,
+           left=False, right=True):
+    cdef l_int N, info, M, MM
+    cdef char *side
+    cdef char *howmny
+    cdef np.ndarray[np.float32_t, ndim=2] vl_r, vr_r
+    cdef float *vl_r_ptr
+    cdef float *vr_r_ptr
+    cdef np.ndarray[l_logical] select_cpy
+    cdef l_logical *select_ptr
+    cdef np.ndarray[np.float32_t] work
 
-    cdef np.ndarray[scalar] work
-    if scalar in floating:
-        work = np.empty(4 * N, dtype=T.dtype)
-    else:
-        work = np.empty(2 * N, dtype=T.dtype)
+    assert_fortran_mat(S, T, Q, Z)
 
-    cdef np.ndarray rwork = None
-    if scalar is float_complex:
-        rwork = np.empty(N, dtype=np.float32)
-    elif scalar is double_complex:
-        rwork = np.empty(N, dtype=np.float64)
+    N = S.shape[0]
+    work = np.empty(6*N, dtype = np.float32)
 
     if left and right:
         side = "B"
@@ -694,102 +2089,78 @@ def trevc(np.ndarray[scalar, ndim=2] T,
     else:
         return
 
-    cdef np.ndarray[l_logical] select_cpy
-    cdef l_logical *select_ptr
+    backtr = False
+
     if select is not None:
         howmny = "S"
         MM = select.nonzero()[0].size
         # Correct for possible additional storage if a single complex
         # eigenvalue is selected.
         # For that: Figure out the positions of the 2x2 blocks.
-        cmplxindx = np.diagonal(T, -1).nonzero()[0]
+        cmplxindx = np.diagonal(S, -1).nonzero()[0]
         for i in cmplxindx:
             if bool(select[i]) != bool(select[i+1]):
                 MM += 1
 
-        # Select is overwritten in strevc.
-        select_cpy = np.array(select, dtype = logical_dtype,
+        # select is overwritten in stgevc
+        select_cpy = np.array(select, dtype = f_lapack.l_logical_dtype,
                               order = 'F')
         select_ptr = <l_logical *>select_cpy.data
     else:
         MM = N
         select_ptr = NULL
-        if Q is not None:
+        if ((left and right and Q is not None and Z is not None) or
+            (left and not right and Q is not None) or
+            (right and not left and Z is not None)):
             howmny = "B"
+            backtr = True
         else:
             howmny = "A"
 
-    cdef np.ndarray[scalar, ndim=2] vl_r = None
-    cdef scalar *vl_r_ptr
     if left:
-        if Q is not None and select is None:
-            vl_r = np.asfortranarray(Q.copy())
+        if backtr:
+            vl_r = Q
         else:
-            vl_r = np.empty((N, MM), dtype=T.dtype, order='F')
-        vl_r_ptr = <scalar *>vl_r.data
+            vl_r = np.empty((N, MM), dtype = np.float32, order='F')
+        vl_r_ptr = <float *>vl_r.data
     else:
         vl_r_ptr = NULL
 
-    cdef np.ndarray[scalar, ndim=2]  vr_r = None
-    cdef scalar *vr_r_ptr
     if right:
-        if Q is not None and select is None:
-            vr_r = np.asfortranarray(Q.copy())
+        if backtr:
+            vr_r = Z
         else:
-            vr_r = np.empty((N, MM), dtype=T.dtype, order='F')
-        vr_r_ptr = <scalar *>vr_r.data
+            vr_r = np.empty((N, MM), dtype = np.float32, order='F')
+        vr_r_ptr = <float *>vr_r.data
     else:
         vr_r_ptr = NULL
 
-    # The actual calculation
-
-    if scalar is float:
-        lapack.strevc(side, howmny, select_ptr,
-                      &N, <float *>T.data, &N,
-                      vl_r_ptr, &N, vr_r_ptr, &N, &MM, &M,
-                      <float *>work.data, &info)
-    elif scalar is double:
-        lapack.dtrevc(side, howmny, select_ptr,
-                      &N, <double *>T.data, &N,
-                      vl_r_ptr, &N, vr_r_ptr, &N, &MM, &M,
-                      <double *>work.data, &info)
-    elif scalar is float_complex:
-        lapack.ctrevc(side, howmny, select_ptr,
-                      &N, <float complex *>T.data, &N,
-                      vl_r_ptr, &N, vr_r_ptr, &N, &MM, &M,
-                      <float complex *>work.data, <float *>rwork.data, &info)
-    elif scalar is double_complex:
-        lapack.ztrevc(side, howmny, select_ptr,
-                      &N, <double complex *>T.data, &N,
-                      vl_r_ptr, &N, vr_r_ptr, &N, &MM, &M,
-                      <double complex *>work.data, <double *>rwork.data, &info)
-
-    assert info == 0, "Argument error in trevc"
-    assert MM == M, "Unexpected number of eigenvectors returned in strevc"
+    f_lapack.stgevc_(side, howmny, select_ptr,
+                     &N, <float *>S.data, &N,
+                     <float *>T.data, &N,
+                     vl_r_ptr, &N, vr_r_ptr, &N, &MM, &M,
+                     <float *>work.data, &info)
 
-    if select is not None and Q is not None:
+    assert info == 0, "Argument error in stgevc"
+    assert MM == M, "Unexpected number of eigenvectors returned in stgevc"
+
+    if not backtr:
         if left:
             vl_r = np.asfortranarray(np.dot(Q, vl_r))
         if right:
-            vr_r = np.asfortranarray(np.dot(Q, vr_r))
+            vr_r = np.asfortranarray(np.dot(Z, vr_r))
 
-    cdef np.ndarray vl, vr
-    if left:
-        vl = vl_r
-    if right:
-        vr = vr_r
-    if scalar in floating:
-        # If there are complex eigenvalues, we need to postprocess the
-        # eigenvectors.
-        if scalar is float:
-            dtype = np.complex64
-        else:
-            dtype = np.complex128
-        if np.diagonal(T, -1).nonzero()[0].size:
-            if left:
-                vl = txevc_postprocess(dtype, T, vl_r, select)
-            if right:
-                vr = txevc_postprocess(dtype, T, vr_r, select)
+    # If there are complex eigenvalues, we need to postprocess the eigenvectors
+    if np.diagonal(S, -1).nonzero()[0].size:
+        if left:
+            vl = txevc_postprocess(np.complex64, S, vl_r, select)
+        if right:
+            vr = txevc_postprocess(np.complex64, S, vr_r, select)
+    else:
+        if left:
+            vl = vl_r
+        if right:
+            vr = vr_r
 
     if left and right:
         return (vl, vr)
@@ -799,340 +2170,229 @@ def trevc(np.ndarray[scalar, ndim=2] T,
         return vr
 
 
-def gges(np.ndarray[scalar, ndim=2] A,
-          np.ndarray[scalar, ndim=2] B,
-          calc_q=True, calc_z=True, calc_ev=True):
-    cdef l_int N, sdim, info
-
-    # Check parameters
-
-    assert_fortran_mat(A, B)
-
-    if A.shape[0] != B.shape[1]:
-        raise ValueError("Expect square matrix A")
-
-    if A.shape[0] != B.shape[0] or A.shape[0] != B.shape[1]:
-        raise ValueError("Shape of B is incompatible with matrix A")
+def dtgevc(np.ndarray[np.float64_t, ndim=2] S,
+           np.ndarray[np.float64_t, ndim=2] T,
+           np.ndarray[np.float64_t, ndim=2] Q=None,
+           np.ndarray[np.float64_t, ndim=2] Z=None,
+           np.ndarray[l_logical] select=None,
+           left=False, right=True):
+    cdef l_int N, info, M, MM
+    cdef char *side
+    cdef char *howmny
+    cdef np.ndarray[np.float64_t, ndim=2] vl_r, vr_r
+    cdef double *vl_r_ptr
+    cdef double *vr_r_ptr
+    cdef np.ndarray[l_logical] select_cpy
+    cdef l_logical *select_ptr
+    cdef np.ndarray[np.float64_t] work
 
-    # Allocate workspaces
+    assert_fortran_mat(S, T, Q, Z)
 
-    N = A.shape[0]
+    N = S.shape[0]
+    work = np.empty(6*N, dtype = np.float64)
 
-    cdef np.ndarray[scalar] alphar, alphai
-    if scalar in cmplx:
-        alphar = np.empty(N, dtype=A.dtype)
-        alphai = None
+    if left and right:
+        side = "B"
+    elif left:
+        side = "L"
+    elif right:
+        side = "R"
     else:
-        alphar = np.empty(N, dtype=A.dtype)
-        alphai = np.empty(N, dtype=A.dtype)
+        return
 
-    cdef np.ndarray[scalar] beta = np.empty(N, dtype=A.dtype)
+    backtr = False
 
-    cdef np.ndarray rwork = None
-    if scalar is float_complex:
-        rwork = np.empty(8 * N, dtype=np.float32)
-    elif scalar is double_complex:
-        rwork = np.empty(8 * N, dtype=np.float64)
+    if select is not None:
+        howmny = "S"
+        MM = select.nonzero()[0].size
+        # Correct for possible additional storage if a single complex
+        # eigenvalue is selected.
+        # For that: Figure out the positions of the 2x2 blocks.
+        cmplxindx = np.diagonal(S, -1).nonzero()[0]
+        for i in cmplxindx:
+            if bool(select[i]) != bool(select[i+1]):
+                MM += 1
 
-    cdef char *jobvsl
-    cdef scalar *vsl_ptr
-    cdef np.ndarray[scalar, ndim=2] vsl
-    if calc_q:
-        vsl = np.empty((N,N), dtype=A.dtype, order='F')
-        vsl_ptr = <scalar *>vsl.data
-        jobvsl = "V"
+        # select is overwritten in dtgevc
+        select_cpy = np.array(select, dtype = f_lapack.l_logical_dtype,
+                              order = 'F')
+        select_ptr = <l_logical *>select_cpy.data
     else:
-        vsl = None
-        vsl_ptr = NULL
-        jobvsl = "N"
+        MM = N
+        select_ptr = NULL
+        if ((left and right and Q is not None and Z is not None) or
+            (left and not right and Q is not None) or
+            (right and not left and Z is not None)):
+            howmny = "B"
+            backtr = True
+        else:
+            howmny = "A"
 
-    cdef char *jobvsr
-    cdef scalar *vsr_ptr
-    cdef np.ndarray[scalar, ndim=2] vsr
-    if calc_z:
-        vsr = np.empty((N,N), dtype=A.dtype, order='F')
-        vsr_ptr = <scalar *>vsr.data
-        jobvsr = "V"
+    if left:
+        if backtr:
+            vl_r = Q
+        else:
+            vl_r = np.empty((N, MM), dtype = np.float64, order='F')
+        vl_r_ptr = <double *>vl_r.data
     else:
-        vsr = None
-        vsr_ptr = NULL
-        jobvsr = "N"
-
-    # Workspace query
-    # Xgges expects &qwork as a <scalar *> (even though it's an integer)
-    cdef l_int lwork = -1
-    cdef scalar qwork
-
-    if scalar is float:
-        lapack.sgges(jobvsl, jobvsr, "N", NULL,
-                     &N, <float *>A.data, &N,
-                     <float *>B.data, &N, &sdim,
-                     <float *>alphar.data, <float *>alphai.data,
-                     <float *>beta.data,
-                     vsl_ptr, &N, vsr_ptr, &N,
-                     &qwork, &lwork, NULL, &info)
-    elif scalar is double:
-        lapack.dgges(jobvsl, jobvsr, "N", NULL,
-                     &N, <double *>A.data, &N,
-                     <double *>B.data, &N, &sdim,
-                     <double *>alphar.data, <double *>alphai.data,
-                     <double *>beta.data,
-                     vsl_ptr, &N, vsr_ptr, &N,
-                     &qwork, &lwork, NULL, &info)
-    elif scalar is float_complex:
-        lapack.cgges(jobvsl, jobvsr, "N", NULL,
-                     &N, <float complex *>A.data, &N,
-                     <float complex *>B.data, &N, &sdim,
-                     <float complex *>alphar.data, <float complex *>beta.data,
-                     vsl_ptr, &N, vsr_ptr, &N,
-                     &qwork, &lwork, <float *>rwork.data, NULL, &info)
-    elif scalar is double_complex:
-        lapack.zgges(jobvsl, jobvsr, "N", NULL,
-                     &N, <double complex *>A.data, &N,
-                     <double complex *>B.data, &N, &sdim,
-                     <double complex *>alphar.data, <double complex *>beta.data,
-                     vsl_ptr, &N, vsr_ptr, &N,
-                     &qwork, &lwork, <double *>rwork.data, NULL, &info)
-
-    assert info == 0, "Argument error in gges"
-
-    lwork = lwork_from_qwork(qwork)
-    cdef np.ndarray[scalar] work = np.empty(lwork, dtype=A.dtype)
-
-    # The actual calculation
-
-    if scalar is float:
-        lapack.sgges(jobvsl, jobvsr, "N", NULL,
-                     &N, <float *>A.data, &N,
-                     <float *>B.data, &N, &sdim,
-                     <float *>alphar.data, <float *>alphai.data,
-                     <float *>beta.data,
-                     vsl_ptr, &N, vsr_ptr, &N,
-                     <float *>work.data, &lwork, NULL, &info)
-    elif scalar is double:
-        lapack.dgges(jobvsl, jobvsr, "N", NULL,
-                     &N, <double *>A.data, &N,
-                     <double *>B.data, &N, &sdim,
-                     <double *>alphar.data, <double *>alphai.data,
-                     <double *>beta.data,
-                     vsl_ptr, &N, vsr_ptr, &N,
-                     <double *>work.data, &lwork, NULL, &info)
-    elif scalar is float_complex:
-        lapack.cgges(jobvsl, jobvsr, "N", NULL,
-                     &N, <float complex *>A.data, &N,
-                     <float complex *>B.data, &N, &sdim,
-                     <float complex *>alphar.data, <float complex *>beta.data,
-                     vsl_ptr, &N, vsr_ptr, &N,
-                     <float complex *>work.data, &lwork,
-                     <float *>rwork.data, NULL, &info)
-    elif scalar is double_complex:
-        lapack.zgges(jobvsl, jobvsr, "N", NULL,
-                     &N, <double complex *>A.data, &N,
-                     <double complex *>B.data, &N, &sdim,
-                     <double complex *>alphar.data, <double complex *>beta.data,
-                     vsl_ptr, &N, vsr_ptr, &N,
-                     <double complex *>work.data, &lwork,
-                     <double *>rwork.data, NULL, &info)
+        vl_r_ptr = NULL
 
-    if info > 0:
-        raise LinAlgError("QZ iteration failed to converge in gges")
+    if right:
+        if backtr:
+            vr_r = Z
+        else:
+            vr_r = np.empty((N, MM), dtype = np.float64, order='F')
+        vr_r_ptr = <double *>vr_r.data
+    else:
+        vr_r_ptr = NULL
 
-    assert info == 0, "Argument error in gges"
+    f_lapack.dtgevc_(side, howmny, select_ptr,
+                     &N, <double *>S.data, &N,
+                     <double *>T.data, &N,
+                     vl_r_ptr, &N, vr_r_ptr, &N, &MM, &M,
+                     <double *>work.data, &info)
 
-    # Real inputs possibly produce complex output
-    cdef np.ndarray alpha = maybe_complex[scalar](0, alphar, alphai)
+    assert info == 0, "Argument error in dtgevc"
+    assert MM == M, "Unexpected number of eigenvectors returned in dtgevc"
 
-    return filter_args((True, True, calc_q, calc_z, calc_ev, calc_ev),
-                       (A, B, vsl, vsr, alpha, beta))
+    if not backtr:
+        if left:
+            vl_r = np.asfortranarray(np.dot(Q, vl_r))
+        if right:
+            vr_r = np.asfortranarray(np.dot(Z, vr_r))
 
+    # If there are complex eigenvalues, we need to postprocess the
+    # eigenvectors.
+    if np.diagonal(S, -1).nonzero()[0].size:
+        if left:
+            vl = txevc_postprocess(np.complex128, S, vl_r, select)
+        if right:
+            vr = txevc_postprocess(np.complex128, S, vr_r, select)
+    else:
+        if left:
+            vl = vl_r
+        if right:
+            vr = vr_r
 
-def tgsen(np.ndarray[l_logical] select,
-           np.ndarray[scalar, ndim=2] S,
-           np.ndarray[scalar, ndim=2] T,
-           np.ndarray[scalar, ndim=2] Q,
-           np.ndarray[scalar, ndim=2] Z,
-           calc_ev=True):
-    cdef l_int ijob = 0
-    cdef l_int N, M, lwork, liwork, info
+    if left and right:
+        return (vl, vr)
+    elif left:
+        return vl
+    else:
+        return vr
 
-    # Check parameters
 
-    if ((S.shape[0] != S.shape[1] or T.shape[0] != T.shape[1] or
-         S.shape[0] != T.shape[0]) or
-        (Q is not None and (Q.shape[0] != Q.shape[1] or
-                            S.shape[0] != Q.shape[0])) or
-        (Z is not None and (Z.shape[0] != Z.shape[1] or
-                            S.shape[0] != Z.shape[0]))):
-        raise ValueError("Invalid Schur decomposition as input")
+def ctgevc(np.ndarray[np.complex64_t, ndim=2] S,
+           np.ndarray[np.complex64_t, ndim=2] T,
+           np.ndarray[np.complex64_t, ndim=2] Q=None,
+           np.ndarray[np.complex64_t, ndim=2] Z=None,
+           np.ndarray[l_logical] select=None,
+           left=False, right=True):
+    cdef l_int N, info, M, MM
+    cdef char *side
+    cdef char *howmny
+    cdef np.ndarray[np.complex64_t, ndim=2] vl, vr
+    cdef float complex *vl_ptr
+    cdef float complex *vr_ptr
+    cdef l_logical *select_ptr
+    cdef np.ndarray[np.complex64_t] work
+    cdef np.ndarray[np.float32_t] rwork
 
     assert_fortran_mat(S, T, Q, Z)
 
-    # Allocate workspaces
-
     N = S.shape[0]
+    work = np.empty(2*N, dtype = np.complex64)
+    rwork = np.empty(2*N, dtype = np.float32)
 
-    cdef np.ndarray[scalar] alphar, alphai
-    if scalar in cmplx:
-        alphar = np.empty(N, dtype=S.dtype)
-        alphai = None
+    if left and right:
+        side = "B"
+    elif left:
+        side = "L"
+    elif right:
+        side = "R"
     else:
-        alphar = np.empty(N, dtype=S.dtype)
-        alphai = np.empty(N, dtype=S.dtype)
+        return
 
-    cdef np.ndarray[scalar] beta
-    beta = np.empty(N, dtype=S.dtype)
+    backtr = False
 
-    cdef l_logical wantq
-    cdef scalar *q_ptr
-    if Q is not None:
-        wantq = 1
-        q_ptr = <scalar *>Q.data
+    if select is not None:
+        howmny = "S"
+        MM = select.nonzero()[0].size
+        select_ptr = <l_logical *>select.data
     else:
-        wantq = 0
-        q_ptr = NULL
+        MM = N
+        select_ptr = NULL
+        if ((left and right and Q is not None and Z is not None) or
+            (left and not right and Q is not None) or
+            (right and not left and Z is not None)):
+            howmny = "B"
+            backtr = True
+        else:
+            howmny = "A"
 
-    cdef l_logical wantz
-    cdef scalar *z_ptr
-    if Z is not None:
-        wantz = 1
-        z_ptr = <scalar *>Z.data
+    if left:
+        if backtr:
+            vl = Q
+        else:
+            vl = np.empty((N, MM), dtype = np.complex64, order='F')
+        vl_ptr = <float complex *>vl.data
     else:
-        wantz = 0
-        z_ptr = NULL
-
-    # Workspace query
-    # Xtgsen expects &qwork as a <scalar *> (even though it's an integer)
-    lwork = -1
-    liwork = -1
-    cdef scalar qwork
-    cdef l_int qiwork
-
-    if scalar is float:
-        lapack.stgsen(&ijob, &wantq, &wantz, <l_logical *>select.data,
-                      &N, <float *>S.data, &N,
-                      <float *>T.data, &N,
-                      <float *>alphar.data, <float *>alphai.data,
-                      <float *>beta.data,
-                      q_ptr, &N, z_ptr, &N, &M, NULL, NULL, NULL,
-                      &qwork, &lwork, &qiwork, &liwork, &info)
-    elif scalar is double:
-        lapack.dtgsen(&ijob, &wantq, &wantz, <l_logical *>select.data,
-                      &N, <double *>S.data, &N,
-                      <double *>T.data, &N,
-                      <double *>alphar.data, <double *>alphai.data,
-                      <double *>beta.data,
-                      q_ptr, &N, z_ptr, &N, &M, NULL, NULL, NULL,
-                      &qwork, &lwork, &qiwork, &liwork, &info)
-    elif scalar is float_complex:
-        lapack.ctgsen(&ijob, &wantq, &wantz, <l_logical *>select.data,
-                      &N, <float complex *>S.data, &N,
-                      <float complex *>T.data, &N,
-                      <float complex *>alphar.data, <float complex *>beta.data,
-                      q_ptr, &N, z_ptr, &N, &M, NULL, NULL, NULL,
-                      &qwork, &lwork, &qiwork, &liwork, &info)
-    elif scalar is double_complex:
-        lapack.ztgsen(&ijob, &wantq, &wantz, <l_logical *>select.data,
-                      &N, <double complex *>S.data, &N,
-                      <double complex *>T.data, &N,
-                      <double complex *>alphar.data, <double complex *>beta.data,
-                      q_ptr, &N, z_ptr, &N, &M, NULL, NULL, NULL,
-                      &qwork, &lwork, &qiwork, &liwork, &info)
-
-    assert info == 0, "Argument error in tgsen"
-
-    lwork = lwork_from_qwork(qwork)
-    cdef np.ndarray[scalar] work = np.empty(lwork, dtype=S.dtype)
+        vl_ptr = NULL
 
-    liwork = qiwork
-    cdef np.ndarray[l_int] iwork = np.empty(liwork, dtype=int_dtype)
-
-    # The actual calculation
-
-    if scalar is float:
-        lapack.stgsen(&ijob, &wantq, &wantz, <l_logical *>select.data,
-                      &N, <float *>S.data, &N,
-                      <float *>T.data, &N,
-                      <float *>alphar.data, <float *>alphai.data,
-                      <float *>beta.data,
-                      q_ptr, &N, z_ptr, &N, &M, NULL, NULL, NULL,
-                      <float *>work.data, &lwork,
-                      <l_int *>iwork.data, &liwork, &info)
-    elif scalar is double:
-        lapack.dtgsen(&ijob, &wantq, &wantz, <l_logical *>select.data,
-                      &N, <double *>S.data, &N,
-                      <double *>T.data, &N,
-                      <double *>alphar.data, <double *>alphai.data,
-                      <double *>beta.data,
-                      q_ptr, &N, z_ptr, &N, &M, NULL, NULL, NULL,
-                      <double *>work.data, &lwork,
-                      <l_int *>iwork.data, &liwork, &info)
-    elif scalar is float_complex:
-        lapack.ctgsen(&ijob, &wantq, &wantz, <l_logical *>select.data,
-                      &N, <float complex *>S.data, &N,
-                      <float complex *>T.data, &N,
-                      <float complex *>alphar.data, <float complex *>beta.data,
-                      q_ptr, &N, z_ptr, &N, &M, NULL, NULL, NULL,
-                      <float complex *>work.data, &lwork,
-                      <l_int *>iwork.data, &liwork, &info)
-    elif scalar is double_complex:
-        lapack.ztgsen(&ijob, &wantq, &wantz, <l_logical *>select.data,
-                      &N, <double complex *>S.data, &N,
-                      <double complex *>T.data, &N,
-                      <double complex *>alphar.data, <double complex *>beta.data,
-                      q_ptr, &N, z_ptr, &N, &M, NULL, NULL, NULL,
-                      <double complex *>work.data, &lwork,
-                      <l_int *>iwork.data, &liwork, &info)
+    if right:
+        if backtr:
+            vr = Z
+        else:
+            vr = np.empty((N, MM), dtype = np.complex64, order='F')
+        vr_ptr = <float complex *>vr.data
+    else:
+        vr_ptr = NULL
 
-    if info > 0:
-        raise LinAlgError("Reordering failed; problem is very ill-conditioned")
+    f_lapack.ctgevc_(side, howmny, select_ptr,
+                     &N, <float complex *>S.data, &N,
+                     <float complex *>T.data, &N,
+                     vl_ptr, &N, vr_ptr, &N, &MM, &M,
+                     <float complex *>work.data, <float *>rwork.data, &info)
 
-    assert info == 0, "Argument error in tgsen"
+    assert info == 0, "Argument error in ctgevc"
+    assert MM == M, "Unexpected number of eigenvectors returned in ctgevc"
 
-    # Real inputs possibly produce complex output
-    cdef np.ndarray alpha = maybe_complex[scalar](0, alphar, alphai)
+    if not backtr:
+        if left:
+            vl = np.asfortranarray(np.dot(Q, vl))
+        if right:
+            vr = np.asfortranarray(np.dot(Z, vr))
 
-    return filter_args((True, True, Q is not None, Z is not None,
-                        calc_ev, calc_ev),
-                       (S, T, Q, Z, alpha, beta))
+    if left and right:
+        return (vl, vr)
+    elif left:
+        return vl
+    else:
+        return vr
 
 
-def tgevc(np.ndarray[scalar, ndim=2] S,
-          np.ndarray[scalar, ndim=2] T,
-          np.ndarray[scalar, ndim=2] Q,
-          np.ndarray[scalar, ndim=2] Z,
-          np.ndarray[l_logical] select,
-          left=False, right=True):
+def ztgevc(np.ndarray[np.complex128_t, ndim=2] S,
+           np.ndarray[np.complex128_t, ndim=2] T,
+           np.ndarray[np.complex128_t, ndim=2] Q=None,
+           np.ndarray[np.complex128_t, ndim=2] Z=None,
+           np.ndarray[l_logical] select=None,
+           left=False, right=True):
     cdef l_int N, info, M, MM
-
-    # Check parameters
-
-    if ((S.shape[0] != S.shape[1] or T.shape[0] != T.shape[1] or
-         S.shape[0] != T.shape[0]) or
-        (Q is not None and (Q.shape[0] != Q.shape[1] or
-                            S.shape[0] != Q.shape[0])) or
-        (Z is not None and (Z.shape[0] != Z.shape[1] or
-                            S.shape[0] != Z.shape[0]))):
-        raise ValueError("Invalid Schur decomposition as input")
+    cdef char *side
+    cdef char *howmny
+    cdef np.ndarray[np.complex128_t, ndim=2] vl, vr
+    cdef double complex *vl_ptr
+    cdef double complex *vr_ptr
+    cdef l_logical *select_ptr
+    cdef np.ndarray[np.complex128_t] work
+    cdef np.ndarray[np.float64_t] rwork
 
     assert_fortran_mat(S, T, Q, Z)
 
-    # Allocate workspaces
-
     N = S.shape[0]
+    work = np.empty(2*N, dtype = np.complex128)
+    rwork = np.empty(2*N, dtype = np.float64)
 
-    cdef np.ndarray[scalar] work
-    if scalar in floating:
-        work = np.empty(6 * N, dtype=S.dtype)
-    else:
-        work = np.empty(2 * N, dtype=S.dtype)
-
-    cdef np.ndarray rwork = None
-    if scalar is float_complex:
-        rwork = np.empty(2 * N, dtype=np.float32)
-    elif scalar is double_complex:
-        rwork = np.empty(2 * N, dtype=np.float64)
-
-    cdef char *side
     if left and right:
         side = "B"
     elif left:
@@ -1142,26 +2402,12 @@ def tgevc(np.ndarray[scalar, ndim=2] S,
     else:
         return
 
-    cdef l_logical backtr = False
+    backtr = False
 
-    cdef char *howmny
-    cdef np.ndarray[l_logical] select_cpy = None
-    cdef l_logical *select_ptr
     if select is not None:
         howmny = "S"
         MM = select.nonzero()[0].size
-        # Correct for possible additional storage if a single complex
-        # eigenvalue is selected.
-        # For that: Figure out the positions of the 2x2 blocks.
-        cmplxindx = np.diagonal(S, -1).nonzero()[0]
-        for i in cmplxindx:
-            if bool(select[i]) != bool(select[i+1]):
-                MM += 1
-
-        # select is overwritten in tgevc
-        select_cpy = np.array(select, dtype=logical_dtype,
-                              order = 'F')
-        select_ptr = <l_logical *>select_cpy.data
+        select_ptr = <l_logical *>select.data
     else:
         MM = N
         select_ptr = NULL
@@ -1173,78 +2419,38 @@ def tgevc(np.ndarray[scalar, ndim=2] S,
         else:
             howmny = "A"
 
-    cdef np.ndarray[scalar, ndim=2] vl_r
-    cdef scalar *vl_r_ptr
     if left:
         if backtr:
-            vl_r = Q
+            vl = Q
         else:
-            vl_r = np.empty((N, MM), dtype=S.dtype, order='F')
-        vl_r_ptr = <scalar *>vl_r.data
+            vl = np.empty((N, MM), dtype = np.complex128, order='F')
+        vl_ptr = <double complex *>vl.data
     else:
-        vl_r_ptr = NULL
+        vl_ptr = NULL
 
-    cdef np.ndarray[scalar, ndim=2] vr_r
-    cdef scalar *vr_r_ptr
     if right:
         if backtr:
-            vr_r = Z
+            vr = Z
         else:
-            vr_r = np.empty((N, MM), dtype=S.dtype, order='F')
-        vr_r_ptr = <scalar *>vr_r.data
+            vr = np.empty((N, MM), dtype = np.complex128, order='F')
+        vr_ptr = <double complex *>vr.data
     else:
-        vr_r_ptr = NULL
+        vr_ptr = NULL
 
-    if scalar is float:
-        lapack.stgevc(side, howmny, select_ptr,
-                      &N, <float *>S.data, &N,
-                      <float *>T.data, &N,
-                      vl_r_ptr, &N, vr_r_ptr, &N, &MM, &M,
-                      <float *>work.data, &info)
-    elif scalar is double:
-        lapack.dtgevc(side, howmny, select_ptr,
-                      &N, <double *>S.data, &N,
-                      <double *>T.data, &N,
-                      vl_r_ptr, &N, vr_r_ptr, &N, &MM, &M,
-                      <double *>work.data, &info)
-    elif scalar is float_complex:
-        lapack.ctgevc(side, howmny, select_ptr,
-                      &N, <float complex *>S.data, &N,
-                      <float complex *>T.data, &N,
-                      vl_r_ptr, &N, vr_r_ptr, &N, &MM, &M,
-                      <float complex *>work.data, <float *>rwork.data, &info)
-    elif scalar is double_complex:
-        lapack.ztgevc(side, howmny, select_ptr,
-                      &N, <double complex *>S.data, &N,
-                      <double complex *>T.data, &N,
-                      vl_r_ptr, &N, vr_r_ptr, &N, &MM, &M,
-                      <double complex *>work.data, <double *>rwork.data, &info)
-
-    assert info == 0, "Argument error in tgevc"
-    assert MM == M, "Unexpected number of eigenvectors returned in tgevc"
+    f_lapack.ztgevc_(side, howmny, select_ptr,
+                     &N, <double complex *>S.data, &N,
+                     <double complex *>T.data, &N,
+                     vl_ptr, &N, vr_ptr, &N, &MM, &M,
+                     <double complex *>work.data, <double *>rwork.data, &info)
+
+    assert info == 0, "Argument error in ztgevc"
+    assert MM == M, "Unexpected number of eigenvectors returned in ztgevc"
 
     if not backtr:
         if left:
-            vl_r = np.asfortranarray(np.dot(Q, vl_r))
+            vl = np.asfortranarray(np.dot(Q, vl))
         if right:
-            vr_r = np.asfortranarray(np.dot(Z, vr_r))
-
-    # If there are complex eigenvalues, we need to postprocess the eigenvectors
-    cdef np.ndarray vl, vr
-    if left:
-        vl = vl_r
-    if right:
-        vr = vr_r
-    if scalar in floating:
-        if scalar is float:
-            dtype = np.complex64
-        else:
-            dtype = np.complex128
-        if np.diagonal(S, -1).nonzero()[0].size:
-            if left:
-                vl = txevc_postprocess(dtype, S, vl_r, select)
-            if right:
-                vr = txevc_postprocess(dtype, S, vr_r, select)
+            vr = np.asfortranarray(np.dot(Z, vr))
 
     if left and right:
         return (vl, vr)
@@ -1274,7 +2480,8 @@ def prepare_for_lapack(overwrite, *args):
     If an argument is ``None``, it is just passed through and not used to
     determine the proper LAPACK type.
 
-    Returns a list of properly converted arrays.
+    `prepare_for_lapack` returns a character indicating the proper LAPACK data
+    type ('s', 'd', 'c', 'z') and a list of properly converted arrays.
     """
 
     # Make sure we have NumPy arrays
@@ -1295,10 +2502,18 @@ def prepare_for_lapack(overwrite, *args):
     #       kind.
     dtype = np.common_type(*[arr for arr, ovwrt in mats if arr is not None])
 
-    if dtype not in (np.float32, np.float64, np.complex64, np.complex128):
+    if dtype == np.float32:
+        lapacktype = 's'
+    elif dtype == np.float64:
+        lapacktype = 'd'
+    elif dtype == np.complex64:
+        lapacktype = 'c'
+    elif dtype == np.complex128:
+        lapacktype = 'z'
+    else:
         raise AssertionError("Unexpected data type from common_type")
 
-    ret = []
+    ret = [ lapacktype ]
     for npmat, ovwrt in mats:
         # Now make sure that the array is contiguous, and copy if necessary.
         if npmat is not None:
@@ -1323,7 +2538,4 @@ def prepare_for_lapack(overwrite, *args):
 
         ret.append(npmat)
 
-    if len(ret) == 1:
-        return ret[0]
-    else:
-        return tuple(ret)
+    return tuple(ret)
diff --git a/setup.py b/setup.py
index 04d92ea7a23deb14abd64ef07aa4cc7cdb7ddfd4..197505db81f8a782506519d5a4f70687f713aa84 100755
--- a/setup.py
+++ b/setup.py
@@ -372,9 +372,8 @@ def search_mumps():
         # Conda (via conda-forge).
         # TODO: remove dependency libs (scotch, metis...) when conda-forge
         # packaged mumps/scotch are built as properly linked shared libs
-        # 'openblas' provides Lapack and BLAS symbols
         ['zmumps', 'mumps_common', 'metis', 'esmumps', 'scotch',
-         'scotcherr', 'mpiseq', 'openblas'],
+         'scotcherr', 'mpiseq'],
     ]
     common_libs = ['pord', 'gfortran']
 
@@ -385,7 +384,34 @@ def search_mumps():
     return []
 
 
+def search_lapack():
+    """Return the BLAS variant that is installed."""
+    lib_sets = [
+        # Debian
+        ['blas', 'lapack'],
+        # Conda (via conda-forge). Openblas contains lapack symbols
+        ['openblas', 'gfortran'],
+    ]
+
+    for libs in lib_sets:
+        found_libs = search_libs(libs)
+        if found_libs:
+            return found_libs
+
+    print('Error: BLAS/LAPACK are required but were not found.',
+          file=sys.stderr)
+    sys.exit(1)
+
+
 def configure_special_extensions(exts, build_summary):
+    #### Special config for LAPACK.
+    lapack = exts['kwant.linalg.lapack']
+    if 'libraries' in lapack:
+        build_summary.append('User-configured LAPACK and BLAS')
+    else:
+        lapack['libraries'] = search_lapack()
+        build_summary.append('Default LAPACK and BLAS')
+
     #### Special config for MUMPS.
     mumps = exts['kwant.linalg._mumps']
     if 'libraries' in mumps:
@@ -400,6 +426,12 @@ def configure_special_extensions(exts, build_summary):
             del exts['kwant.linalg._mumps']
             build_summary.append('No MUMPS support')
 
+    if mumps:
+        # Copy config from LAPACK.
+        for key, value in lapack.items():
+            if key not in ['sources', 'depends']:
+                mumps.setdefault(key, []).extend(value)
+
     return exts
 
 
@@ -518,7 +550,8 @@ def main():
                        'kwant/graph/c_slicer/partitioner.h',
                        'kwant/graph/c_slicer/slicer.h'])),
         ('kwant.linalg.lapack',
-         dict(sources=['kwant/linalg/lapack.pyx'])),
+         dict(sources=['kwant/linalg/lapack.pyx'],
+              depends=['kwant/linalg/f_lapack.pxd'])),
         ('kwant.linalg._mumps',
          dict(sources=['kwant/linalg/_mumps.pyx'],
               depends=['kwant/linalg/cmumps.pxd']))])
@@ -534,7 +567,8 @@ def main():
         for ext in exts.values():
             ext.setdefault('include_dirs', []).append(numpy_include)
 
-    aliases = [('mumps', 'kwant.linalg._mumps')]
+    aliases = [('lapack', 'kwant.linalg.lapack'),
+               ('mumps', 'kwant.linalg._mumps')]
 
     init_cython()