diff --git a/kwant/_system.pyx b/kwant/_system.pyx
index beebd9465866dd61532b1f8d92ef411a9ee07555..4deb6bc3823d281ca791688fa6fed8c6552e6452 100644
--- a/kwant/_system.pyx
+++ b/kwant/_system.pyx
@@ -38,7 +38,7 @@ def make_sparse(ham, args, CGraph gr, diag,
 
     # Calculate the data size.
     num_entries = 0
-    for n_fs in xrange(len(from_sites)):
+    for n_fs in range(len(from_sites)):
         fs = from_sites[n_fs]
         if fs in n_by_to_site:
             num_entries += from_norb[n_fs] * from_norb[n_fs]
@@ -52,15 +52,15 @@ def make_sparse(ham, args, CGraph gr, diag,
     data = np.empty(num_entries, complex)
 
     cdef gint k = 0
-    for n_fs in xrange(len(from_sites)):
+    for n_fs in range(len(from_sites)):
         fs = from_sites[n_fs]
         if fs in n_by_to_site:
             n_ts = n_by_to_site[fs]
             h = diag[n_fs]
             if not (h.shape[0] == h.shape[1] == from_norb[n_fs]):
                 raise ValueError(msg.format(fs, fs))
-            for i in xrange(h.shape[0]):
-                for j in xrange(h.shape[1]):
+            for i in range(h.shape[0]):
+                for j in range(h.shape[1]):
                     value = h[i, j]
                     if value != 0:
                         data[k] = value
@@ -76,8 +76,8 @@ def make_sparse(ham, args, CGraph gr, diag,
             h = matrix(ham(ts, fs, *args), complex)
             if h.shape[0] != to_norb[n_ts] or h.shape[1] != from_norb[n_fs]:
                 raise ValueError(msg.format(fs, ts))
-            for i in xrange(h.shape[0]):
-                for j in xrange(h.shape[1]):
+            for i in range(h.shape[0]):
+                for j in range(h.shape[1]):
                     value = h[i, j]
                     if value != 0:
                         data[k] = value
@@ -114,7 +114,7 @@ def make_sparse_full(ham, args, CGraph gr, diag,
 
     # Calculate the data size.
     num_entries = 0
-    for fs in xrange(n):
+    for fs in range(n):
         num_entries += from_norb[fs] * from_norb[fs]
         nbors = gr.out_neighbors(fs)
         for ts in nbors.data[:nbors.size]:
@@ -125,12 +125,12 @@ def make_sparse_full(ham, args, CGraph gr, diag,
     data = np.empty(num_entries, complex)
 
     cdef gint k = 0
-    for fs in xrange(n):
+    for fs in range(n):
         h = diag[fs]
         if not (h.shape[0] == h.shape[1] == from_norb[fs]):
             raise ValueError(msg.format(fs, fs))
-        for i in xrange(h.shape[0]):
-            for j in xrange(h.shape[1]):
+        for i in range(h.shape[0]):
+            for j in range(h.shape[1]):
                 value = h[i, j]
                 if value != 0:
                     data[k] = value
@@ -145,8 +145,8 @@ def make_sparse_full(ham, args, CGraph gr, diag,
             h = matrix(ham(ts, fs, *args), complex)
             if h.shape[0] != to_norb[ts] or h.shape[1] != from_norb[fs]:
                 raise ValueError(msg.format(fs, ts))
-            for i in xrange(h.shape[0]):
-                for j in xrange(h.shape[1]):
+            for i in range(h.shape[0]):
+                for j in range(h.shape[1]):
                     value = h[i, j]
                     if value != 0:
                         data[k] = value
@@ -181,7 +181,7 @@ def make_dense(ham, args, CGraph gr, diag,
 
     h_sub = np.zeros((to_off[-1], from_off[-1]), complex)
     h_sub_view = h_sub
-    for n_fs in xrange(len(from_sites)):
+    for n_fs in range(len(from_sites)):
         fs = from_sites[n_fs]
         if fs in n_by_to_site:
             n_ts = n_by_to_site[fs]
@@ -218,7 +218,7 @@ def make_dense_full(ham, args, CGraph gr, diag,
 
     h_sub = np.zeros((to_off[-1], from_off[-1]), complex)
     h_sub_view = h_sub
-    for fs in xrange(n):
+    for fs in range(n):
         h = diag[fs]
         if not (h.shape[0] ==  h.shape[1] == from_norb[fs]):
             raise ValueError(msg.format(fs, fs))
@@ -284,7 +284,7 @@ def hamiltonian_submatrix(self, args=(), to_sites=None, from_sites=None,
     if from_sites is None:
         diag = n * [None]
         from_norb = np.empty(n, gint_dtype)
-        for site in xrange(n):
+        for site in range(n):
             diag[site] = h = matrix(ham(site, site, *args), complex)
             from_norb[site] = h.shape[0]
     else:
@@ -306,7 +306,7 @@ def hamiltonian_submatrix(self, args=(), to_sites=None, from_sites=None,
     else:
         if to_sites is None:
             to_norb = np.empty(n, gint_dtype)
-            for site in xrange(n):
+            for site in range(n):
                 h = matrix(ham(site, site, *args), complex)
                 to_norb[site] = h.shape[0]
         else:
diff --git a/kwant/graph/c_slicer.pxd b/kwant/graph/c_slicer.pxd
index 9e692a55bc68e3328a606270c104cedec877779f..d43128e60972fc6b8085cdc4376b3c3153dc6972 100644
--- a/kwant/graph/c_slicer.pxd
+++ b/kwant/graph/c_slicer.pxd
@@ -6,7 +6,7 @@
 # the file AUTHORS.rst at the top-level directory of this distribution and at
 # http://kwant-project.org/authors.
 
-from defs cimport gint
+from .defs cimport gint
 
 cdef extern from "c_slicer/slicer.h":
     struct Slicing:
diff --git a/kwant/graph/core.pyx b/kwant/graph/core.pyx
index c60d5165659d02c21eecbfd72eca1992ab616216..18cddb2ba1ee1f1660be41d6be6d88d6045909dd 100644
--- a/kwant/graph/core.pyx
+++ b/kwant/graph/core.pyx
@@ -384,7 +384,7 @@ cdef class CGraph:
         """
         if node < 0 or node >= self.num_nodes:
             raise NodeDoesNotExistError()
-        return iter(xrange(self.heads_idxs[node], self.heads_idxs[node + 1]))
+        return iter(range(self.heads_idxs[node], self.heads_idxs[node + 1]))
 
     def in_neighbors(self, gint node):
         """Return the nodes which point to a node.
@@ -527,15 +527,15 @@ cdef class CGraph:
         if tail >= self.num_nodes or head >= self.num_nodes:
             raise NodeDoesNotExistError()
         if tail >= 0:
-            for head_index in xrange(self.heads_idxs[tail],
-                                     self.heads_idxs[tail + 1]):
+            for head_index in range(self.heads_idxs[tail],
+                                    self.heads_idxs[tail + 1]):
                 if self.heads[head_index] == head:
                     return head_index
         else:
             if not self.twoway:
                 raise DisabledFeatureError(_need_twoway)
-            for tail_index in xrange(self.tails_idxs[head],
-                                     self.tails_idxs[head + 1]):
+            for tail_index in range(self.tails_idxs[head],
+                                    self.tails_idxs[head + 1]):
                 if self.tails[tail_index] == tail:
                     return self.edge_ids[tail_index]
         raise EdgeDoesNotExistError()
@@ -564,15 +564,15 @@ cdef class CGraph:
             raise NodeDoesNotExistError()
         result = []
         if tail >= 0:
-            for head_index in xrange(self.heads_idxs[tail],
-                                     self.heads_idxs[tail + 1]):
+            for head_index in range(self.heads_idxs[tail],
+                                    self.heads_idxs[tail + 1]):
                 if self.heads[head_index] == head:
                     result.append(head_index)
         else:
             if not self.twoway:
                 raise DisabledFeatureError(_need_twoway)
-            for tail_index in xrange(self.tails_idxs[head],
-                                     self.tails_idxs[head + 1]):
+            for tail_index in range(self.tails_idxs[head],
+                                    self.tails_idxs[head + 1]):
                 if self.tails[tail_index] == tail:
                     result.append(self.edge_ids[tail_index])
         return result
diff --git a/kwant/graph/slicer.pyx b/kwant/graph/slicer.pyx
index 0ddcd48f014d76a9d084ebba6a215d1b61dcbe9b..07458361189982a0ff1458d747fc9efc8d573e4d 100644
--- a/kwant/graph/slicer.pyx
+++ b/kwant/graph/slicer.pyx
@@ -14,9 +14,7 @@ from .defs cimport gint
 from .defs import gint_dtype
 from .core cimport CGraph
 
-cimport c_slicer
-# TODO: Once cythonize() no longer warns about it, replace the above be
-# from . cimport c_slicer
+from . cimport c_slicer
 
 __all__ = ['slice']
 
@@ -44,7 +42,7 @@ def slice(CGraph graph, left, right):
     # slicing only possible if there is no overlap between
     # left and right slices
     if np.intersect1d(rightarr, leftarr, assume_unique=True).size:
-        return [tuple(xrange(graph.num_nodes))]
+        return [tuple(range(graph.num_nodes))]
 
     slicing = c_slicer.slice(graph.num_nodes,
                              graph.heads_idxs,
@@ -52,7 +50,7 @@ def slice(CGraph graph, left, right):
                              leftarr.size, <gint *>leftarr.data,
                              rightarr.size, <gint *>rightarr.data)
     slices = []
-    for i in xrange(slicing.nslices):
+    for i in range(slicing.nslices):
         slc_size = slicing.slice_ptr[i+1] - slicing.slice_ptr[i]
         slc = np.empty(slc_size, dtype=gint_dtype)
         memcpy(slc.data, slicing.slices + slicing.slice_ptr[i],
diff --git a/kwant/graph/utils.pyx b/kwant/graph/utils.pyx
index 60fd3b9cfa7e4ff45c3ea373429c867a30c7a57d..1ecb14319919b604404271a334608cfd008753cf 100644
--- a/kwant/graph/utils.pyx
+++ b/kwant/graph/utils.pyx
@@ -73,19 +73,19 @@ def make_undirected(CGraph gr, remove_dups=True, calc_weights=False):
     # additional buffer array.
     cdef gint *buffer = ret.heads_idxs + 1
 
-    for i in xrange(gr.num_nodes):
-        for p in xrange(gr.heads_idxs[i], gr.heads_idxs[i+1]):
+    for i in range(gr.num_nodes):
+        for p in range(gr.heads_idxs[i], gr.heads_idxs[i+1]):
             if gr.heads[p] >= 0:
                 buffer[i] += 1
                 buffer[gr.heads[p]] += 1
 
     cdef gint s = 0
-    for i in xrange(ret.num_nodes):
+    for i in range(ret.num_nodes):
         s += buffer[i]
         buffer[i] = s - buffer[i]
 
-    for i in xrange(gr.num_nodes):
-        for p in xrange(gr.heads_idxs[i], gr.heads_idxs[i+1]):
+    for i in range(gr.num_nodes):
+        for p in range(gr.heads_idxs[i], gr.heads_idxs[i+1]):
             j = gr.heads[p]
             if j >= 0:
                 ret.heads[buffer[i]] = j
@@ -150,10 +150,10 @@ def remove_duplicates(CGraph gr, np.ndarray[gint, ndim=1] edge_weights=None):
 
     nnz=0
 
-    for i in xrange(gr.num_nodes):
+    for i in range(gr.num_nodes):
         q = nnz
 
-        for p in xrange(gr.heads_idxs[i], gr.heads_idxs[i+1]):
+        for p in range(gr.heads_idxs[i], gr.heads_idxs[i+1]):
             j = gr.heads[p]
 
             # Check if we have found a previous entry (i,j).  (In this case w
@@ -233,7 +233,7 @@ def induced_subgraph(CGraph gr, select,
     else:
         selecttab = select(np.arange(gr.num_nodes, dtype=gint_dtype))
 
-    for i in xrange(gr.num_nodes):
+    for i in range(gr.num_nodes):
         if selecttab[i]:
             indextab[i] = sub_num_nodes
             sub_num_nodes += 1
@@ -241,9 +241,9 @@ def induced_subgraph(CGraph gr, select,
     # Now count the number of new edges.
     sub_num_edges = 0
 
-    for i in xrange(gr.num_nodes):
+    for i in range(gr.num_nodes):
         if indextab[i] > -1:
-            for iedge in xrange(gr.heads_idxs[i], gr.heads_idxs[i + 1]):
+            for iedge in range(gr.heads_idxs[i], gr.heads_idxs[i + 1]):
                 if indextab[gr.heads[iedge]] > -1:
                     sub_num_edges += 1
 
@@ -256,10 +256,10 @@ def induced_subgraph(CGraph gr, select,
     # Now fill the new edge array.
     edge_count = 0
 
-    for i in xrange(gr.num_nodes):
+    for i in range(gr.num_nodes):
         if indextab[i]>-1:
             subgr.heads_idxs[indextab[i]] = edge_count
-            for iedge in xrange(gr.heads_idxs[i], gr.heads_idxs[i+1]):
+            for iedge in range(gr.heads_idxs[i], gr.heads_idxs[i+1]):
                 if indextab[gr.heads[iedge]] > -1:
                     subgr.heads[edge_count] = indextab[gr.heads[iedge]]
                     if edge_weights is not None:
@@ -276,8 +276,5 @@ def induced_subgraph(CGraph gr, select,
 
 
 def print_graph(gr):
-    for i in xrange(gr.num_nodes):
-        print i," -> ",
-        for j in gr.out_neighbors(i):
-            print j,
-        print
+    for i in range(gr.num_nodes):
+        print(i, "->", *gr.out_neighbors(i))
diff --git a/kwant/linalg/_mumps.pyx b/kwant/linalg/_mumps.pyx
index 6d5aad67c2c53de2a68b0ae248a931140f21931f..68b59aa053e04373ae2c706c609ede7e740eff8a 100644
--- a/kwant/linalg/_mumps.pyx
+++ b/kwant/linalg/_mumps.pyx
@@ -8,9 +8,9 @@
 
 cimport numpy as np
 import numpy as np
-cimport cmumps
-import cmumps
-from fortran_helpers import assert_fortran_matvec, assert_fortran_mat
+from . cimport cmumps
+from . import cmumps
+from .fortran_helpers import assert_fortran_matvec, assert_fortran_mat
 
 int_dtype = cmumps.int_dtype
 
diff --git a/kwant/linalg/lapack.pyx b/kwant/linalg/lapack.pyx
index da74fbc51b1fc457a21247129c12d07abcc31012..d6699a0e11ea912506d417d1e1ab59270c3270a3 100644
--- a/kwant/linalg/lapack.pyx
+++ b/kwant/linalg/lapack.pyx
@@ -25,9 +25,9 @@ cimport numpy as np
 
 # Strangely absolute imports from kwant.linalg.f_lapack don't work here.  So
 # continue using traditional implicit relative imports.
-import f_lapack
-cimport f_lapack
-from f_lapack cimport l_int, l_logical
+from . import f_lapack
+from . cimport f_lapack
+from .f_lapack cimport l_int, l_logical
 
 int_dtype = f_lapack.l_int_dtype
 logical_dtype = f_lapack.l_logical_dtype
@@ -1070,7 +1070,7 @@ def txevc_postprocess(dtype, T, vreal, np.ndarray[l_logical] select):
     v = np.empty((N, M), dtype = dtype, order='F')
 
     indx = 0
-    for m in xrange(M):
+    for m in range(M):
         k = selindx[m]
 
         if k < N-1 and T[k+1,k]:
@@ -2486,7 +2486,7 @@ def prepare_for_lapack(overwrite, *args):
 
     # Make sure we have NumPy arrays
     mats = [None]*len(args)
-    for i in xrange(len(args)):
+    for i in range(len(args)):
         if args[i] is not None:
             arr = np.asanyarray(args[i])
             if not np.issubdtype(arr.dtype, np.number):
diff --git a/setup.py b/setup.py
index 2d14a51675cd70641d6fe87a7ef2e806a6282b02..d12e1773bbb2c36a7f492479824e19e88fc4228a 100755
--- a/setup.py
+++ b/setup.py
@@ -374,7 +374,7 @@ def ext_modules(extensions):
     """
     if use_cython and cython_version >= REQUIRED_CYTHON_VERSION:
         return cythonize([Extension(*args, **kwrds)
-                          for args, kwrds in extensions])
+                          for args, kwrds in extensions], language_level=3)
 
     # Cython is not going to be run: replace pyx extension by that of
     # the shipped translated file.