diff --git a/doc/source/images/tutorial3b.py b/doc/source/images/tutorial3b.py
index dff11c641d2810d5c705c78b2c81c7a567c3f66a..c219a0f30bafb2fe975ac8ec3cc02c4450262502 100644
--- a/doc/source/images/tutorial3b.py
+++ b/doc/source/images/tutorial3b.py
@@ -68,7 +68,7 @@ for iB in xrange(100):
     B = iB * 0.002
 
 # Obtain the Hamiltonian as a dense matrix
-    ham_mat = fsys.hamiltonian_submatrix()
+    ham_mat = fsys.hamiltonian_submatrix()[0]
 
     ev = la.eigh(ham_mat, eigvals_only=True)
 
diff --git a/doc/source/whatsnew/0.1.rst b/doc/source/whatsnew/0.1.rst
index 747914dd02aeb9e6f7ae1ce8b16dbf275b881915..6d331008501c2c0e9aeb559ed56687b3e25a4635 100644
--- a/doc/source/whatsnew/0.1.rst
+++ b/doc/source/whatsnew/0.1.rst
@@ -2,3 +2,10 @@ What's New in kwant 0.2
 =======================
 
 This article explains the user-visible changes in kwant 0.2.
+
+
+Return value of `~kwant.system.System.hamiltonian_submatrix`
+------------------------------------------------------------
+
+`~kwant.system.System.hamiltonian_submatrix` now always returns a 3-tuple
+``hamiltonian_part, to_norb, from_norb``.
diff --git a/examples/tutorial3b.py b/examples/tutorial3b.py
index 11240f01585c20cf7ca02add85ee538e94cd95bf..08f317fe0a262565f4724a7986bde076bd2057e1 100644
--- a/examples/tutorial3b.py
+++ b/examples/tutorial3b.py
@@ -61,7 +61,7 @@ def plot_spectrum(fsys, Bfields):
         B = Bfield
 
         # Obtain the Hamiltonian as a dense matrix
-        ham_mat = fsys.hamiltonian_submatrix()
+        ham_mat = fsys.hamiltonian_submatrix()[0]
 
         ev = la.eigh(ham_mat, eigvals_only=True)
 
diff --git a/kwant/builder.py b/kwant/builder.py
index b8c859754151f4f557109b5a0620876a8c649532..07b60af6034d2b1c55221b5e6c492e8425503528 100644
--- a/kwant/builder.py
+++ b/kwant/builder.py
@@ -1358,9 +1358,13 @@ class Builder(object):
 
 ################ Finalized systems
 
-class System(system.System):
-    """Finalized Builder."""
 
+class FiniteSystem(system.FiniteSystem):
+    """
+    Finalized `Builder` with leads.
+
+    Usable as input for the solvers in `kwant.solvers`.
+    """
     def hamiltonian(self, i, j):
         if i == j:
             value = self.onsite_hamiltonians[i]
@@ -1391,14 +1395,35 @@ class System(system.System):
         return self.site(i).pos
 
 
-class FiniteSystem(System, system.FiniteSystem):
-    """
-    Finalized `Builder` with leads.
-
-    Usable as input for the solvers in `kwant.solvers`.
-    """
-    pass
+class InfiniteSystem(system.InfiniteSystem):
+    """Finalized infinite system, extracted from a `Builder`."""
+    def hamiltonian(self, i, j):
+        if i == j:
+            if i >= self.slice_size:
+                i -= self.slice_size
+            value = self.onsite_hamiltonians[i]
+            if hasattr(value, '__call__'):
+                value = value(self.symmetry.to_fd(self.site(i)))
+            return value
+        else:
+            edge_id = self.graph.first_edge_id(i, j)
+            value = self.hoppings[edge_id]
+            conj = value is other
+            if conj:
+                i, j = j, i
+                edge_id = self.graph.first_edge_id(i, j)
+                value = self.hoppings[edge_id]
+            if hasattr(value, '__call__'):
+                site_i = self.site(i)
+                site_j = self.site(j)
+                value = value(*self.symmetry.to_fd(site_i, site_j))
+            if conj:
+                value = herm_conj(value)
+            return value
 
+    def site(self, i):
+        a, b = self.psites_idxs[i : i + 2]
+        return unpack(self.psites[a : b], self.group_by_pgid)
 
-class InfiniteSystem(System, system.InfiniteSystem):
-    """Finalized infinite system, extracted from a `Builder`."""
+    def pos(self, i):
+        return self.site(i).pos
diff --git a/kwant/solvers/sparse.py b/kwant/solvers/sparse.py
index 68546232f5ab7bc1317f41579271dd68273a9783..bfe80ad43e9420bb5785cf0d7d87738c86630c7b 100644
--- a/kwant/solvers/sparse.py
+++ b/kwant/solvers/sparse.py
@@ -113,17 +113,16 @@ def make_linear_sys(sys, out_leads, in_leads, energy=0,
     """
     if not sys.lead_neighbor_seqs:
         raise ValueError('System contains no leads.')
-    h_sys = sys.hamiltonian_submatrix(sparse=True).tocsc()
+    h_sys, norb = sys.hamiltonian_submatrix(sparse=True)[:2]
+    h_sys = h_sys.tocsc()
     h_sys = h_sys - energy * sp.identity(h_sys.shape[0], format='csc')
 
     # Hermiticity check.
     if np.any(np.abs((h_sys - h_sys.T.conj()).data) > 1e-13):
         raise ValueError('System Hamiltonian is not Hermitian.')
 
-    norb, num_nodes = sys.num_orbitals, sys.graph.num_nodes
-    norb_arr = np.array([norb(i) for i in xrange(num_nodes)], int)
-    offsets = np.zeros(norb_arr.shape[0] + 1, int)
-    offsets[1 :] = np.cumsum(norb_arr)
+    offsets = np.zeros(norb.shape[0] + 1, int)
+    offsets[1 :] = np.cumsum(norb)
 
     # Process the leads, generate the eigenvector matrices and lambda vectors.
     # Then create blocks of the linear system and add them step by step.
diff --git a/kwant/system.py b/kwant/system.py
index e512a4950d109020eeb89ee70726c6e21b76b547..c87d1221eeefdc73d43c9ceb95f91d0827e72a66 100644
--- a/kwant/system.py
+++ b/kwant/system.py
@@ -46,98 +46,152 @@ class System(object):
         """
         pass
 
-    def hamiltonian_submatrix(self, b_sites=None, a_sites=None, sparse=False):
+    def hamiltonian_submatrix(self, to_sites=None, from_sites=None,
+                              sparse=False):
         """Return a submatrix of the system Hamiltonian.
 
+        Parameters
+        ----------
+        to_sites : sequence of sites or None (default)
+        from_sites : sequence of sites or None (default)
+        sparse : bool
+            Whether to return a sparse or a dense matrix. Defaults to `False`.
+
+        Returns
+        -------
+        hamiltonian_part : numpy.ndarray or scipy.sparse.coo_matrix
+            Submatrix of Hamiltonian of the system.
+        to_norb : array of integers
+            Numbers of orbitals on each site in to_sites.
+        from_norb : array of integers
+            Numbers of orbitals on each site in from_sites.
+
+        Notes
+        -----
         The returned submatrix contains all the Hamiltonian matrix elements
-        from `a_sites` to `b_sites`.  If no value for a_sites or `b_sites` is
-        provided, the default is to take all sites of the system in the order
-        in which they appear. If sparse is set to `True`, a
-        `scipy.sparse.coo_matrix` is returned, otherwise a dense one.
+        from `from_sites` to `to_sites`.  The default for `from_sites` and
+        `to_sites` is `None` which means to use all sites of the system in the
+        order in which they appear.
         """
-        msg = 'Hopping from site {0} to site {1} does not match the ' +\
+        msg = 'Hopping from site {0} to site {1} does not match the ' \
               'dimensions of onsite Hamiltonians of these sites.'
 
-        def create_sparse():
+        def make_sparse():
             # Calculate the data size.
             num_entries = 0
-            for n_i, i in enumerate(a_sites):
+            for n_i, i in enumerate(from_sites):
                 for j in chain((i,), gr.out_neighbors(i)):
-                    if j in b_coord:
-                        n_j = b_coord[j]
-                        num_entries += b_norb[n_j] * a_norb[n_i]
+                    if j in to_coord:
+                        n_j = to_coord[j]
+                        num_entries += to_norb[n_j] * from_norb[n_i]
 
             ij = np.empty((2, num_entries), dtype=int)
             data = np.empty(num_entries, dtype=complex)
 
-            offset = 0
-            for n_i, i in enumerate(a_sites):
+            pos = 0
+            for n_i, i in enumerate(from_sites):
                 for j in chain((i,), gr.out_neighbors(i)):
-                    if j in b_coord:
-                        n_j = b_coord[j]
-                        h = ham(j, i)
-                        # The shape check is here to prevent data corruption.
-                        shape = (1, 1) if np.isscalar(h) else h.shape
-                        if shape != (b_norb[n_j], a_norb[n_i]):
-                            raise ValueError(msg.format(i, j))
-                        if np.isscalar(h):
-                            data[offset] = h
-                            ij[0, offset] = n_j
-                            ij[1, offset] = n_i
-                            offset += 1
-                        else:
-                            h = np.ravel(h, order='F')
-                            coord = slice(offset, offset + h.size)
-                            data[coord] = h
-                            jtmp = np.arange(b_norb[n_j]) + b_off[n_j]
-                            itmp = np.arange(a_norb[n_i]) + a_off[n_i]
-                            jtmp, itmp = np.meshgrid(jtmp, itmp)
-                            ij[0, coord] = jtmp.ravel()
-                            ij[1, coord] = itmp.ravel()
-                            offset += h.shape[0]
+                    n_j = to_coord.get(j)
+                    if n_j is None:
+                        continue
+                    h = ham(j, i) if j != i else diag[i]
+                    # The shape check is here to prevent data corruption.
+                    shape = (1, 1) if np.isscalar(h) else h.shape
+                    if shape != (to_norb[n_j], from_norb[n_i]):
+                        raise ValueError(msg.format(i, j))
+                    if np.isscalar(h):
+                        data[pos] = h
+                        ij[0, pos] = n_j
+                        ij[1, pos] = n_i
+                        pos += 1
+                    else:
+                        h = np.ravel(h, order='F')
+                        coord = slice(pos, pos + h.size)
+                        data[coord] = h
+                        jtmp = np.arange(to_norb[n_j]) + to_off[n_j]
+                        itmp = np.arange(from_norb[n_i]) + from_off[n_i]
+                        jtmp, itmp = np.meshgrid(jtmp, itmp)
+                        ij[0, coord] = jtmp.ravel()
+                        ij[1, coord] = itmp.ravel()
+                        pos += h.shape[0]
             return sp.coo_matrix((data, ij), shape=result_shape)
 
-        def create_dense():
+        def make_dense():
             # Shape checks of arrays are performed by numpy upon subblock
             # assignment.
             h_sub = np.zeros(result_shape, dtype='complex')
-            for n_i, i in enumerate(a_sites):
+            for n_i, i in enumerate(from_sites):
                 for j in chain((i,), gr.out_neighbors(i)):
-                    if j in b_coord:
-                        n_j = b_coord[j]
-                        try:
-                            h_sub[b_off[n_j] : b_off[n_j + 1],
-                                    a_off[n_i] : a_off[n_i + 1]] = ham(j, i)
-                        except ValueError:
-                            raise ValueError(msg.format(i, j))
+                    n_j = to_coord.get(j)
+                    if n_j is None:
+                        continue
+                    try:
+                        h_sub[to_off[n_j] : to_off[n_j + 1],
+                              from_off[n_i] : from_off[n_i + 1]] = \
+                              ham(j, i) if j != i else diag[i]
+                    except ValueError:
+                        raise ValueError(msg.format(i, j))
             return h_sub
 
         gr = self.graph
         ham = self.hamiltonian
         n = self.graph.num_nodes
-        if a_sites is None:
-            a_sites = xrange(n)
-        if b_sites is None:
-            b_sites = xrange(n)
-        if not all(site < n for site in a_sites) or \
-           not all(site < n for site in b_sites):
-            raise KeyError('site number out of range')
-        a_norb = np.array([self.num_orbitals(i) for i in a_sites], int)
-        a_off = np.zeros(a_norb.shape[0] + 1, int)
-        a_off[1 :] = np.cumsum(a_norb)
-
-        b_norb = np.array([self.num_orbitals(i) for i in b_sites], int)
-        b_off = np.zeros(b_norb.shape[0] + 1, int)
-        b_off[1 :] = np.cumsum(b_norb)
-        # Instead of doing a double loop over a_sites and b_sites it is more
-        # efficient to check if neighbors of a_sites are in b_sites.
-        b_coord = dict((i[1], i[0]) for i in enumerate(b_sites))
-        result_shape = (b_off[-1],  a_off[-1])
-
-        if sparse:
-            return create_sparse()
+
+        if not ((from_sites is None or
+                 all(0 <= site < n for site in from_sites)) and
+                (to_sites is None or
+                 all(0 <= site < n for site in to_sites))):
+            raise IndexError('Site number out of range.')
+
+        # Cache diagonal entries.
+        isscalar = np.isscalar
+        if from_sites is None or to_sites is None:
+            # np.fromiter does not work with dtype=object.
+            diag = np.array([ham(i, i) for i in xrange(n)], dtype=object)
+            norb = np.fromiter(
+                (1 if isscalar(h) else h.shape[0] for h in diag), int, n)
+        else:
+            if (max(len(from_sites), len(to_sites)) < n // 4 > 4):
+                diag = {}
+                get = diag.get
+            else:
+                diag = np.empty(n, dtype=object)
+                get = diag.__getitem__
+
+        # Make from_norb and from_off.
+        if from_sites is None:
+            from_sites = xrange(n)
+            from_norb = norb
         else:
-            return create_dense()
+            from_norb = np.empty(len(from_sites), dtype=int)
+            for n_i, i in enumerate(from_sites):
+                h = get(i)
+                if h is None:
+                    diag[i] = h = ham(i, i)
+                from_norb[n_i] = 1 if isscalar(h) else h.shape[0]
+        from_off = np.zeros(from_norb.shape[0] + 1, int)
+        from_off[1 :] = np.cumsum(from_norb)
+
+        # Make to_norb and to_off.
+        if to_sites is None:
+            to_sites = xrange(n)
+            to_norb = norb
+        else:
+            to_norb = np.empty(len(to_sites), dtype=int)
+            for n_i, i in enumerate(to_sites):
+                h = get(i)
+                if h is None:
+                    diag[i] = h = ham(i, i)
+                to_norb[n_i] = 1 if isscalar(h) else h.shape[0]
+        to_off = np.zeros(to_norb.shape[0] + 1, int)
+        to_off[1 :] = np.cumsum(to_norb)
+
+        # Instead of doing a double loop over from_sites and to_sites it is
+        # more efficient to check if neighbors of from_sites are in to_sites.
+        to_coord = dict((i[1], i[0]) for i in enumerate(to_sites))
+        result_shape = (to_off[-1], from_off[-1])
+
+        return make_sparse() if sparse else make_dense(), to_norb, from_norb
 
 
 class FiniteSystem(System):
@@ -213,14 +267,14 @@ class InfiniteSystem(System):
     def slice_hamiltonian(self):
         """Hamiltonian of a single slice of the infinite system."""
         slice_sites = xrange(self.slice_size)
-        return self.hamiltonian_submatrix(slice_sites, slice_sites)
+        return self.hamiltonian_submatrix(slice_sites, slice_sites)[0]
 
     def inter_slice_hopping(self):
         """Hopping Hamiltonian between two slices of the infinite system."""
         slice_size = self.slice_size
         slice_sites = xrange(self.slice_size)
         neighbor_sites = xrange(self.slice_size, self.graph.num_nodes)
-        return self.hamiltonian_submatrix(slice_sites, neighbor_sites)
+        return self.hamiltonian_submatrix(slice_sites, neighbor_sites)[0]
 
     def self_energy(self, energy):
         """Return self-energy of a lead.
@@ -236,17 +290,6 @@ class InfiniteSystem(System):
         ham.flat[::ham.shape[0] + 1] -= energy
         return physics.self_energy(ham, self.inter_slice_hopping())
 
-    def num_orbitals(self, site):
-        """Return the number of orbitals of a site.
-
-        This is an inefficient general implementation.  It should be
-        overridden, if a more efficient way to calculate is available.
-        """
-        if site >= self.slice_size:
-            site -= self.slice_size
-        ham = self.hamiltonian(site, site)
-        return 1 if np.isscalar(ham) else ham.shape[0]
-
     @property
     def energies(self):
         """
diff --git a/kwant/tests/test_system.py b/kwant/tests/test_system.py
index abff175c2cd6e8ea91c1654fc169fcd88c421dd9..2411b0d771f48ffd3917d3bc25ef5903214e52a9 100644
--- a/kwant/tests/test_system.py
+++ b/kwant/tests/test_system.py
@@ -12,7 +12,7 @@ def test_hamiltonian_submatrix():
         sys[(i,), (i + 1,)] = 1j * (i + 1)
 
     sys2 = sys.finalized()
-    mat = sys2.hamiltonian_submatrix()
+    mat = sys2.hamiltonian_submatrix()[0]
     assert mat.shape == (3, 3)
     # Sorting is required due to unknown compression order of builder.
     perm = np.argsort(sys2.onsite_hamiltonians)
@@ -22,17 +22,17 @@ def test_hamiltonian_submatrix():
     mat = mat[:, perm]
     np.testing.assert_array_equal(mat, mat_should_be)
 
-    mat = sys2.hamiltonian_submatrix(sparse=True)
+    mat = sys2.hamiltonian_submatrix(sparse=True)[0]
     assert sparse.isspmatrix_coo(mat)
     mat = mat.todense()
     mat = mat[perm, :]
     mat = mat[:, perm]
     np.testing.assert_array_equal(mat, mat_should_be)
 
-    mat = sys2.hamiltonian_submatrix(perm[[0, 1]], perm[[2]])
+    mat = sys2.hamiltonian_submatrix(perm[[0, 1]], perm[[2]])[0]
     np.testing.assert_array_equal(mat, mat_should_be[: 2, 2])
 
-    mat = sys2.hamiltonian_submatrix(perm[[0, 1]], perm[[2]], sparse=True)
+    mat = sys2.hamiltonian_submatrix(perm[[0, 1]], perm[[2]], sparse=True)[0]
     mat = mat.todense()
     np.testing.assert_array_equal(mat, mat_should_be[: 2, 2])
 
@@ -45,8 +45,8 @@ def test_hamiltonian_submatrix():
     sys[(1,), (0,)] = np.mat('1 2j')
     sys[(2,), (1,)] = np.mat('3j')
     sys2 = sys.finalized()
-    mat_dense = sys2.hamiltonian_submatrix()
-    mat_sp = sys2.hamiltonian_submatrix(sparse=True).todense()
+    mat_dense = sys2.hamiltonian_submatrix()[0]
+    mat_sp = sys2.hamiltonian_submatrix(sparse=True)[0].todense()
     np.testing.assert_array_equal(mat_sp, mat_dense)
 
     # Test for shape errors.