Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

# Copyright 2011-2013 Kwant authors. 

# 

# This file is part of Kwant. It is subject to the license terms in the file 

# LICENSE.rst found in the top-level directory of this distribution and at 

# http://kwant-project.org/license. A list of Kwant authors can be found in 

# the file AUTHORS.rst at the top-level directory of this distribution and at 

# http://kwant-project.org/authors. 

  

"""Utilities to modify compressed graphs""" 

  

__all__ = ['make_undirected', 'remove_duplicates', 'induced_subgraph', 

'print_graph'] 

  

import numpy as np 

cimport numpy as np 

from libc.string cimport memset 

cimport cython 

from cpython cimport array 

import array 

from .defs cimport gint 

from .defs import gint_dtype 

from .core cimport CGraph, CGraph_malloc 

from .core import CGraph, CGraph_malloc 

  

@cython.boundscheck(False) 

def make_undirected(CGraph gr, remove_dups=True, calc_weights=False): 

"""undirected_graph(gr) expects a CGraph gr as input, which is interpreted 

as a directed graph, and returns a CGraph that is explicitely undirected, 

i.e. for every edge (i,j) there is also the edge (j,i). In the process, the 

function also removes all 'dangling' links, i.e. edges to or from 

negative node numbers. 

  

If remove_dups == True (default value is True), any duplicates of edges 

will be removed (this applies to the case where there are multiple edges 

(i,j), not to having (i,j) and (j,i)). 

  

The effect of the duplicate edges can be retained if calc_weights == True 

(default value is False), in which case a weight array is returned 

containing the multiplicity of the edges after the graph has been made 

undirected. 

  

As a (somewhat drastic but illustrative) example, if make_undirected is 

applied to a undirected graph, it will return the same graph again 

(possibly with the order of edges changed) and a weight array with 2 

everywhere. (Of course, in this case one does not need to call 

make_undirected ...) 

  

make_undirected() will always return a one-way graph, regardless of 

whether the input was a two-way graph or not (NOTE: This 

restriction could be lifted, if necessary). In addition, the 

original edge_ids are lost -- the resulting graph will have 

edge_ids that are not related to the original ones. (NOTE: there 

certainly is a relation, but as long as no-one needs it it remains 

unspecified) 

""" 

  

cdef gint i, j, p 

  

# The undirected graph will have twice as many edges than the directed one 

# (duplicates will be deleted afterwards). 

cdef CGraph_malloc ret = CGraph_malloc(False, False, gr.num_nodes, 

gr.heads_idxs[gr.num_nodes] * 2, 

0, 0) 

  

# In the following we build up the Graph directly in compressed format by 

# adding for every edge (i,j) [with i,j>=0] also the edge (j,i). Taking 

# care of possible doubling is done in a second step later. 

  

# Initialize the new index array: 

# First, compute a histogram of edges. 

memset(ret.heads_idxs, 0, (ret.num_nodes + 1) * sizeof(gint)) 

  

# This is using Christoph's trick of building up the graph without 

# additional buffer array. 

cdef gint *buffer = ret.heads_idxs + 1 

  

for i in range(gr.num_nodes): 

for p in range(gr.heads_idxs[i], gr.heads_idxs[i+1]): 

if gr.heads[p] >= 0: 

buffer[i] += 1 

buffer[gr.heads[p]] += 1 

  

cdef gint s = 0 

for i in range(ret.num_nodes): 

s += buffer[i] 

buffer[i] = s - buffer[i] 

  

for i in range(gr.num_nodes): 

for p in range(gr.heads_idxs[i], gr.heads_idxs[i+1]): 

j = gr.heads[p] 

if j >= 0: 

ret.heads[buffer[i]] = j 

buffer[i] += 1 

ret.heads[buffer[j]] = i 

buffer[j] += 1 

  

ret.num_edges = ret.heads_idxs[ret.num_nodes] 

  

# Now remove duplicates if desired. 

cdef np.ndarray[gint, ndim=1] weights 

  

if calc_weights: 

weights = np.empty(ret.heads_idxs[ret.num_nodes], dtype=gint_dtype) 

weights[:] = 1 

  

if remove_dups: 

if calc_weights: 

remove_duplicates(ret, weights) 

else: 

remove_duplicates(ret) 

  

if calc_weights: 

return ret, weights 

else: 

return ret 

  

  

@cython.boundscheck(False) 

def remove_duplicates(CGraph gr, np.ndarray[gint, ndim=1] edge_weights=None): 

"""Remove duplicate edges in the CGraph gr (this applies to the case where 

there are multiple edges (i,j), not to having (i,j) and (j,i)). This 

function modifes the graph in place. 

  

If edge_weights is provided, edge_weights is modified such that the new 

edge weights are the sum of the old edge weights if there are duplicate 

edges. 

  

This function only works on simple graphs (not two-way graphs), and 

it does not work on graphs which have a relation between the edge number 

(given by the order the edges are added) and the edge_id (given by the 

order the edges appear in the graph), see the documentation of CGraph. 

(Both restrictions could be lifted if necessary.) Furthermore, the 

function does not support negative node numbers, i.e. dangling links 

(the concept of being duplicate is more complicated there.) 

""" 

cdef gint i, j , p, q, nnz 

cdef np.ndarray[gint, ndim=1] w 

  

if gr.twoway: 

raise RuntimeError("remove_duplicates does not support two-way " 

"graphs") 

  

if gr.edge_ids_by_edge_nr: 

raise RuntimeError("remove_duplicates does not support graphs with " 

"a relation between the edge number and the edge " 

"id") 

  

# The array w will hold the position of head j in the heads array. 

w = np.empty(gr.num_nodes, dtype=gint_dtype) 

w[:]=-1 

  

nnz=0 

  

for i in range(gr.num_nodes): 

q = nnz 

  

for p in range(gr.heads_idxs[i], gr.heads_idxs[i+1]): 

j = gr.heads[p] 

  

# Check if we have found a previous entry (i,j). (In this case w 

# will have an index larger than the indices of all previous edges 

# with tails < i, as stored in q.) 

if w[j] >= q: 

# entry is a duplicate 

if edge_weights is not None: 

edge_weights[w[j]] += edge_weights[p] 

else: 

w[j] = nnz 

gr.heads[nnz] = j 

nnz += 1 

  

# Fix the index array. 

gr.heads_idxs[i] = q 

  

# Finally the new number of nonzeros 

gr.heads_idxs[gr.num_nodes] = nnz 

gr.num_edges = nnz 

  

# Release memory that is not needed any more. 

array.resize(gr._heads, nnz) 

gr.heads = <gint *>gr._heads.data.as_ints 

if not gr.heads: 

raise MemoryError 

  

if edge_weights is not None: 

edge_weights.resize(nnz, refcheck=False) 

  

@cython.boundscheck(False) 

def induced_subgraph(CGraph gr, select, 

np.ndarray[gint, ndim=1] edge_weights=None): 

"""Return a subgraph of the CGraph gr by picking all nodes 

[0:gr.num_nodes] for which select is True. select can be either a 

NumPy array, or a function that takes the node number as 

input. This function returns a CGraph as well. 

  

The nodes in the new graph are again numbered sequentially from 0 

to num_nodes-1, where num_nodes is the number of nodes in the 

subgraph. The numbering is done such that the ordering of the node 

numbers in the original and the subgraph are preserved (i.e. 

if nodes n1 and n2 are both in the subgraph, and 

original node number of n1 < original node number of n2, 

then also subgraph node number n1 < subgraph node number n2). 

  

If edge_weights is provided, the function also returns the edge 

weights for the subgraph which are simply a subset of the original 

weights. 

  

This function returns a simple graph, regardless of whether the 

input was a two-way graph or not (NOTE: This restriction could be 

lifted, if necessary). Also, the resulting edge_ids are not 

related to the original ones in any way (NOTE: There certainly is 

a relation, but as long no-one needs it, we do not specify 

it). Also, negative nodes are discarded (NOTE: this restriction 

can also be lifted). 

""" 

  

cdef np.ndarray[gint, ndim=1] indextab 

cdef CGraph_malloc subgr 

cdef np.ndarray[gint, ndim=1] sub_edge_weights = None 

cdef gint sub_num_nodes, sub_num_edges 

cdef gint i, iedge, edge_count 

  

# First figure out the new number of nodes. 

sub_num_nodes = 0 

indextab = np.empty(gr.num_nodes, dtype=gint_dtype) 

indextab[:] = -1 

  

# Pre-evaluating the select functions seems to be more than twice as fast 

# as calling select() repeatedly in the loop. The thing is that one cannot 

# type ndarray as bool in Cython (yet) [Taking bint results in a strange 

# crash]. It would be possible to cast it into a cython type using 

# .astype(), but this didn't seem to make any relevant speed difference. 

if isinstance(select, np.ndarray): 

selecttab = select 

else: 

selecttab = select(np.arange(gr.num_nodes, dtype=gint_dtype)) 

  

for i in range(gr.num_nodes): 

if selecttab[i]: 

indextab[i] = sub_num_nodes 

sub_num_nodes += 1 

  

# Now count the number of new edges. 

sub_num_edges = 0 

  

for i in range(gr.num_nodes): 

if indextab[i] > -1: 

for iedge in range(gr.heads_idxs[i], gr.heads_idxs[i + 1]): 

if indextab[gr.heads[iedge]] > -1: 

sub_num_edges += 1 

  

# Allocate the new graph. 

subgr = CGraph_malloc(False, False, sub_num_nodes, sub_num_edges, 0, 0) 

  

if edge_weights is not None: 

sub_edge_weights = np.empty(sub_num_edges, dtype=gint_dtype) 

  

# Now fill the new edge array. 

edge_count = 0 

  

for i in range(gr.num_nodes): 

if indextab[i]>-1: 

subgr.heads_idxs[indextab[i]] = edge_count 

for iedge in range(gr.heads_idxs[i], gr.heads_idxs[i+1]): 

if indextab[gr.heads[iedge]] > -1: 

subgr.heads[edge_count] = indextab[gr.heads[iedge]] 

if edge_weights is not None: 

sub_edge_weights[edge_count] = edge_weights[iedge] 

edge_count += 1 

subgr.heads_idxs[sub_num_nodes] = edge_count 

  

subgr.num_edges = edge_count 

  

if edge_weights is not None: 

return subgr, sub_edge_weights 

else: 

return subgr 

  

  

def print_graph(gr): 

for i in range(gr.num_nodes): 

print(i, "->", *gr.out_neighbors(i))