ioftools / networkxMiCe / networkxmaster / networkx / algorithms / bipartite / matching.py @ 5cef0f13
History  View  Annotate  Download (17.5 KB)
1 
# matching.py  bipartite graph maximum matching algorithms


2 
#

3 
# Copyright 2015 Jeffrey Finkelstein <jeffrey.finkelstein@gmail.com>.

4 
#

5 
# This file is part of NetworkX.

6 
#

7 
# NetworkX is distributed under a BSD license; see LICENSE.txt for more

8 
# information.

9 
#

10 
# This module uses material from the Wikipedia article HopcroftKarp algorithm

11 
# <https://en.wikipedia.org/wiki/Hopcroft%E2%80%93Karp_algorithm>, accessed on

12 
# January 3, 2015, which is released under the Creative Commons

13 
# AttributionShareAlike License 3.0

14 
# <http://creativecommons.org/licenses/bysa/3.0/>. That article includes

15 
# pseudocode, which has been translated into the corresponding Python code.

16 
#

17 
# Portions of this module use code from David Eppstein's Python Algorithms and

18 
# Data Structures (PADS) library, which is dedicated to the public domain (for

19 
# proof, see <http://www.ics.uci.edu/~eppstein/PADS/ABOUTPADS.txt>).

20 
"""Provides functions for computing a maximum cardinality matching in a

21 
bipartite graph.

22 

23 
If you don't care about the particular implementation of the maximum matching

24 
algorithm, simply use the :func:`maximum_matching`. If you do care, you can

25 
import one of the named maximum matching algorithms directly.

26 

27 
For example, to find a maximum matching in the complete bipartite graph with

28 
two vertices on the left and three vertices on the right:

29 

30 
>>> import networkx as nx

31 
>>> G = nx.complete_bipartite_graph(2, 3)

32 
>>> left, right = nx.bipartite.sets(G)

33 
>>> list(left)

34 
[0, 1]

35 
>>> list(right)

36 
[2, 3, 4]

37 
>>> nx.bipartite.maximum_matching(G)

38 
{0: 2, 1: 3, 2: 0, 3: 1}

39 

40 
The dictionary returned by :func:`maximum_matching` includes a mapping for

41 
vertices in both the left and right vertex sets.

42 

43 
"""

44 
import collections 
45 
import itertools 
46  
47 
from networkx.algorithms.bipartite import sets as bipartite_sets 
48 
import networkx as nx 
49  
50 
__all__ = ['maximum_matching', 'hopcroft_karp_matching', 'eppstein_matching', 
51 
'to_vertex_cover']

52  
53 
INFINITY = float('inf') 
54  
55  
56 
def hopcroft_karp_matching(G, top_nodes=None): 
57 
"""Returns the maximum cardinality matching of the bipartite graph `G`.

58 

59 
Parameters

60 


61 
G : NetworkX graph

62 

63 
Undirected bipartite graph

64 

65 
top_nodes : container

66 

67 
Container with all nodes in one bipartite node set. If not supplied

68 
it will be computed. But if more than one solution exists an exception

69 
will be raised.

70 

71 
Returns

72 


73 
matches : dictionary

74 

75 
The matching is returned as a dictionary, `matches`, such that

76 
``matches[v] == w`` if node `v` is matched to node `w`. Unmatched

77 
nodes do not occur as a key in mate.

78 

79 
Raises

80 


81 
AmbiguousSolution : Exception

82 

83 
Raised if the input bipartite graph is disconnected and no container

84 
with all nodes in one bipartite set is provided. When determining

85 
the nodes in each bipartite set more than one valid solution is

86 
possible if the input graph is disconnected.

87 

88 
Notes

89 


90 

91 
This function is implemented with the `HopcroftKarp matching algorithm

92 
<https://en.wikipedia.org/wiki/Hopcroft%E2%80%93Karp_algorithm>`_ for

93 
bipartite graphs.

94 

95 
See :mod:`bipartite documentation <networkx.algorithms.bipartite>`

96 
for further details on how bipartite graphs are handled in NetworkX.

97 

98 
See Also

99 


100 

101 
eppstein_matching

102 

103 
References

104 


105 
.. [1] John E. Hopcroft and Richard M. Karp. "An n^{5 / 2} Algorithm for

106 
Maximum Matchings in Bipartite Graphs" In: **SIAM Journal of Computing**

107 
2.4 (1973), pp. 225231. <https://doi.org/10.1137/0202019>.

108 

109 
"""

110 
# First we define some auxiliary search functions.

111 
#

112 
# If you are a human reading these auxiliary search functions, the "global"

113 
# variables `leftmatches`, `rightmatches`, `distances`, etc. are defined

114 
# below the functions, so that they are initialized close to the initial

115 
# invocation of the search functions.

116 
def breadth_first_search(): 
117 
for v in left: 
118 
if leftmatches[v] is None: 
119 
distances[v] = 0

120 
queue.append(v) 
121 
else:

122 
distances[v] = INFINITY 
123 
distances[None] = INFINITY

124 
while queue:

125 
v = queue.popleft() 
126 
if distances[v] < distances[None]: 
127 
for u in G[v]: 
128 
if distances[rightmatches[u]] is INFINITY: 
129 
distances[rightmatches[u]] = distances[v] + 1

130 
queue.append(rightmatches[u]) 
131 
return distances[None] is not INFINITY 
132  
133 
def depth_first_search(v): 
134 
if v is not None: 
135 
for u in G[v]: 
136 
if distances[rightmatches[u]] == distances[v] + 1: 
137 
if depth_first_search(rightmatches[u]):

138 
rightmatches[u] = v 
139 
leftmatches[v] = u 
140 
return True 
141 
distances[v] = INFINITY 
142 
return False 
143 
return True 
144  
145 
# Initialize the "global" variables that maintain state during the search.

146 
left, right = bipartite_sets(G, top_nodes) 
147 
leftmatches = {v: None for v in left} 
148 
rightmatches = {v: None for v in right} 
149 
distances = {} 
150 
queue = collections.deque() 
151  
152 
# Implementation note: this counter is incremented as pairs are matched but

153 
# it is currently not used elsewhere in the computation.

154 
num_matched_pairs = 0

155 
while breadth_first_search():

156 
for v in left: 
157 
if leftmatches[v] is None: 
158 
if depth_first_search(v):

159 
num_matched_pairs += 1

160  
161 
# Strip the entries matched to `None`.

162 
leftmatches = {k: v for k, v in leftmatches.items() if v is not None} 
163 
rightmatches = {k: v for k, v in rightmatches.items() if v is not None} 
164  
165 
# At this point, the left matches and the right matches are inverses of one

166 
# another. In other words,

167 
#

168 
# leftmatches == {v, k for k, v in rightmatches.items()}

169 
#

170 
# Finally, we combine both the left matches and right matches.

171 
return dict(itertools.chain(leftmatches.items(), rightmatches.items())) 
172  
173  
174 
def eppstein_matching(G, top_nodes=None): 
175 
"""Returns the maximum cardinality matching of the bipartite graph `G`.

176 

177 
Parameters

178 


179 
G : NetworkX graph

180 

181 
Undirected bipartite graph

182 

183 
top_nodes : container

184 

185 
Container with all nodes in one bipartite node set. If not supplied

186 
it will be computed. But if more than one solution exists an exception

187 
will be raised.

188 

189 
Returns

190 


191 
matches : dictionary

192 

193 
The matching is returned as a dictionary, `matching`, such that

194 
``matching[v] == w`` if node `v` is matched to node `w`. Unmatched

195 
nodes do not occur as a key in mate.

196 

197 
Raises

198 


199 
AmbiguousSolution : Exception

200 

201 
Raised if the input bipartite graph is disconnected and no container

202 
with all nodes in one bipartite set is provided. When determining

203 
the nodes in each bipartite set more than one valid solution is

204 
possible if the input graph is disconnected.

205 

206 
Notes

207 


208 

209 
This function is implemented with David Eppstein's version of the algorithm

210 
HopcroftKarp algorithm (see :func:`hopcroft_karp_matching`), which

211 
originally appeared in the `Python Algorithms and Data Structures library

212 
(PADS) <http://www.ics.uci.edu/~eppstein/PADS/ABOUTPADS.txt>`_.

213 

214 
See :mod:`bipartite documentation <networkx.algorithms.bipartite>`

215 
for further details on how bipartite graphs are handled in NetworkX.

216 

217 
See Also

218 


219 

220 
hopcroft_karp_matching

221 

222 
"""

223 
# Due to its original implementation, a directed graph is needed

224 
# so that the two sets of bipartite nodes can be distinguished

225 
left, right = bipartite_sets(G, top_nodes) 
226 
G = nx.DiGraph(G.edges(left)) 
227 
# initialize greedy matching (redundant, but faster than full search)

228 
matching = {} 
229 
for u in G: 
230 
for v in G[u]: 
231 
if v not in matching: 
232 
matching[v] = u 
233 
break

234 
while True: 
235 
# structure residual graph into layers

236 
# pred[u] gives the neighbor in the previous layer for u in U

237 
# preds[v] gives a list of neighbors in the previous layer for v in V

238 
# unmatched gives a list of unmatched vertices in final layer of V,

239 
# and is also used as a flag value for pred[u] when u is in the first

240 
# layer

241 
preds = {} 
242 
unmatched = [] 
243 
pred = {u: unmatched for u in G} 
244 
for v in matching: 
245 
del pred[matching[v]]

246 
layer = list(pred)

247  
248 
# repeatedly extend layering structure by another pair of layers

249 
while layer and not unmatched: 
250 
newLayer = {} 
251 
for u in layer: 
252 
for v in G[u]: 
253 
if v not in preds: 
254 
newLayer.setdefault(v, []).append(u) 
255 
layer = [] 
256 
for v in newLayer: 
257 
preds[v] = newLayer[v] 
258 
if v in matching: 
259 
layer.append(matching[v]) 
260 
pred[matching[v]] = v 
261 
else:

262 
unmatched.append(v) 
263  
264 
# did we finish layering without finding any alternating paths?

265 
if not unmatched: 
266 
unlayered = {} 
267 
for u in G: 
268 
# TODO Why is extra inner loop necessary?

269 
for v in G[u]: 
270 
if v not in preds: 
271 
unlayered[v] = None

272 
# TODO Originally, this function returned a threetuple:

273 
#

274 
# return (matching, list(pred), list(unlayered))

275 
#

276 
# For some reason, the documentation for this function

277 
# indicated that the second and third elements of the returned

278 
# threetuple would be the vertices in the left and right vertex

279 
# sets, respectively, that are also in the maximum independent set.

280 
# However, what I think the author meant was that the second

281 
# element is the list of vertices that were unmatched and the third

282 
# element was the list of vertices that were matched. Since that

283 
# seems to be the case, they don't really need to be returned,

284 
# since that information can be inferred from the matching

285 
# dictionary.

286  
287 
# All the matched nodes must be a key in the dictionary

288 
for key in matching.copy(): 
289 
matching[matching[key]] = key 
290 
return matching

291  
292 
# recursively search backward through layers to find alternating paths

293 
# recursion returns true if found path, false otherwise

294 
def recurse(v): 
295 
if v in preds: 
296 
L = preds.pop(v) 
297 
for u in L: 
298 
if u in pred: 
299 
pu = pred.pop(u) 
300 
if pu is unmatched or recurse(pu): 
301 
matching[v] = u 
302 
return True 
303 
return False 
304  
305 
for v in unmatched: 
306 
recurse(v) 
307  
308  
309 
def _is_connected_by_alternating_path(G, v, matched_edges, unmatched_edges, 
310 
targets): 
311 
"""Returns True if and only if the vertex `v` is connected to one of

312 
the target vertices by an alternating path in `G`.

313 

314 
An *alternating path* is a path in which every other edge is in the

315 
specified maximum matching (and the remaining edges in the path are not in

316 
the matching). An alternating path may have matched edges in the even

317 
positions or in the odd positions, as long as the edges alternate between

318 
'matched' and 'unmatched'.

319 

320 
`G` is an undirected bipartite NetworkX graph.

321 

322 
`v` is a vertex in `G`.

323 

324 
`matched_edges` is a set of edges present in a maximum matching in `G`.

325 

326 
`unmatched_edges` is a set of edges not present in a maximum

327 
matching in `G`.

328 

329 
`targets` is a set of vertices.

330 

331 
"""

332 
def _alternating_dfs(u, along_matched=True): 
333 
"""Returns True if and only if `u` is connected to one of the

334 
targets by an alternating path.

335 

336 
`u` is a vertex in the graph `G`.

337 

338 
If `along_matched` is True, this step of the depthfirst search

339 
will continue only through edges in the given matching. Otherwise, it

340 
will continue only through edges *not* in the given matching.

341 

342 
"""

343 
if along_matched:

344 
edges = itertools.cycle([matched_edges, unmatched_edges]) 
345 
else:

346 
edges = itertools.cycle([unmatched_edges, matched_edges]) 
347 
visited = set()

348 
stack = [(u, iter(G[u]), next(edges))] 
349 
while stack:

350 
parent, children, valid_edges = stack[1]

351 
try:

352 
child = next(children)

353 
if child not in visited: 
354 
if ((parent, child) in valid_edges 
355 
or (child, parent) in valid_edges): 
356 
if child in targets: 
357 
return True 
358 
visited.add(child) 
359 
stack.append((child, iter(G[child]), next(edges))) 
360 
except StopIteration: 
361 
stack.pop() 
362 
return False 
363  
364 
# Check for alternating paths starting with edges in the matching, then

365 
# check for alternating paths starting with edges not in the

366 
# matching.

367 
return (_alternating_dfs(v, along_matched=True) or 
368 
_alternating_dfs(v, along_matched=False))

369  
370  
371 
def _connected_by_alternating_paths(G, matching, targets): 
372 
"""Returns the set of vertices that are connected to one of the target

373 
vertices by an alternating path in `G` or are themselves a target.

374 

375 
An *alternating path* is a path in which every other edge is in the

376 
specified maximum matching (and the remaining edges in the path are not in

377 
the matching). An alternating path may have matched edges in the even

378 
positions or in the odd positions, as long as the edges alternate between

379 
'matched' and 'unmatched'.

380 

381 
`G` is an undirected bipartite NetworkX graph.

382 

383 
`matching` is a dictionary representing a maximum matching in `G`, as

384 
returned by, for example, :func:`maximum_matching`.

385 

386 
`targets` is a set of vertices.

387 

388 
"""

389 
# Get the set of matched edges and the set of unmatched edges. Only include

390 
# one version of each undirected edge (for example, include edge (1, 2) but

391 
# not edge (2, 1)). Using frozensets as an intermediary step we do not

392 
# require nodes to be orderable.

393 
edge_sets = {frozenset((u, v)) for u, v in matching.items()} 
394 
matched_edges = {tuple(edge) for edge in edge_sets} 
395 
unmatched_edges = {(u, v) for (u, v) in G.edges() 
396 
if frozenset((u, v)) not in edge_sets} 
397  
398 
return {v for v in G if v in targets or 
399 
_is_connected_by_alternating_path(G, v, matched_edges, 
400 
unmatched_edges, targets)} 
401  
402  
403 
def to_vertex_cover(G, matching, top_nodes=None): 
404 
"""Returns the minimum vertex cover corresponding to the given maximum

405 
matching of the bipartite graph `G`.

406 

407 
Parameters

408 


409 

410 
G : NetworkX graph

411 

412 
Undirected bipartite graph

413 

414 
matching : dictionary

415 

416 
A dictionary whose keys are vertices in `G` and whose values are the

417 
distinct neighbors comprising the maximum matching for `G`, as returned

418 
by, for example, :func:`maximum_matching`. The dictionary *must*

419 
represent the maximum matching.

420 

421 
top_nodes : container

422 

423 
Container with all nodes in one bipartite node set. If not supplied

424 
it will be computed. But if more than one solution exists an exception

425 
will be raised.

426 

427 
Returns

428 


429 

430 
vertex_cover : :class:`set`

431 

432 
The minimum vertex cover in `G`.

433 

434 
Raises

435 


436 
AmbiguousSolution : Exception

437 

438 
Raised if the input bipartite graph is disconnected and no container

439 
with all nodes in one bipartite set is provided. When determining

440 
the nodes in each bipartite set more than one valid solution is

441 
possible if the input graph is disconnected.

442 

443 
Notes

444 


445 

446 
This function is implemented using the procedure guaranteed by `Konig's

447 
theorem

448 
<https://en.wikipedia.org/wiki/K%C3%B6nig%27s_theorem_%28graph_theory%29>`_,

449 
which proves an equivalence between a maximum matching and a minimum vertex

450 
cover in bipartite graphs.

451 

452 
Since a minimum vertex cover is the complement of a maximum independent set

453 
for any graph, one can compute the maximum independent set of a bipartite

454 
graph this way:

455 

456 
>>> import networkx as nx

457 
>>> G = nx.complete_bipartite_graph(2, 3)

458 
>>> matching = nx.bipartite.maximum_matching(G)

459 
>>> vertex_cover = nx.bipartite.to_vertex_cover(G, matching)

460 
>>> independent_set = set(G)  vertex_cover

461 
>>> print(list(independent_set))

462 
[2, 3, 4]

463 

464 
See :mod:`bipartite documentation <networkx.algorithms.bipartite>`

465 
for further details on how bipartite graphs are handled in NetworkX.

466 

467 
"""

468 
# This is a Python implementation of the algorithm described at

469 
# <https://en.wikipedia.org/wiki/K%C3%B6nig%27s_theorem_%28graph_theory%29#Proof>.

470 
L, R = bipartite_sets(G, top_nodes) 
471 
# Let U be the set of unmatched vertices in the left vertex set.

472 
unmatched_vertices = set(G)  set(matching) 
473 
U = unmatched_vertices & L 
474 
# Let Z be the set of vertices that are either in U or are connected to U

475 
# by alternating paths.

476 
Z = _connected_by_alternating_paths(G, matching, U) 
477 
# At this point, every edge either has a right endpoint in Z or a left

478 
# endpoint not in Z. This gives us the vertex cover.

479 
return (L  Z)  (R & Z)

480  
481  
482 
#: Returns the maximum cardinality matching in the given bipartite graph.

483 
#:

484 
#: This function is simply an alias for :func:`hopcroft_karp_matching`.

485 
maximum_matching = hopcroft_karp_matching 