import networkx as nx
import matplotlib.pyplot as plt 
from itertools import chain

# Build a directed graph from the edge-list file; each edge carries two
# integer attributes parsed from the file: "exists" and "timestamp"
D = nx.read_edgelist(
	"out.link-dynamic-simplewiki.data",
	create_using=nx.DiGraph(),
	data=(("exists", int), ("timestamp", int)),
)
# Vertex count and edge count (values are only shown when run interactively)
D.order()
D.size()

# Rather than drawing the graph itself, plot its degree distribution
# - A graph this large would most likely render as an unreadable hairball
# First tally, for every degree value, how many vertices of D have it
out_degrees = {}
in_degrees = {}
for v in D.nodes():
	d = D.out_degree(v)
	out_degrees[d] = out_degrees.get(d, 0) + 1
	d = D.in_degree(v)
	in_degrees[d] = in_degrees.get(d, 0) + 1

# Order the (degree, count) pairs by degree so each curve runs left to right
sorted_out_degrees = sorted(out_degrees.items())
sorted_in_degrees = sorted(in_degrees.items())

# Draw both distributions (out-degree first, then in-degree) on shared axes
fig, ax = plt.subplots()
for pairs in (sorted_out_degrees, sorted_in_degrees):
	ax.plot([deg for deg, _ in pairs], [cnt for _, cnt in pairs])
ax.set_xscale('log')
ax.set_yscale('log')
plt.show()


# We have functions to start to look at basic connectivity info of our graph
# NOTE: none of these results are stored or printed, so they are only visible
# when the lines are evaluated interactively (e.g. pasted into a REPL).
# Strong connectivity respects edge direction ...
nx.is_strongly_connected(D)
# ... while weak connectivity ignores it
nx.is_weakly_connected(D)
# Number of components under each of the two notions above
nx.number_strongly_connected_components(D)
nx.number_weakly_connected_components(D)


# The component sizes can be plotted the same way the degrees were
W = nx.weakly_connected_components(D)
S = nx.strongly_connected_components(D)

# Tally how many weakly connected components there are of each size
weak_sizes_counts = {}
for w in W:
	size = len(w)
	weak_sizes_counts[size] = weak_sizes_counts.get(size, 0) + 1

# Same tally for the strongly connected components
strong_sizes_counts = {}
for s in S:
	size = len(s)
	strong_sizes_counts[size] = strong_sizes_counts.get(size, 0) + 1

# Order the (size, count) pairs by size so each curve runs left to right
sorted_weak_sizes = sorted(weak_sizes_counts.items())
sorted_strong_sizes = sorted(strong_sizes_counts.items())

# Draw both distributions (weak first, then strong) on shared log-log axes
fig, ax = plt.subplots()
for pairs in (sorted_weak_sizes, sorted_strong_sizes):
	ax.plot([sz for sz, _ in pairs], [cnt for _, cnt in pairs])
ax.set_xscale('log')
ax.set_yscale('log')
plt.show()


# What if we wanted to determine reachability?
# I.e., what set of vertices are reachable from some 'root'?
def bfs_successors(G, root):
	"""Compute the BFS level of every vertex reachable from root.

	Edges are followed in their forward direction via G.successors().

	Args:
		G: graph object providing nodes() and successors(v).
		root: start vertex; expected to be a vertex of G.

	Returns:
		A dict mapping every vertex of G to its level, i.e. the number of
		edges on a shortest path from root. The root has level 0 and
		vertices that cannot be reached from root have level -1.
	"""
	# -1 marks "not visited yet"
	levels = {}
	for v in G.nodes():
		levels[v] = -1

	levels[root] = 0
	level = 0

	# Process one whole level at a time. Newly discovered vertices go into a
	# separate list so they are only expanded on the next pass; expanding
	# them while the current level is still being processed would assign
	# levels that are too small.
	frontier = [root]
	while frontier:
		level += 1
		next_frontier = []
		for v in frontier:
			for u in G.successors(v):
				if levels[u] == -1:
					levels[u] = level
					next_frontier.append(u)
		frontier = next_frontier

	return levels

# Run the search from vertex '1' and keep every vertex that received a level
L = bfs_successors(D, '1')
O = [v for v in D.nodes() if L[v] != -1]
		
# But what if we wanted to see which vertices can reach our 'root' instead?
# What about the overlap of these two sets: vertices that can both reach and be
#		reached from our root? Can all vertices in this overlapped set reach
#		each other? (FW-BW algorithm)