import networkx as nx
import numpy as np
import matplotlib.pyplot as plt
import random
import operator

################################################################################
# Triadic closure
# Suppose we wish to evaluate whether triadic closure appears in a dataset. We
# can do this by taking a temporal dataset, and seeing how the network evolves 
# in terms of edge creation over time. If triadic closure is a driving force
# behind edge creation, we should observe that pairs of vertices sharing many
# common neighbors are more likely to become connected than a random pair of
# unconnected vertices. Let's experiment.

# Load the DNC e-mail contact network. Lines starting with '%' are comments,
# and parallel edges are kept by reading into a MultiGraph.
G = nx.read_weighted_edgelist(
  "out.dnc-temporalGraph.data",
  comments="%",
  create_using=nx.MultiGraph(),
)


def _creation_time(edge):
  # Each edge is a (u, v, data) triple. The reader stores the edge value under
  # the default key 'weight', but here it actually is the creation time.
  return edge[2].get('weight', 1)


# Consider the edges in temporal order.
edges = sorted(G.edges(data=True), key=_creation_time)

# Start with an empty graph and replay the edges in temporal order. Once the
# early part of the history has been replayed we snapshot, for every pair of
# vertices that is not yet connected, how many neighbors the pair shares. Later
# we check whether those pairs did end up connected (i.e. closed a triad).
G1 = nx.MultiGraph()

# The snapshot is taken as soon as more than 5% of all edges have been added.
# The measurements are later compared against the final graph (all edges).
t_0 = G.size() / 20

# Defined before the loop so the later stages still run (with empty results)
# if the snapshot never fires, e.g. when the edge list is empty.
commons = {}
counter = 0
max_k = 0
for e in edges:
  # Add our new edge
  G1.add_edge(e[0], e[1])
  counter += 1
  if counter > t_0:
    # Push the threshold beyond the total edge count so that the snapshot
    # below is taken exactly once.
    t_0 = G.size() + 1
    commons = {}
    # Only vertices already present in G1 can share a neighbor at this point,
    # and nx.common_neighbors raises for vertices missing from the graph.
    present = list(G1.nodes())
    for v in present:
      for u in present:
        # `v > u` visits each unordered pair once and skips v == u.
        if v > u and not G1.has_edge(v, u):
          # Count common neighbors in the snapshot graph G1, not in the final
          # graph G: k must describe the network *before* the edge could have
          # been created, otherwise edges added later leak into the predictor.
          # We expect: the higher the number of common neighbors, the more
          # likely this edge will eventually be created.
          k = len(list(nx.common_neighbors(G1, v, u)))
          if k > 0:
            commons[(v, u)] = k
          if k > max_k:
            max_k = k

# For every vertex pair recorded in the snapshot, check whether the edge exists
# once all edges have been replayed. Index k of each list refers to pairs that
# had k common neighbors:
#   total_counts[k] - number of such pairs
#   edge_counts[k]  - number of those pairs that ended up connected
total_counts = [0 for _ in range(max_k + 1)]
edge_counts = [0 for _ in range(max_k + 1)]
for (a, b), shared in commons.items():
  total_counts[shared] += 1
  if G1.has_edge(a, b):
    edge_counts[shared] += 1

# Turn the two tallies into an empirical probability of edge creation for each
# number of common neighbors. If triadic closure holds on this network we
# expect a positive relationship: more common neighbors -> higher probability
# that the edge was created. Values of k with no observed pairs stay at 0.
probs = [
  created / observed if observed > 0 else 0
  for created, observed in zip(edge_counts, total_counts)
]

# Plot the probability of edge creation against the number of common
# neighbors (the list index) and display the figure.
fig, ax = plt.subplots()
ax.plot(probs)
plt.show()

