import networkx as nx
import numpy as np
import scipy as sp
import matplotlib.pyplot as plt
import math
import random

################################################################################
# Now we're going to use matrix factorization for collaborative filtering.
# Specifically, we're going to use product reviews from an amazon dataset to
# predict person->movie ratings. The Netflix challenge data is far too large
# for demonstrative purposes.


################################################################################
# Below are our training functions. Note the differences with our prior code.
# How we formulated our optimization function for this problem makes
# calculating our gradient descent updates much faster. 

# Here we're training on available data (nonzeros in our matrix)
def train(A, NZ, U, V, alpha, beta):
	"""Run one stochastic-gradient pass over the observed entries of A.

	For every observed position (i, j) the prediction error
	``A[i,j] - U[i,:] . V[:,j]`` is computed, then row i of U and column j of
	V are moved along the negative gradient of the regularized squared error.
	U and V are modified in place; nothing is returned.

	Args:
		A: rating matrix, indexable as ``A[i, j]``.
		NZ: pair ``(rows, cols)`` of equally long index sequences naming the
			observed entries (e.g. the result of ``A.nonzero()``).
		U: float array whose rows are the row factors (rows of A x k).
		V: float array whose columns are the column factors (k x columns of A).
		alpha: learning rate.
		beta: L2 regularization strength.
	"""
	for i, j in zip(NZ[0], NZ[1]):
		# Snapshot the row factor before it is overwritten: U[i,:] is a view,
		# and the V update must use the same pre-update value the error was
		# computed with, otherwise the two steps are taken at different points.
		u_i = U[i,:].copy()
		v_j = V[:,j]
		eij = A[i,j] - np.dot(u_i, v_j)
		U[i,:] = u_i + alpha * (2 * eij * v_j - beta * u_i)
		V[:,j] = v_j + alpha * (2 * eij * u_i - beta * v_j)

# Compute the current error
def compute_error(A, NZ, U, V, beta):
	"""Return the regularized squared reconstruction error on observed entries.

	The value is the sum over observed (i, j) of
	``(A[i,j] - U[i,:] . V[:,j]) ** 2`` plus an L2 penalty
	``beta / 2 * (||U||_F ** 2 + ||V||_F ** 2)``.

	Args:
		A: rating matrix, indexable as ``A[i, j]``.
		NZ: pair ``(rows, cols)`` of index sequences naming the observed entries.
		U: array of row factors (rows of A x k).
		V: array of column factors (k x columns of A).
		beta: L2 regularization strength.

	Returns:
		The scalar objective value.
	"""
	error = 0
	for i, j in zip(NZ[0], NZ[1]):
		error += (A[i,j] - np.dot(U[i,:], V[:,j])) ** 2

	# The penalty must use the *squared* Frobenius norm: np.linalg.norm returns
	# the un-squared norm, and only the squared form has the linear-in-factor
	# gradient (beta * U, beta * V) that the training update applies.
	penalty = np.linalg.norm(U, 'fro') ** 2 + np.linalg.norm(V, 'fro') ** 2
	return error + beta / 2 * penalty

# Primary loop for training
def factorize_matrix(A, U, V, iterations, alpha, beta):
	"""Fit the factors U and V to the nonzero entries of A.

	Each iteration performs one training pass over the nonzero entries and
	then prints the resulting error. U and V are updated in place by the
	training pass.

	Args:
		A: rating matrix; its ``nonzero()`` result defines the training entries.
		U: row factors (rows of A x k).
		V: column factors (k x columns of A).
		iterations: number of training passes.
		alpha: learning rate forwarded to ``train``.
		beta: regularization strength forwarded to ``train`` and ``compute_error``.

	Returns:
		The tuple ``(U, V.T)``; note the second factor is returned transposed.
	"""
	observed = A.nonzero()
	for _ in range(iterations):
		train(A, observed, U, V, alpha, beta)
		print(compute_error(A, observed, U, V, beta))
	return U, V.T

################################################################################
# We'll use the amazon video dataset for training and testing
B = nx.read_edgelist(
	"amazon_video.data",
	create_using=nx.Graph(),
	comments="%",
	data=(("rating", float), ("time", int)),
)

# Split the graph into its two node sets and build a biadjacency matrix from
# them. This matrix form is specific to bipartite graphs: a nonzero at (i, j)
# indicates an edge from vertex i of the first set to vertex j of the second.
# The first set is treated as the users and the second as the videos.
users, videos = nx.bipartite.basic.sets(B)
A = nx.bipartite.biadjacency_matrix(
	B, row_order=users, column_order=videos, weight='rating'
)

# We'll select a subset of A for testing. We'll grab these values and then
# zero out that index within A.
NZ = A.nonzero()
test_size = 100
# Draw test_size distinct positions into the nonzero index arrays.
test = random.sample(range(len(NZ[0])), test_size)

# Remember each held-out rating, then blank it in A so training never sees it.
test_truths = {}
for t in test:
	i, j = NZ[0][t], NZ[1][t]
	test_truths[t] = A[i, j]
	A[i, j] = 0

# We'll also consider a naive predictor for comparison. Ideally, we should beat
# this naive predictor with our approach.
# The held-out ratings were overwritten with 0, but a CSR matrix keeps such
# entries as explicitly stored zeros, and `.data` includes them. Drop them so
# the per-user and per-video means are taken over real ratings only instead of
# being dragged toward 0 by the very entries we are trying to predict.
A.eliminate_zeros()

# Fallback for a user or video whose only ratings were all held out.
global_mean = A.data.mean() if A.nnz else 0.0

# Naive prediction: average of the user's mean rating and the video's mean rating.
p_naive = {}
for t in test:
	user = NZ[0][t]
	video = NZ[1][t]
	user_ratings = A[user,:].data
	video_ratings = A[:,video].data
	user_mean = user_ratings.mean() if user_ratings.size else global_mean
	video_mean = video_ratings.mean() if video_ratings.size else global_mean
	p_naive[t] = (user_mean + video_mean) / 2

# Set up our parameters and do our business
# A is n x m; both factor matrices start from uniform random values and
# share k latent dimensions.
n, m = A.shape
k = 40
U = np.random.rand(n, k)
V = np.random.rand(m, k)

# Number of training passes, learning rate and regularization strength.
iterations, alpha, beta = 10, 0.01, 0.02

# factorize_matrix takes the second factor as k x m and hands it back
# transposed, so V is m x k again afterwards.
U, V = factorize_matrix(A, U, V.T, iterations, alpha, beta)

# Compute our prediction and compare
# Full matrix of predicted ratings (n x m).
P = np.dot(U, V.T)

# Accumulate absolute errors of both predictors over the held-out entries.
test_error = 0
test_error_naive = 0
for t in test:
	i, j = NZ[0][t], NZ[1][t]
	print("Test:", test_truths[t], P[i, j])
	test_error = test_error + abs(P[i, j] - test_truths[t])
	test_error_naive = test_error_naive + abs(p_naive[t] - test_truths[t])

# Report mean absolute error for the factorization and for the naive baseline.
print("Test Error:", test_error / test_size)
print("Test Error Naive:", test_error_naive / test_size)