from random import *
from cs368lib import *
from math import *
from collections import Counter, defaultdict

# Below are naive implementations that you can use to test gradescope
# Please replace with a more memory efficient implementation
# Any variables must be allocated through the API in cs368lib
# Unfortunately, this means many of the nice data structures in python
# (sets, dicts, etc) are not supported. If you find that you need these
# data structures, please post on piazza and I can add the support to the
# cs368lib.py file.

# Weighted matching in the semi-streaming model
# =============================================
# TODO: Modify the code to implement the (2+\eps)-approximation for
# weighted matching as described in the online write-up. Bonus points 
# for topping the scoreboard!

# I've implemented a basic version of the classic greedy algorithm for
# maximum weighted matching below. This is known to give a 2-approx.
# Unfortunately it uses O(n^2) memory for large graphs!

# Input: Stream of edges and their weights (u, v, wt) and an accuracy 
# parameter epsilon. You may assume that the weights are positive and that
# if there are n nodes, then 0 <= u < v < n. You can assume that there are
# no self-loops or repeated edges in the stream.
# Output: A (2+\eps)-approximation for the maximum matching
def wm_streamer(stream, eps):
    """Greedy baseline for weighted matching on an edge stream.

    Buffers every edge of the stream, orders the buffer by decreasing
    weight, and then scans it once, keeping each edge whose two endpoints
    are both still unmatched.

    Args:
        stream: iterable of edges, each indexable as (u, v, weight). The
            stream is only read, never modified.
        eps: accuracy parameter; this baseline does not use it.

    Returns:
        A tracked_list of [u, v] pairs (each endpoint wrapped in a
        tracked_int), in the order the edges were selected.
    """
    # TODO: replace this baseline with the memory-efficient
    # (2+\eps)-approximation. Bonus points for topping the scoreboard!

    # The input stream must not be altered, so keep a private copy of the
    # edges while also tracking the number of nodes seen so far.
    edges = tracked_list()
    node_count = tracked_int(0)
    for item in stream:
        record = [tracked_int(item[0]), tracked_int(item[1]), tracked_double(item[2])]
        edges.append(record)
        node_count = max(node_count, item[0] + 1, item[1] + 1)

    # Heaviest edges first.
    edges.sort(key=lambda rec: rec[2], reverse=True)

    matching = tracked_list()
    taken = tracked_list([0] * node_count)
    for rec in edges:
        u = rec[0]
        v = rec[1]
        if not taken[u] and not taken[v]:
            # Both endpoints are free: take the edge, then mark them used.
            matching.append([tracked_int(u), tracked_int(v)])
            taken[u] = True
            taken[v] = True

    return matching

# Input: Stream of edges and their weights (u, v, wt) and an accuracy 
# parameter epsilon. You may assume that the weights are positive and that
# if there are n nodes, then 0 <= u < v < n. You can assume that there are
# no self-loops or repeated edges in the stream.
# Output: A (2+\eps)-approximation for the maximum matching
def wm_improved_streamer(stream, eps):
    """Placeholder for the improved streamer; currently the greedy baseline.

    Stores a copy of all streamed edges, sorts them from heaviest to
    lightest, and greedily picks every edge that does not share an
    endpoint with an edge picked earlier.

    Args:
        stream: iterable of edges, each indexable as (u, v, weight). The
            stream is only read, never modified.
        eps: accuracy parameter; not used by the current implementation.

    Returns:
        A tracked_list of [u, v] pairs (each endpoint wrapped in a
        tracked_int), in the order the edges were picked.
    """
    # TODO: replace the code below with the most memory-efficient
    # weighted-matching streamer you can. Bonus points for topping the
    # scoreboard!

    def weight_of(edge):
        # Sort key: the weight is stored in the third slot of each edge.
        return edge[2]

    # We are not allowed to alter the input stream, so buffer the edges
    # ourselves and derive the node count from the largest endpoint + 1.
    buffered = tracked_list()
    num_nodes = tracked_int(0)
    for raw in stream:
        buffered.append(
            [tracked_int(raw[0]), tracked_int(raw[1]), tracked_double(raw[2])]
        )
        num_nodes = max(num_nodes, raw[0] + 1, raw[1] + 1)

    # Process edges in order of decreasing weight.
    buffered.sort(key=weight_of, reverse=True)

    chosen = tracked_list()
    matched = tracked_list([0] * num_nodes)
    for edge in buffered:
        both_free = not (matched[edge[0]] or matched[edge[1]])
        if both_free:
            # Record the edge, then flag both endpoints as matched.
            chosen.append([tracked_int(edge[0]), tracked_int(edge[1])])
            matched[edge[0]] = True
            matched[edge[1]] = True

    return chosen
    
# The part below is just for you to test your submission locally
# Please delete below before submitting
if __name__ == '__main__':
    # Local smoke test on a random graph with n = 123 nodes: every pair
    # i < j becomes an edge with probability 0.1 and receives a random
    # integer weight between 1 and n. Only pairs with i < j are generated,
    # so the stream contains no self-loops or repeated edges, which is
    # also what the judge guarantees.
    n = 123
    L = tracked_list()
    for i in range(n):
        for j in range(i+1, n):
            if random() < 0.1:
                L.append([tracked_int(i), tracked_int(j), tracked_double(randint(1, n))])
    # NOTE(review): presumably this clears the memory accounting so that
    # building the test input is not counted against the streamer --
    # confirm against cs368lib.
    reset_mem()

    # Here's how the judge will be estimating the memory in your submission
    M = wm_streamer(L, 0.01)
    # Single-argument print with %-formatting produces the same output as
    # the Python 2 print statement and is also valid Python 3 syntax.
    print('Memory units (greedy): %s' % (report_mem(),))
    reset_mem()

    # Total weight of the returned matching: look each matched edge up in
    # the input list to recover its weight.
    value = 0
    for e in M:
        for e_ in L:
            if e[0] == e_[0] and e[1] == e_[1]:
                value += e_[2]
                break
    print('Matching value: %s' % (value,))