from random import *
from cs368lib import *
from math import *
from collections import Counter, defaultdict

# Below are naive implementations that you can use to test gradescope
# Please replace with a more memory efficient implementation
# Any variables must be allocated through the API in cs368lib
# Unfortunately, this means many of the nice data structures in python
# (sets, dicts, etc) are not supported. If you find that you need these
# data structures, please post on piazza and I can add the support to the
# cs368lib.py file.

# PART I: Heavy hitters sketch (140 pts)
# ======================================
# TODO: Modify the code to implement the CountMin and CountSketch sketches
# we've learned in class. Bonus points for topping the scoreboard!
# I've implemented a basic version that uses O(n) memory below. Feel free to
# add any functions you want to the class below. The test harness will only 
# issue calls to "build" and "query".
class BaseSketcher:
    """Common interface for the frequency sketches in this file.

    The test harness only issues calls to ``build`` and ``query``. This base
    version keeps no state and reports a frequency of zero for everything.
    """

    def __init__(self):
        """Create the sketch; the base class has nothing to set up."""

    def build(self, stream, eps):
        """Consume a stream; the base class ignores it.

        stream -- iterable of (integer, frequency) pairs
        eps    -- accuracy parameter epsilon
        """
        return None

    def query(self, e):
        """Return the estimated frequency of element ``e``.

        With f_i the true frequency of element i and v(i) the value returned
        for it, an implementation is expected to satisfy
                  f_i - eps * F1 <= v(i) <= f_i + eps * F1
        The base class always answers 0.
        """
        estimate = 0
        return estimate
        
# A brute force exact solution that takes O(F1) time.
class ExactSketcher(BaseSketcher):
    """Exact frequency counter that stores every occurrence individually.

    Each element is inserted into a tracked sorted list once per occurrence,
    so building performs one insertion per unit of frequency.
    """

    def __init__(self):
        # Allocated through cs368lib so the memory accounting can see it.
        self.sorted_stream = tracked_sortedlist()

    def build(self, stream, eps=0):
        """Discard any previous contents and load ``stream``.

        stream -- iterable of (integer, frequency) pairs
        eps    -- accepted for interface compatibility; not used here
        """
        occurrences = self.sorted_stream
        occurrences.clear()
        for element, count in stream:
            # One insertion per occurrence of the element.
            for _ in range(count):
                occurrences.add(element)

    def query(self, e):
        """Return how many times ``e`` was inserted by the last build."""
        stored = self.sorted_stream
        return stored.count(e)
        
# CountMin Sketch
# ===============
# TODO: Your code below.
class CMSketcher(BaseSketcher):
    """Placeholder for the CountMin sketch; no method does anything yet."""

    def __init__(self):
        """Create the sketch; nothing is allocated yet."""

    def build(self, stream, eps):
        """Consume the stream (not implemented yet; does nothing).

        stream -- stream of integers and their frequencies
        eps    -- accuracy parameter epsilon
        """
        # TODO: Your code here.
        return None

    def query(self, e):
        """Estimate the frequency of ``e`` (not implemented; returns None).

        With f_i the true frequency of element i and v(i) the value returned
        for it, a finished implementation must satisfy
                  f_i - eps * F1 <= v(i) <= f_i + eps * F1
        """
        # TODO: Your code here.
        return None

# Count Sketch
# ============
# TODO: Your code below.
class CSSketcher(BaseSketcher):
    """Placeholder for the Count Sketch; no method does anything yet."""

    def __init__(self):
        """Create the sketch; nothing is allocated yet."""

    def build(self, stream, eps):
        """Consume the stream (not implemented yet; does nothing).

        stream -- stream of integers and their frequencies
        eps    -- accuracy parameter epsilon
        """
        # TODO: Your code here.
        return None

    def query(self, e):
        """Estimate the frequency of ``e`` (not implemented; returns None).

        With f_i the true frequency of element i and v(i) the value returned
        for it, a finished implementation must satisfy
                  f_i - eps * F1 <= v(i) <= f_i + eps * F1
        """
        # TODO: Your code here.
        return None

# PART II: Adversarial Input Generation (60 pts)
# ==============================================
# TODO: Create two functions below to output streams of at least 10000 distinct 
# integers. The first of these two functions must create a stream where CountSketch
# uses more memory than CountMin (with an eps = 0.04). The second of these functions
# must create a stream where CountMin uses more memory than CountSketch (with an
# eps = 0.04). This part will be graded manually once you submit to GradeScope.
# You will receive full marks as long as the streams you output are adversarial
# for your implementations of CountMin and CountSketch. You may find the functions
# `compute_best_memory` and `test_adversarial` below useful.

# NOTE: There is no memory constraint for the two functions below. Feel free to 
# use any python built-in libraries / data structures that you wish. 
def generate_cs_adversarial():
    """Return the stream meant to be adversarial for CountSketch.

    Placeholder: currently produces an empty stream.
    """
    stream = []
    return stream

def generate_cm_adversarial():
    """Return the stream meant to be adversarial for CountMin.

    Placeholder: currently produces an empty stream.
    """
    stream = []
    return stream
    
# The function below computes, for a given stream and eps, the minimum
# amount of memory a sketch can use to produce a frequency estimate with
# the given epsilon. The function does this through a binary search
# on the eps' we provide when building the sketch, to find the largest
# eps' that has empirical error eps. Since the data structure may fail
# probabilistically, the `check` in the binary search is run 10 times
# (this is the `recheck` parameter) for high probability (feel free to 
# adjust this number).
# NOTE: This function takes an extremely long time to run, so try on
# smaller streams first.
def compute_best_memory(stream, sketch, eps, rechecks=10):
    """Estimate the least memory ``sketch`` needs for empirical error ``eps``.

    Binary-searches the eps' passed to ``sketch.build`` for the largest value
    whose observed error on ``stream`` stays below ``eps * F1``, then rebuilds
    the sketch once at that eps' and returns what ``report_mem()`` reports.

    stream   -- (element, count) pairs; iterated several times, so it must
                be re-iterable (a list, not a one-shot generator)
    sketch   -- object providing ``build(stream, eps)`` and ``query(e)``
    eps      -- target empirical error as a fraction of F1; also the lower
                end of the search interval
    rechecks -- number of builds tried per candidate eps'; the candidate is
                accepted when a strict majority of them pass
    """
    # Exact frequencies and total stream weight, used as ground truth.
    counts = defaultdict(int)
    F1 = 0
    for e, c in stream:
        counts[e] += c
        F1 += c
    budget = eps * F1

    def check(thresh):
        # Build at the candidate eps' and measure the worst over- and
        # under-estimate; fail as soon as their sum reaches the budget.
        sketch.build(stream, thresh)
        pos_err, neg_err = 0, 0
        for e, true_count in counts.items():
            estimate = sketch.query(e)  # query once, reuse for both errors
            pos_err = max(pos_err, estimate - true_count)
            neg_err = max(neg_err, true_count - estimate)
            if pos_err + neg_err >= budget:
                return False

        return pos_err + neg_err < budget

    # Binary search on the "true" eps input to the sketch that
    # produces the given epsilon
    l, r = eps, 3
    while abs(r - l) > 1e-3:
        # 2.0 forces true division: with an integer eps, Python 2 would
        # floor the midpoint and the interval could stop shrinking.
        m = (l + r) / 2.0
        passes = sum(1 for _ in range(rechecks) if check(m))
        if 2 * passes > rechecks:
            l = m
        else:
            r = m

    # Rebuild the data structure again to find the best memory
    reset_mem()
    thresh = (l + r) / 2.0
    sketch.build(stream, thresh)
    retval = report_mem()
    reset_mem()

    return retval

# Use this function to test your adversarial stream implementation
# This function could take a few minutes to run, so please try with smaller 
# streams first.
def test_adversarial():
    """Print the best-memory figures of both sketches on both streams.

    Builds one CountMin and one CountSketch instance, then for each generated
    adversarial stream prints the memory reported by ``compute_best_memory``
    at eps = 0.04, CountMin first and CountSketch second.
    """
    CM = CMSketcher()
    CS = CSSketcher()

    L0 = generate_cm_adversarial()
    L1 = generate_cs_adversarial()

    # First line: CountMin-adversarial stream; second: CountSketch-adversarial.
    for stream in (L0, L1):
        cm_mem = compute_best_memory(stream, CM, 0.04)
        cs_mem = compute_best_memory(stream, CS, 0.04)
        # One pre-formatted string prints identically under the Python 2
        # print statement and the Python 3 print function.
        print('Memory used (CM, CS): %s %s' % (cm_mem, cs_mem))
    
# The part below is just for you to test your submission locally
# Please delete below before submitting
if __name__ == '__main__':
    # Here's a large random set that we can test on
    maxR = 1234
    nL = 10000
    L = tracked_list((randint(1, maxR), 1) for i in range(nL))
    # Drop the memory charged for the input so only the sketch is measured.
    reset_mem()

    # Here's how the judge will be estimating the memory in your submission
    exact = ExactSketcher()
    exact.build(L)
    # Single pre-formatted argument: same output under the Python 2 print
    # statement and the Python 3 print function. The 1-tuple keeps the
    # formatting safe even if report_mem() itself returns a tuple.
    print('Memory units (exact): %s' % (report_mem(),))
    reset_mem()
    print([exact.query(i) for i in range(10)])