"""
Program to display frequency with which certain values appear within a column of given spreadsheet

Example:
    python3 data_processing.py staff_info.csv 3 0 5 CS EE SymSys

    Arguments are: filename, column number (not zero-indexed), y_min, y_max,
    followed by one or more values to count.
    The example would display a bar chart (y axis from 0 to 5) with the number of
    people on staff with concentrations in CS, EE and SymSys.
    Note: column 3 (not zero-indexed) in the staff_info.csv file is the "concentration" column.
    Note: a column might have multiple values (like column 3 in the last row of staff_info.csv),
    so test whether a given value is IN the column, not EQUAL TO the column.
"""

import sys

from graphics import Canvas

# Canvas dimensions: passed to Canvas(...) in main() and used by make_bar_chart
# to position the axes and bars (presumably pixels -- confirm against graphics).
CANVAS_WIDTH = 1450
CANVAS_HEIGHT = 500

# Gap kept between each edge of the canvas and the chart axes.
BORDER = 40

def make_bar_chart(canvas, x_labels, bar_heights, y_min, y_max):
    """
    Draw a bar chart on canvas.

    The plot area is the canvas minus a BORDER-wide margin on every side.
    It is divided into one equal-width bucket per entry of x_labels; the
    label is written under the x axis at the left edge of its bucket and
    bar i is drawn in bucket i, leaving a quarter of the bucket empty on
    the right as spacing. Each bar's value is written just above it.

    Args:
        canvas: drawing surface providing create_line, create_text,
            create_rectangle and set_outline_color as called below.
        x_labels: category names, one per bucket.
        bar_heights: numeric values; bar_heights[i] is drawn in bucket i.
        y_min: value represented by the bottom of the y axis.
        y_max: value represented by the top of the y axis.

    Raises:
        ValueError: if y_max equals y_min, since the bars could not be
            scaled against a zero-length axis.
    """
    if y_max == y_min:
        raise ValueError("y_max must be different from y_min")

    left = BORDER
    right = CANVAS_WIDTH - BORDER
    top = BORDER
    bottom = CANVAS_HEIGHT - BORDER

    # make axes
    canvas.create_line(left, top, left, bottom)
    canvas.create_line(left, bottom, right, bottom)

    # make y-labels
    canvas.create_text(0, bottom, str(y_min), anchor='w', font='Courier 24')
    canvas.create_text(0, top, str(y_max), anchor='w', font='Courier 24')

    # With no categories there are no buckets to lay out (and the bucket
    # width below would divide by zero), so only the axes are drawn.
    if not x_labels:
        return

    # make x-labels
    bucket_width = (right - left) // len(x_labels)
    bucket_margin = bucket_width // 4
    for i, label in enumerate(x_labels):
        canvas.create_text(left + i * bucket_width, bottom, label, anchor='nw', font='Courier 24')

    # make bars
    plot_height = bottom - top
    for i, value in enumerate(bar_heights):
        bar_left = left + i * bucket_width
        # Measure the bar from y_min so that a value equal to y_min has zero
        # height and a value equal to y_max reaches the top of the y axis.
        height = ((value - y_min) / (y_max - y_min)) * plot_height
        rect = canvas.create_rectangle(bar_left, bottom - height, bar_left + bucket_width - bucket_margin, bottom, 'pink')
        canvas.create_text(bar_left + bucket_margin, bottom - height - BORDER / 2, str(value), anchor='nw', font='Courier 24')
        canvas.set_outline_color(rect, 'black')
def get_bar_values(filename, col_num, x_labels):
    """
    Count, for each label, the rows of a .csv file whose chosen column contains it.

    The first line of the file (the column names) is skipped. Every other
    non-blank line is stripped and split on ',' (no quote handling), and the
    field at index col_num (zero-indexed) is examined. A label is counted
    for a row when it appears anywhere IN that field, so a field holding
    several values can count towards several labels.

    Args:
        filename: path of the .csv file to read.
        col_num: zero-indexed column to inspect in each row.
        x_labels: list of strings to look for.

    Returns:
        A list of ints the same length as x_labels; element i is the number
        of rows whose column contains x_labels[i]. An empty file, or one
        holding only the header line, yields all zeros.

    Raises:
        IndexError: if a non-blank row has no column at index col_num.

    See the examples below (and staff_info.csv) for examples
    >>> get_bar_values('staff_info.csv', 1, ['BS', 'MS', 'PHD'])
    [3, 2, 1]
    >>> get_bar_values('staff_info.csv', 0, ['Frankie', 'Ecy', 'Chris'])
    [1, 1, 1]
    >>> get_bar_values('staff_info.csv', 2, ['CS', 'SymSys', 'EE', 'Bio'])
    [2, 1, 1, 0]
    """
    # construct our result list to start with 0s for each x_label
    result = [0] * len(x_labels)

    # the with-block closes the file even if a row raises
    with open(filename) as f:
        next(f, None)  # skip line with column names; tolerate an empty file
        for line in f:
            line = line.strip()
            if not line:
                # blank lines (e.g. a trailing newline) carry no data
                continue
            # get the column whose value we want to check
            interest = line.split(',')[col_num]
            for i, val in enumerate(x_labels):
                if val in interest:
                    result[i] += 1
    return result

def main():
    """
    Parse the command line, count the requested values and show the chart.

    Expected arguments:
        filename col_num y_min y_max x_label [x_label ...]
    col_num is given one-indexed on the command line and converted to a
    zero-indexed column before the file is read. If fewer than 5 arguments
    are supplied a message is printed and the function returns without
    drawing anything.
    """
    # first process command line args
    # format: filename col_num(not zero indexed) y_min y_max x_label x_label x_label ...
    args = sys.argv[1:]
    if len(args) < 5:
        print("Please run with at least 5 arguments")
        return  # stop right here if we dont have enough args

    filename, col_arg, y_min_arg, y_max_arg = args[:4]
    x_labels = args[4:]
    col_num = int(col_arg) - 1
    y_min = int(y_min_arg)
    y_max = int(y_max_arg)

    bar_values = get_bar_values(filename, col_num, x_labels)

    # Build the canvas only once the arguments are parsed and the data is
    # loaded, so a bad command line or unreadable file does not construct a
    # Canvas whose mainloop() is never reached.
    canvas = Canvas(CANVAS_WIDTH, CANVAS_HEIGHT, "Data Science!")
    make_bar_chart(canvas, x_labels, bar_values, y_min, y_max)
    # leave this in!
    canvas.mainloop()

# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()