#!/usr/bin/env python3

"""
Explore data sets to play with matplotlib,
in particular 2020 election data.
This code is complete.
Nick Parlante

Election data from here:
https://github.com/TheUpshot/presidential-precinct-map-2020/blob/main/README.md
"""

import sys

# This is the standard way to import matplotlib
# as the symbol "plt"
import matplotlib.pyplot as plt


# The CA data file read by read_ca() uses a number to identify each county -
# this dict is the translation: key is the int county number, value is the
# county name str. The entries are written interleaved (1, 30, 2, 31, ...)
# rather than in numeric order.
COUNTIES = {1: 'Alameda', 30: 'Orange', 2: 'Alpine', 31: 'Placer', 3: 'Amador',
            32: 'Plumas', 4: 'Butte', 33: 'Riverside', 5: 'Calaveras', 34: 'Sacramento',
            6: 'Colusa', 35: 'San Benito', 7: 'Contra Costa', 36: 'San Bernardino',
            8: 'Del Norte', 37: 'San Diego', 9: 'El Dorado', 38: 'San Francisco',
            10: 'Fresno', 39: 'San Joaquin', 11: 'Glenn', 40: 'San Luis Obispo',
            12: 'Humboldt', 41: 'San Mateo', 13: 'Imperial', 42: 'Santa Barbara',
            14: 'Inyo', 43: 'Santa Clara', 15: 'Kern', 44: 'Santa Cruz', 16: 'Kings',
            45: 'Shasta', 17: 'Lake', 46: 'Sierra', 18: 'Lassen', 47: 'Siskiyou',
            19: 'Los Angeles', 48: 'Solano', 20: 'Madera', 49: 'Sonoma', 21: 'Marin',
            50: 'Stanislaus', 22: 'Mariposa', 51: 'Sutter', 23: 'Mendocino',
            52: 'Tehama', 24: 'Merced', 53: 'Trinity', 25: 'Modoc', 54: 'Tulare',
            26: 'Mono', 55: 'Tuolumne', 27: 'Monterey', 56: 'Ventura', 28: 'Napa',
            57: 'Yolo', 29: 'Nevada', 58: 'Yuba'}

def read_ca(filename):
    """
    Read the CA election csv and add up the votes for each county.

    Returns a dict where the key is the county name, e.g. 'Santa Clara',
    and the value is the sum of the total-votes column over every
    data row belonging to that county.
    """
    totals = {}
    with open(filename) as f:
        for row in f:
            fields = row.split(',')
            # Only numeric data rows count - a row that is too short or
            # does not begin with a county number is passed over
            if len(fields) < 20 or not fields[0].isdigit():
                continue
            # Column layout: 0 is county number, 2 is precinct,
            # 6 is total votes, 7 is dem, 8 is repub
            name = COUNTIES[int(fields[0])]
            totals[name] = totals.get(name, 0) + int(fields[6])
    return totals


def read_texas(filename):
    """
    Read the tx election csv and add up the votes for each county.

    Returns a dict where the key is the int county-num and the value
    is the vote total for that county, estimated as dem + rep.
    """
    totals = {}
    with open(filename) as f:
        for row in f:
            # row like: "0010001",1,357,791,13,1,2
            fields = row.split(',')
            # Data rows have exactly 7 fields with a numeric county in field 1
            if len(fields) != 7 or not fields[1].isdigit():
                continue
            county_num = int(fields[1])
            # close estimate: total = dem + rep (fields 2 and 3)
            estimate = int(fields[2]) + int(fields[3])
            totals[county_num] = totals.get(county_num, 0) + estimate
    return totals


def read_potato(filename):
    """
    Return dict with country-year as key, value is int tons potato data.

    One data line looks like: Afghanistan,AFG,1961,130000
    which becomes the entry 'Afghanistan-1961': 130000.
    A line is skipped when it does not have exactly 4 comma-separated
    fields, or when its last field does not start with a digit
    (e.g. a header line or an empty value).
    """
    tons = {}
    with open(filename) as f:
        for line in f:
            parts = line.split(',')
            # Slice [:1] rather than index [0]: if the last field is empty
            # (a final line ending in a comma with no newline) the slice
            # is '' and ''.isdigit() is False, where [0] would raise
            # IndexError.
            if len(parts) == 4 and parts[3][:1].isdigit():
                key = parts[0] + '-' + parts[2]
                # int() tolerates the trailing newline left on the field
                value = int(parts[3])
                tons[key] = value
    return tons


def first_digit(num):
    """
    Return first (most significant) digit of int.

    The sign is ignored, so first_digit(-857) is 8.
    Zero has no non-zero leading digit and returns 0.
    """
    # Without abs() a negative num never enters the loop below and
    # would be returned unchanged instead of as a single digit
    num = abs(num)
    while num >= 10:
        num //= 10
    return num


def first_digits(nums):
    """
    Tally the leading digits of a collection of nums.

    Returns a dict where each key is a first digit that occurred
    and its value is how many of the nums start with that digit.
    Digits that never occur are not in the dict.
    """
    tally = {}
    for value in nums:
        lead = first_digit(value)
        tally[lead] = tally.get(lead, 0) + 1
    return tally


def last_digits(nums):
    """
    Given list of nums, return dict counting
    how often each last digit appears.

    The sign is ignored, so -13 counts toward digit 3.
    Digits that never appear are not in the dict.
    """
    counts = {}
    for num in nums:
        # abs() first: Python's % takes the sign of the divisor, so
        # -13 % 10 is 7, which is not the last digit of -13
        digit = abs(num) % 10
        counts[digit] = counts.get(digit, 0) + 1
    return counts


def plot_ca1(filename):
    """
    Bar chart of the vote totals of 3 counties - basic matplotlib.

    Each county is deliberately looked up by hand, the simplest
    possible way to build the data for a chart.
    """
    totals = read_ca(filename)
    plt.figure(figsize=(8, 4))  # (width, height) of the figure

    # One label per bar, and the matching bar height in the same order
    labels = ['Santa Cruz', 'Santa Clara', 'San Mateo']
    heights = [
        totals['Santa Cruz'],
        totals['Santa Clara'],
        totals['San Mateo'],
    ]
    # heights ends up as:
    #   [146024, 857609, 377876]

    plt.bar(labels, heights, color='green')
    plt.title('Votes Per County')
    # More titling is possible, e.g. plt.xlabel() and plt.ylabel()
    plt.show()


def plot_ca2(filename):
    """
    Bar chart of the vote totals of 7 bay-area counties.

    Same chart as the 3 county version, but the bar heights are
    built with a comprehension instead of typed out by hand.
    """
    totals = read_ca(filename)
    plt.figure(figsize=(8, 4))

    labels = [
        'Santa Clara', 'San Mateo', 'Alameda', 'San Francisco',
        'Marin', 'Sonoma', 'Napa',
    ]
    # The comprehension walks the labels list and looks up each
    # county's total, so no county name is typed twice.
    # heights ends up as
    #  [857609, 377876, 777781, 442345, 156801, 268569, 72700]
    heights = [totals[name] for name in labels]

    plt.bar(labels, heights, color='green')
    plt.title('Votes Per County')
    plt.show()


def plot_ca3(filename):
    """
    Plot first digits of all ca counties.

    Bar chart with one bar for each leading digit 1-9, where the bar
    height is how many county vote totals start with that digit.
    """
    votes = read_ca(filename)
    nums = votes.values()
    counts = first_digits(nums)

    plt.figure(figsize=(8, 4))
    x_vals = ['1', '2', '3', '4', '5', '6', '7', '8', '9']
    # counts only has keys for digits that actually occurred, so use
    # .get() with 0 - a digit that never leads gets a zero-height bar
    # instead of raising KeyError
    y_vals = [counts.get(int(x), 0) for x in x_vals]
    plt.bar(x_vals, y_vals, color='green')
    plt.title('CA First Digits')
    plt.show()


def plot_tx(filename):
    """
    Plot first digits of tx counties.

    Bar chart with one bar for each leading digit 1-9, where the bar
    height is how many county vote totals start with that digit.
    """
    votes = read_texas(filename)
    nums = votes.values()
    counts = first_digits(nums)

    plt.figure(figsize=(8, 4))
    x_vals = ['1', '2', '3', '4', '5', '6', '7', '8', '9']
    # counts only has keys for digits that actually occurred, so use
    # .get() with 0 - a digit that never leads gets a zero-height bar
    # instead of raising KeyError
    y_vals = [counts.get(int(x), 0) for x in x_vals]
    plt.bar(x_vals, y_vals, linewidth=2, color='green')
    plt.title('TX First Digits')
    plt.show()


def plot_potato(filename):
    """
    Plot first digits of the potato harvest data.

    Bar chart with one bar for each leading digit 1-9, where the bar
    height is how many country-year tons values start with that digit.
    """
    tons = read_potato(filename)
    nums = tons.values()
    counts = first_digits(nums)

    plt.figure(figsize=(8, 4))
    x_vals = ['1', '2', '3', '4', '5', '6', '7', '8', '9']
    # counts only has keys for digits that actually occurred, so use
    # .get() with 0 - a digit that never leads gets a zero-height bar
    # instead of raising KeyError
    y_vals = [counts.get(int(x), 0) for x in x_vals]
    plt.bar(x_vals, y_vals, linewidth=2, color='green')
    plt.title('Potato Harvest First Digits')
    plt.show()


def main():
    """
    Command line entry point: expects a flag followed by a data filename.

    Each flag selects one graph/data-source, e.g.
    -ca ca-2020-election.csv
    Any other number of args, or an unrecognized flag, does nothing.
    """
    args = sys.argv[1:]
    if len(args) != 2:
        return
    flag, filename = args

    # -ca is the odd one out: it prints the county totals, no graph
    if flag == '-ca':
        print(read_ca(filename))
        return

    # Every other flag maps straight onto one plotting function
    plotters = {
        '-ca1': plot_ca1,
        '-ca2': plot_ca2,
        '-ca3': plot_ca3,
        '-tx': plot_tx,
        '-potato': plot_potato,
    }
    plotter = plotters.get(flag)
    if plotter is not None:
        plotter(filename)


# Run main() only when this file is executed as a script,
# not when it is imported as a module
if __name__ == '__main__':
    main()
