import pickle
import matplotlib.pyplot as plt
import seaborn as sns

"""
Install:
pip install matplotlib
pip install seaborn
pickle ships with Python (standard library), no install needed

Coding:
all_years is the range [1914, 2014]
get_count(name, year, count_map) is super helpful
If you don't want to use pickle, you can code in create_count_map.py

Ethics considerations:
Only includes US Social Security applicants
Does not include folks who change their name. Structural bias!
SS data does not include names with < 5 people

"""


def _load_pickle(path):
	"""
	Reads and returns the single pickled object stored at path. The file
	is closed as soon as it has been read.
	"""
	# NOTE: unpickling can execute arbitrary code, so only load data
	# files that come from a trusted source.
	with open(path, 'rb') as f:
		return pickle.load(f)


def main():
	"""
	Loads the pickled count map, years and names from the data/ folder,
	then repeatedly asks for a name and plots its age distribution.
	Names that are not in all_names are reported as 'Not in dataset'.
	The loop has no exit condition of its own.
	"""
	print('Loading data...')
	data = _load_pickle('data/count_map.pkl')
	all_years = _load_pickle('data/all_years.pkl')
	all_names = _load_pickle('data/all_names.pkl')

	# repeatedly run a query
	while True:
		name = input('Enter a name: ')
		if name not in all_names:
			print('Not in dataset')
		else:
			run_name_query(name, all_years, data)

def get_count(name, year, data):
	"""
	Looks up how many babies with the given name were born in the given
	year (all genders) and registered for US Social Security.

	data maps (name, year) pairs to counts. A pair that is missing from
	data yields 0.
	"""
	return data.get((name, year), 0)

def run_name_query(query, all_years, data, current_year=2021):
	"""
	Calculates the conditional probability of age | name and plots it.

	For every year in all_years, the probability is that year's count
	for the name divided by the name's total count over all_years. The
	matching age is current_year - year.

	query: the name to look up
	all_years: the birth years to include
	data: map from (name, year) to count, as read by get_count
	current_year: the year ages are measured from (defaults to 2021)

	If the name has a total count of 0 over all_years, prints
	'Not in dataset' and returns without plotting.
	"""
	# Look each year up once; the counts feed both the total and the
	# per-year probabilities.
	counts = [(year, get_count(query, year, data)) for year in all_years]
	total_count = sum(count for _, count in counts)

	# Without this guard a name with no counts would divide by zero
	if total_count == 0:
		print('Not in dataset')
		return

	x = [current_year - year for year, _ in counts]
	y = [count / total_count for _, count in counts]

	plot_distribution(query, x, y)

def plot_distribution(name, x, y):
	"""
	Draws y against x as a line chart titled with the name, then shows
	the figure. Styling comes from seaborn (sns), a library written by
	Michael Waskom while at Stanford.
	"""
	y_label = f'P(Age = age | Name = {name})'

	# Apply the seaborn theme before anything is drawn
	sns.set_theme()
	plt.plot(x, y)

	plt.xlabel('age')
	plt.ylabel(y_label)
	plt.title(name)

	plt.show()

# Start the interactive query loop only when this file is run as a
# script, not when it is imported.
if __name__ == '__main__':
	main()