import util
import matplotlib.pyplot as plt
import numpy as np
import random
import copy
from tqdm import tqdm

# Number of bootstrap resampling rounds; also the denominator of the p-value.
ITERATIONS = 10000

def main(observedDiff=0.7):
	"""Run a bootstrap test on the difference between two sample means.

	Loads two samples from 'p1.csv' and 'p2.csv' via ``util.load``, pools
	them, and repeatedly draws two resamples (with replacement) of the
	original sample sizes from the pool. The printed p-value is the fraction
	of rounds in which the absolute difference of the resampled means is at
	least ``observedDiff``. The distribution of resampled differences is then
	shown as a histogram.

	Args:
		observedDiff: Threshold that each resampled absolute mean difference
			is compared against. Defaults to 0.7, the value that used to be
			hard-coded in the loop.
	"""
	# load our data
	pop1 = util.load('p1.csv')
	pop2 = util.load('p2.csv')

	# make the universal population; converting to an array once avoids
	# re-converting the pooled list on every resample call
	totalPop = np.asarray(list(pop1) + list(pop2))
	size1 = len(pop1)
	size2 = len(pop2)

	# Run a bootstrap experiment
	countDiffGreaterThanObserved = 0
	visualizationDist = []
	print('starting bootstrap')
	for _ in tqdm(range(ITERATIONS)):
		# resample and recalculate the statistic
		sampleMean1 = np.mean(resample(totalPop, size1))
		sampleMean2 = np.mean(resample(totalPop, size2))
		diff = abs(sampleMean2 - sampleMean1)

		# count how many times the statistic is at least as extreme
		if diff >= observedDiff:
			countDiffGreaterThanObserved += 1

		visualizationDist.append(diff)

	# compute the p-value
	p = float(countDiffGreaterThanObserved) / ITERATIONS
	print('p-value:', p)
	plotHistogram(visualizationDist, 100)

def resample(original, n):
	"""Draw ``n`` items from ``original`` at random, with replacement.

	Delegates to ``np.random.choice`` and returns its result unchanged.
	"""
	drawn = np.random.choice(original, size=n, replace=True)
	return drawn

def plotHistogram(x, numBins):
	"""Show a histogram of ``x`` split into ``numBins`` equal-width bins.

	Args:
		x: Sequence of numeric values to plot.
		numBins: Number of bins spanning the range of ``x``.
	"""
	# Pass the bin count and let matplotlib compute the edges. Hand-built
	# edges via np.arange fail when every value is identical (zero bin
	# width) and can gain an extra bin from floating-point rounding.
	plt.hist(x, bins=int(numBins), facecolor='green', alpha=0.75)
	plt.xlabel('Value')
	plt.ylabel('Count')
	plt.grid(True)
	plt.title('Data Histogram')
	plt.show()

# Run the experiment only when executed as a script, not on import.
if __name__ == '__main__':
	main()