% This template was designed by Victor Lin (victorlin@cs.stanford.edu) with edits by Lisa Yan.
% You should compile this with PDFLaTeX (default), not XeLaTeX.
% This is a comment!
% This is the document declaration
\documentclass[12pt]{article}
% these are some useful packages. these add functionality to your document
\usepackage{newtxtext} % sets font
\usepackage{amssymb}
\usepackage{amsthm}
\usepackage{amssymb}
\usepackage{mathdots}
\usepackage[pdftex]{graphicx}
\usepackage{fancyhdr}
%\usepackage[margin=1in]{geometry}
\usepackage{multicol}
\usepackage{bm}
\usepackage{listings}
\PassOptionsToPackage{usenames,dvipsnames}{color} %% Allow color names
\usepackage{pdfpages}
\usepackage{algpseudocode}
\usepackage{tikz}
\usepackage{enumitem}
\usepackage[T1]{fontenc}
\usepackage{inconsolata}
\usepackage{framed}
\usepackage[thinlines]{easytable}
\usepackage{minted}
\def\code#1{\texttt{#1}} % add some code in-line
\usepackage{mdframed}
\usepackage{colortbl}
\usepackage{makecell}
\usepackage{amsmath} % math symbols
\usepackage{wrapfig} % in case you want to use a figure
\usepackage{hyperref} % add hyper links
% This package controls the margins of the page.
\usepackage[top=1in, bottom=1in, left=0.8in, right=1in]{geometry}
\usepackage{multicol} % in case you want to use multiple columns
\setlength{\columnsep}{0.1pc}
% document headers!
\title{CS 109: Probability for Computer Scientists\\Problem Set\#2}
\author{author} % Change this to your name!
\date{\today} % today does the right thing
\lhead{author}
\chead{Problem Set \#2}
\rhead{\today}
\newcommand{\abs}[1]{\lvert #1 \rvert}
\newcommand{\absfit}[1]{\left\lvert #1 \right\rvert}
\newcommand{\norm}[1]{\left\lVert #1 \right\rVert}
\newcommand{\eval}[3]{\left[#1\right]_{#2}^{#3}}
\renewcommand{\(}{\left(}
\renewcommand{\)}{\right)}
\newcommand{\floor}[1]{\left\lfloor#1\right\rfloor}
\newcommand{\ceil}[1]{\left\lceil#1\right\rceil}
\newcommand{\pd}[1]{\frac{\partial}{\partial #1}}
\newcommand{\inner}[1]{\langle#1\rangle}
\newcommand{\cond}{\bigg|}
\newcommand{\rank}[1]{\mathbf{rank}(#1)}
\newcommand{\range}[1]{\mathbf{range}(#1)}
\newcommand{\nullsp}[1]{\mathbf{null}(#1)}
\newcommand{\repr}[1]{\left\langle#1\right\rangle}
\DeclareMathOperator{\Var}{Var}
\newcommand{\SD}{\operatorname{SD}}
\newcommand{\Cov}{\operatorname{Cov}}
\DeclareMathOperator{\tr}{tr}
\DeclareMathOperator{\Tr}{\mathbf{Tr}}
\DeclareMathOperator{\diag}{\mathbf{diag}}
\DeclareMathOperator{\dist}{\mathbf{dist}}
\DeclareMathOperator{\prob}{\mathbf{prob}}
\DeclareMathOperator{\dom}{\mathbf{dom}}
\DeclareMathOperator{\E}{\mathbf{E}}
\DeclareMathOperator{\R}{\mathbb{R}}
\DeclareMathOperator{\var}{\mathbf{var}}
\DeclareMathOperator{\quartile}{\mathbf{quartile}}
\DeclareMathOperator{\conv}{\mathbf{conv}}
\DeclareMathOperator{\VC}{VC}
\DeclareMathOperator*{\argmax}{arg\,max}
\DeclareMathOperator*{\argmin}{arg\,min}
\newcommand{\Uni}{\operatorname{Uni}}
\newcommand{\Ber}{\operatorname{Ber}}
\newcommand{\Bin}{\operatorname{Bin}}
\newcommand{\Geo}{\operatorname{Geo}}
\newcommand{\NegBin}{\operatorname{NegBin}}
\newcommand{\Zipf}{\operatorname{Zipf}}
\newcommand{\HypG}{\operatorname{HypG}}
\newcommand{\Poi}{\operatorname{Poi}}
\newcommand{\Beta}{\operatorname{Beta}}
\newcommand{\Normal}{\operatorname{\mathcal{N}}}
\newcommand{\N}{\operatorname{\mathcal{N}}}
\newcommand{\Exp}{\operatorname{Exp}}
\DeclareMathOperator{\NP}{\mathbf{NP}}
\DeclareMathOperator{\coNP}{\mathbf{coNP}}
\DeclareMathOperator{\TIME}{\mathsf{TIME}}
\DeclareMathOperator{\polytime}{\mathbf{P}}
\DeclareMathOperator{\PH}{\mathbf{PH}}
\DeclareMathOperator{\SIZE}{\mathbf{SIZE}}
\DeclareMathOperator{\ATIME}{\mathbf{ATIME}}
\DeclareMathOperator{\SPACE}{\mathbf{SPACE}}
\DeclareMathOperator{\ASPACE}{\mathbf{ASPACE}}
\DeclareMathOperator{\NSPACE}{\mathbf{NSPACE}}
\DeclareMathOperator{\Z}{\mathbb{Z}}
\DeclareMathOperator{\EXP}{\mathbf{EXP}}
\DeclareMathOperator{\NEXP}{\mathbf{NEXP}}
\DeclareMathOperator{\NTIME}{\mathbf{NTIME}}
\DeclareMathOperator{\DTIME}{\mathbf{DTIME}}
\DeclareMathOperator{\poly}{poly}
\DeclareMathOperator{\BPP}{\mathbf{BPP}}
\DeclareMathOperator{\ZPP}{\mathbf{ZPP}}
\DeclareMathOperator{\RP}{\mathbf{RP}}
\DeclareMathOperator{\coRP}{\mathbf{coRP}}
\DeclareMathOperator{\BPL}{\mathbf{BPL}}
\DeclareMathOperator{\IP}{\mathbf{IP}}
\DeclareMathOperator{\PSPACE}{\mathbf{PSPACE}}
\DeclareMathOperator{\NPSPACE}{\mathbf{NPSPACE}}
\DeclareMathOperator{\SAT}{\mathsf{SAT}}
\DeclareMathOperator{\NL}{\mathbf{NL}}
\DeclareMathOperator{\PCP}{\mathbf{PCP}}
\DeclareMathOperator{\PP}{\mathbf{PP}}
\DeclareMathOperator{\cost}{cost}
\let\Pr\relax
\DeclareMathOperator*{\Pr}{\mathbf{Pr}}
\usepackage{booktabs}
\usepackage{diagbox}
% configure display of tables
\newcommand{\ra}[1]{\renewcommand{\arraystretch}{#1}}
\newcommand\tab[1][1cm]{\hspace*{#1}}
\newcommand\tabhead[1]{\small\textbf{#1}}
\theoremstyle{definition}
\newtheorem*{answer}{Answer}
\definecolor{shadecolor}{gray}{0.95}
\setlength{\parindent}{0pt}
\pagestyle{fancy}
\setlength{\headheight}{15pt}
\renewcommand{\thefootnote}{\fnsymbol{footnote}}
% begin the document
\begin{document}
% actually insert the title.
\maketitle
%%%% Cite any collaboration here %%%
Cited collaboration: N/A
\begin{enumerate}
% 1
\item Say in Silicon Valley, 35\% of engineers program in Java and 28\% of the engineers who program in Java also program in C++. Furthermore, 40\% of engineers program in C++.
\begin{enumerate}[label=\alph*.]
\item What is the probability that a randomly selected engineer programs in Java and C++?
\item What is the conditional probability that a randomly selected engineer programs in Java given that they program in C++?
\end{enumerate}
\begin{shaded}
\begin{answer}
\end{answer}
\end{shaded}
\newpage
% 2
\item A website wants to detect if a visitor is a robot or a human. They give the visitor five CAPTCHA tests that are hard for robots but easy for humans. If the visitor fails one of the tests, they are flagged as a robot. The probability that a human succeeds at a single test is 0.95, while a robot only succeeds with probability 0.3. Assume all tests are independent. The percentage of visitors on this website that are robots is 5\%; all other visitors are human.
\begin{enumerate}[label=\alph*.]
\item If a visitor is actually a robot, what is the probability they get flagged (the probability they fail at least one test)?
\item If a visitor is human, what is the probability they get flagged?
\item Suppose a visitor gets flagged. Using your answers from part (a) and (b), what is the probability that the visitor is a robot?
\item If a visitor is human, what is the probability that they pass exactly three of the five tests?
\item Building off of your answer from part (d), what is the probability that a visitor with unknown identity passes exactly three of the five tests?
\end{enumerate}
\begin{shaded}
\begin{answer}
\end{answer}
\end{shaded}
\newpage
% 3
\item Say all computers either run operating system W or X. A computer running operating system W is twice as likely to get infected with a virus as a computer running operating system X. If 70\% of all computers are running operating system W, what percentage of computers infected with a virus are running operating system W?
\begin{shaded}
\begin{answer}
\end{answer}
\end{shaded}
\newpage
% 4
\item The Superbowl institutes a new way to determine which team receives the kickoff first. The referee chooses with equal probability one of three coins. Although the coins look identical, they have probability of heads 0.1, 0.5 and 0.9, respectively. Then the referee tosses the chosen coin 3 times. If more than half the tosses come up heads, one team will kick off; otherwise, the other team will kick off. If the tosses resulted in the sequence H, T, H, what is the probability that the fair coin was actually used?
\begin{shaded}
\begin{answer}
\end{answer}
\end{shaded}
\newpage
% 5
\item After a long night of programming, you have built a powerful, but slightly buggy, email spam filter. When you don't encounter the bug, the filter works very well, always marking a spam email as SPAM and always marking a non-spam email as GOOD. Unfortunately, your code contains a bug that is encountered 10\% of the time when the filter is run on an email. When the bug is encountered, the filter always marks the email as GOOD. As a result, emails that are actually spam will be erroneously marked as GOOD when the bug is encountered. Let $p$ denote the probability that an email is actually non-spam, and let $q$ denote the conditional probability that an email is non-spam given that it is marked as GOOD by the filter.
\begin{enumerate}[label=\alph*.]
\item Determine $q$ in terms of $p$.
\item Using your answer from part (a), explain mathematically whether $q$ or $p$ is greater. Also, provide an intuitive justification for your answer.
\end{enumerate}
\begin{shaded}
\begin{answer}
\end{answer}
\end{shaded}
\newpage
% 6
\item Two cards are randomly chosen without replacement from an ordinary deck of 52 cards. Let $E$ be the event that both cards are Aces. Let $F$ be the event that the Ace of Spades is one of the chosen cards, and let $G$ be the event that at least one Ace is chosen.
\begin{enumerate}[label=\alph*.]
\item Compute $P(E \mid F)$.
\item Are $E$ and $F$ independent? Justify your answer using your response to part (a).
\item Compute $P(E \mid G)$.
\end{enumerate}
\begin{shaded}
\begin{answer}
\end{answer}
\end{shaded}
\newpage
% 7
\item Your colleagues in a comp-bio lab have sequenced DNA from a large population in order to understand how a gene ($G$) influences two particular traits ($T_1$ and $T_2$). They find that $P(G) = 0.6$, $P(T_1\|G) = 0.7$, and $P(T_2\|G) = 0.9$. They also observe that if a subject does not have the gene $G$, they express neither $T_1$ nor $T_2$. The probability of a patient having both $T_1$ and $T_2$ given that they have the gene $G$ is 0.63.
\begin{enumerate}[label=\alph*.]
\item Are $T_1$ and $T_2$ conditionally independent given $G$?
\item Are $T_1$ and $T_2$ conditionally independent given $G^C$?
\item What is $P(T_1)$?
\item What is $P(T_2)$?
\item Are $T_1$ and $T_2$ independent?
\end{enumerate}
\begin{shaded}
\begin{answer}
\end{answer}
\end{shaded}
\newpage
% 8
\item The color of a person's eyes is determined by a pair of eye-color genes, as follows:
\begin{itemize}
\item if both of the eye-color genes are blue-eyed genes, then the person will have blue eyes
\item if one or more of the genes is a brown-eyed gene, then the person will have brown eyes
\end{itemize}
A newborn child independently receives one eye-color gene from each of its parents, and the gene it receives from a parent is equally likely to be either of the two eye-color genes of that parent. Suppose William and both of his parents have brown eyes, but William's sister (Claire) has blue eyes. (We assume that blue and brown are the only eye-color genes.)
\begin{enumerate}[label=\alph*.]
\item What is the probability that William possesses a blue-eyed gene?
\item Suppose that William's wife has blue eyes. What is the probability that their first child will have blue eyes?
\end{enumerate}
\begin{shaded}
\begin{answer}
\end{answer}
\end{shaded}
\newpage
% 9
\item Consider the following algorithm for betting in roulette (\url{https://en.wikipedia.org/wiki/Roulette}). At each round (``spin''), you bet \$1 on a color (``red'' or ``black''). If that color comes up on the wheel, you keep your bet AND win \$1; otherwise, you lose your bet.
\begin{enumerate}[label=\roman*.]
\item Bet \$1 on ``red''
\item If ``red'' comes up on the wheel (with probability 18/38), then you win \$1 (and keep your original \$1 bet) and you \textbf{immediately} quit (i.e., you do not do step (iii) below).
\item If ``red'' did not come up on the wheel (with probability 20/38), then you lose your initial \$1 bet. But, then you bet \$1 on ``red'' on \textit{each} of the next \textbf{two} spins of the wheel. After those two spins, you quit (no matter what the outcome of the next two spins).
\end{enumerate}
Let $X$ denote your ``winnings'' when you quit, i.e., the total amount of money won minus any amounts lost while playing. This value may be negative.
\begin{enumerate}
\item Determine $P(X > 0)$.
\item Determine $E[X]$. (Rhetorical question: Would you play this game?)
\end{enumerate}
\begin{shaded}
\begin{answer}
\end{answer}
\end{shaded}
\newpage
% 10
\item See Problem Set \#2 for a full write-up of this problem. You will submits parts (a) and (b) as code in Gradescope, and write up answers to parts (c) and (d) (and optionally part (e)) in this document.
\begin{shaded}
\begin{answer}
\end{answer}
\end{shaded}
\newpage
% 11
\item \textbf{[Written, Extra Credit]} Suppose we want to write an algorithm \texttt{fairRandom} for randomly generating a 0 or a 1 with equal probability (= 0.5). Unfortunately, all we have available to us is a function:
~~~~\texttt{def unknownRandom() -> int}
that randomly generates bits, where on each call a 1 is returned with some unknown probability $p$ that need not be equal to 0.5 (and a 0 is returned with probability $1 - p$).
Consider the following algorithm for \texttt{fairRandom}:
\begin{minted}[frame=single]{python}
def fairRandom():
r1, r2 = 0, 0 # set r1 = 0 and r2 = 0
while True:
r1 = unknownRandom()
r2 = unknownRandom()
if (r1 != r2): break
return r2
\end{minted}
\begin{enumerate}[label=\alph*.]
\item Show mathematically that \texttt{fairRandom} does indeed return a 0 or a 1 with equal probability.
\vfill
\begin{center}
\textcolor{gray}{\textit{Continued on next page\dots}}
\end{center}
\newpage
\item Say we want to simplify the function, so we write the \texttt{simpleRandom} function below. Would the \texttt{simpleRandom} function also generate 0's and 1's with equal probability? Explain why or why not. Determine P(\texttt{simpleRandom} returns 1) in terms of $p$.
\end{enumerate}
\begin{minted}[frame=single]{python}
def simpleRandom():
r1, r2 = 0, 0 # set r1 = 0 and r2 = 0
r1 = unknownRandom()
while True:
r2 = unknownRandom()
if (r1 != r2): break
return r2
\end{minted}
\begin{shaded}
\begin{answer}
\end{answer}
\end{shaded}
\newpage
% 12
\item \textbf{[Coding, Extra Credit]} Please submit your article as code through Gradescope. No need to write anything here.
\end{enumerate}
\end{document}