% This template was designed by Victor Lin (victorlin@cs.stanford.edu) with edits by Lisa Yan.
% You should compile this with PDFLaTeX (default), not XeLaTeX.
% This is a comment!
% This is the document declaration
\documentclass[12pt]{article}
% these are some useful packages. these add functionality to your document
\usepackage{newtxtext} % sets font
\usepackage{amssymb}
\usepackage{amsthm}
\usepackage{amssymb}
\usepackage{mathdots}
\usepackage[pdftex]{graphicx}
\usepackage{fancyhdr}
%\usepackage[margin=1in]{geometry}
\usepackage{multicol}
\usepackage{bm}
\usepackage{listings}
\PassOptionsToPackage{usenames,dvipsnames}{color} %% Allow color names
\usepackage{pdfpages}
\usepackage{algpseudocode}
\usepackage{tikz}
\usepackage{enumitem}
\usepackage[T1]{fontenc}
\usepackage{inconsolata}
\usepackage{framed}
\usepackage[thinlines]{easytable}
\usepackage{minted}
\def\code#1{\texttt{#1}} % add some code in-line
\usepackage{mdframed}
\usepackage{colortbl}
\usepackage{makecell}
\usepackage{amsmath} % math symbols
\usepackage{wrapfig} % in case you want to use a figure
\usepackage{hyperref} % add hyper links
% This package controls the margins of the page.
\usepackage[top=1in, bottom=1in, left=0.8in, right=1in]{geometry}
\usepackage{multicol} % in case you want to use multiple columns
\setlength{\columnsep}{0.1pc}
% document headers!
\title{CS 109: Probability for Computer Scientists\\Problem Set\#2}
\author{author} % Change this to your name!
\date{\today} % today does the right thing
\lhead{author}
\chead{Problem Set \#2}
\rhead{\today}
\newcommand{\abs}[1]{\lvert #1 \rvert}
\newcommand{\absfit}[1]{\left\lvert #1 \right\rvert}
\newcommand{\norm}[1]{\left\lVert #1 \right\rVert}
\newcommand{\eval}[3]{\left[#1\right]_{#2}^{#3}}
\renewcommand{\(}{\left(}
\renewcommand{\)}{\right)}
\newcommand{\floor}[1]{\left\lfloor#1\right\rfloor}
\newcommand{\ceil}[1]{\left\lceil#1\right\rceil}
\newcommand{\pd}[1]{\frac{\partial}{\partial #1}}
\newcommand{\inner}[1]{\langle#1\rangle}
\newcommand{\cond}{\bigg|}
\newcommand{\rank}[1]{\mathbf{rank}(#1)}
\newcommand{\range}[1]{\mathbf{range}(#1)}
\newcommand{\nullsp}[1]{\mathbf{null}(#1)}
\newcommand{\repr}[1]{\left\langle#1\right\rangle}
\DeclareMathOperator{\Var}{Var}
\newcommand{\SD}{\operatorname{SD}}
\newcommand{\Cov}{\operatorname{Cov}}
\DeclareMathOperator{\tr}{tr}
\DeclareMathOperator{\Tr}{\mathbf{Tr}}
\DeclareMathOperator{\diag}{\mathbf{diag}}
\DeclareMathOperator{\dist}{\mathbf{dist}}
\DeclareMathOperator{\prob}{\mathbf{prob}}
\DeclareMathOperator{\dom}{\mathbf{dom}}
\DeclareMathOperator{\E}{\mathbf{E}}
\DeclareMathOperator{\R}{\mathbb{R}}
\DeclareMathOperator{\var}{\mathbf{var}}
\DeclareMathOperator{\quartile}{\mathbf{quartile}}
\DeclareMathOperator{\conv}{\mathbf{conv}}
\DeclareMathOperator{\VC}{VC}
\DeclareMathOperator*{\argmax}{arg\,max}
\DeclareMathOperator*{\argmin}{arg\,min}
\newcommand{\Uni}{\operatorname{Uni}}
\newcommand{\Ber}{\operatorname{Ber}}
\newcommand{\Bin}{\operatorname{Bin}}
\newcommand{\Geo}{\operatorname{Geo}}
\newcommand{\NegBin}{\operatorname{NegBin}}
\newcommand{\Zipf}{\operatorname{Zipf}}
\newcommand{\HypG}{\operatorname{HypG}}
\newcommand{\Poi}{\operatorname{Poi}}
\newcommand{\Beta}{\operatorname{Beta}}
\newcommand{\Normal}{\operatorname{\mathcal{N}}}
\newcommand{\N}{\operatorname{\mathcal{N}}}
\newcommand{\Exp}{\operatorname{Exp}}
\DeclareMathOperator{\NP}{\mathbf{NP}}
\DeclareMathOperator{\coNP}{\mathbf{coNP}}
\DeclareMathOperator{\TIME}{\mathsf{TIME}}
\DeclareMathOperator{\polytime}{\mathbf{P}}
\DeclareMathOperator{\PH}{\mathbf{PH}}
\DeclareMathOperator{\SIZE}{\mathbf{SIZE}}
\DeclareMathOperator{\ATIME}{\mathbf{ATIME}}
\DeclareMathOperator{\SPACE}{\mathbf{SPACE}}
\DeclareMathOperator{\ASPACE}{\mathbf{ASPACE}}
\DeclareMathOperator{\NSPACE}{\mathbf{NSPACE}}
\DeclareMathOperator{\Z}{\mathbb{Z}}
\DeclareMathOperator{\EXP}{\mathbf{EXP}}
\DeclareMathOperator{\NEXP}{\mathbf{NEXP}}
\DeclareMathOperator{\NTIME}{\mathbf{NTIME}}
\DeclareMathOperator{\DTIME}{\mathbf{DTIME}}
\DeclareMathOperator{\poly}{poly}
\DeclareMathOperator{\BPP}{\mathbf{BPP}}
\DeclareMathOperator{\ZPP}{\mathbf{ZPP}}
\DeclareMathOperator{\RP}{\mathbf{RP}}
\DeclareMathOperator{\coRP}{\mathbf{coRP}}
\DeclareMathOperator{\BPL}{\mathbf{BPL}}
\DeclareMathOperator{\IP}{\mathbf{IP}}
\DeclareMathOperator{\PSPACE}{\mathbf{PSPACE}}
\DeclareMathOperator{\NPSPACE}{\mathbf{NPSPACE}}
\DeclareMathOperator{\SAT}{\mathsf{SAT}}
\DeclareMathOperator{\NL}{\mathbf{NL}}
\DeclareMathOperator{\PCP}{\mathbf{PCP}}
\DeclareMathOperator{\PP}{\mathbf{PP}}
\DeclareMathOperator{\cost}{cost}
\let\Pr\relax
\DeclareMathOperator*{\Pr}{\mathbf{Pr}}
\usepackage{booktabs}
\usepackage{diagbox}
% configure display of tables
\newcommand{\ra}[1]{\renewcommand{\arraystretch}{#1}}
\newcommand\tab[1][1cm]{\hspace*{#1}}
\newcommand\tabhead[1]{\small\textbf{#1}}
\theoremstyle{definition}
\newtheorem*{answer}{Answer}
\definecolor{shadecolor}{gray}{0.95}
\setlength{\parindent}{0pt}
\pagestyle{fancy}
\setlength{\headheight}{15pt}
\renewcommand{\thefootnote}{\fnsymbol{footnote}}
% begin the document
\begin{document}
% actually insert the title.
\maketitle
\begin{enumerate}
% 1
\item Let $E$ and $F$ be events defined on the same sample space $S$. Prove that:
\[P(EF) \geq P(E) + P(F) - 1\]
(This formula is known as Bonferroni's Inequality.)
\begin{shaded}
\begin{answer}
\end{answer}
\end{shaded}
\newpage
% 2
\item Say in Silicon Valley, 35\% of engineers program in Java and 28\% of the engineers who program in Java also program in C++. Furthermore, 40\% of engineers program in C++.
\begin{enumerate}[label=\alph*.]
\item What is the probability that a randomly selected engineer programs in Java and C++?
\item What is the conditional probability that a randomly selected engineer programs in Java given that they program in C++?
\end{enumerate}
\begin{shaded}
\begin{answer}
\end{answer}
\end{shaded}
\newpage
% 3
\item A website wants to detect if a visitor is a robot. They give the visitor three CAPTCHA tests that are hard for robots but easy for humans. If the visitor fails one of the tests, they are flagged as a robot. The probability that a human succeeds at a single test is 0.95, while a robot only succeeds with probability 0.15. Assume all tests are independent.
\begin{enumerate}[label=\alph*.]
\item If a visitor is actually a robot, what is the probability they get flagged (the probability they fail at least one test)?
\item If a visitor is human, what is the probability they get flagged?
\item The percentage of visitors on the site that are robots is 5\%. Suppose a visitor gets flagged. Using your answers from part (a), what is the probability that the visitor is a robot?
\end{enumerate}
\begin{shaded}
\begin{answer}
\end{answer}
\end{shaded}
\newpage
% 4
\item Say all computers either run operating system W or X. A computer running operating system W is twice as likely to get infected with a virus as a computer running operating system X. If 70\% of all computers are running operating system W, what percentage of computers infected with a virus are running operating system W?
\begin{shaded}
\begin{answer}
\end{answer}
\end{shaded}
\newpage
% 5
\item Two cards are randomly chosen without replacement from an ordinary deck of 52 cards. Let $E$ be the event that both cards are Aces. Let $F$ be the event that the Ace of Spades is one of the chosen cards, and let $G$ be the event that at least one Ace is chosen. Compute:
\begin{enumerate}[label=\alph*.]
\item $P(E \mid F)$
\item $P(E \mid G)$
\end{enumerate}
\begin{shaded}
\begin{answer}
\end{answer}
\end{shaded}
\newpage
% 6
\item Two emails are received at a mail server. Suppose that each email is spam with probability 0.8 and that whether each email message is spam is an independent event from the other.
\begin{enumerate}[label=\alph*.]
\item Suppose that you are told that at least one of the two emails is spam. Compute the conditional probability that both emails are spam.
\item Suppose now that one of the emails is randomly (accidentally) forwarded from the server to your account, and you see that this email is spam. What is the probability that both emails originally received by the server are spam in this case? Explain your answer.
\end{enumerate}
\begin{shaded}
\begin{answer}
\end{answer}
\end{shaded}
\newpage
% 7
\item After a long night of programming, you have built a powerful, but slightly buggy, email spam filter. When you don't encounter the bug, the filter works very well, always marking a spam email as SPAM and always marking a non-spam email as GOOD. Unfortunately, your code contains a bug that is encountered 10\% of the time when the filter is run on an email. When the bug is encountered, the filter always marks the email as GOOD. As a result, emails that are actually spam will be erroneously marked as GOOD when the bug is encountered. Let $p$ denote the probability that an email is actually non-spam, and let $q$ denote the conditional probability that an email is non-spam given that it is marked as GOOD by the filter.
\begin{enumerate}[label=\alph*.]
\item Determine $q$ in terms of $p$.
\item Using your answer from part (a), explain mathematically whether $q$ or $p$ is greater. Also, provide an intuitive justification for your answer.
\end{enumerate}
\begin{shaded}
\begin{answer}
\end{answer}
\end{shaded}
\newpage
% 8
\item Consider a hash table with 15 buckets, of which 9 are empty (have no strings hashed to them) and the other 6 buckets are non-empty (have at least one string hashed to each of them already). Now, 2 new strings are independently hashed into the table, where each string is equally likely to be hashed into any bucket. Later, another 2 strings are hashed into the table (again, independently and equally likely to get hashed to any bucket). What is the probability that both of the final 2 strings are each hashed to empty buckets in the table?
\begin{shaded}
\begin{answer}
\end{answer}
\end{shaded}
\newpage
% 9
\item Five servers are located in a computer cluster. After one year, each server independently is still working with probability $p$, and otherwise fails (with probability $1 - p$).
\begin{enumerate}[label=\alph*.]
\item What is the probability that \emph{at least} 1 server is still working after one year?
\item What is the probability that \emph{exactly} 2 servers are still working after one year?
\item What is the probability that \emph{at least} 2 servers are still working after one year?
\end{enumerate}
\begin{shaded}
\begin{answer}
\end{answer}
\end{shaded}
\newpage
% 10
\item The Superbowl institutes a new way to determine which team receives the kickoff first. The referee chooses with equal probability one of three coins. Although the coins look identical, they have probability of heads 0.1, 0.5 and 0.9, respectively. Then the referee tosses the chosen coin 3 times. If more than half the tosses come up heads, one team will kick off; otherwise, the other team will kick off. If the tosses resulted in the sequence H, T, H, what is the probability that the fair coin was actually used?
\begin{shaded}
\begin{answer}
\end{answer}
\end{shaded}
\newpage
% 11
\item A robot, which only has a camera as a sensor, can either be in one of two locations: $L_1$ (which does not have a window) or $L_2$ (which has a window). The robot doesn't know exactly where it is and it represents this uncertainty by keeping track of two probabilities: $P(L_1)$ and $P(L_2)$. Based on all past observations, the robot thinks that there is a 0.7 probability it is in $L_1$ and a 0.3 probability that it is in $L_2$.
The robot then observes a window through its camera, and although there is only a window in $L_2$, it can't conclude with certainty that it is in fact in $L_2$, since its image recognition algorithm is not perfect. The probability of observing a window given there is no window at its location is 0.2, and the probability of observing a window given there is a window is 0.9. After incorporating the observation of a window, what are the robot's new probabilities for being in $L_1$ and $L_2$, respectively?
\begin{shaded}
\begin{answer}
\end{answer}
\end{shaded}
\newpage
% 12
\item The color of a person's eyes is determined by a pair of eye-color genes, as follows:
\begin{itemize}
\item if both of the eye-color genes are blue-eyed genes, then the person will have blue eyes
\item if one or more of the genes is a brown-eyed gene, then the person will have brown eyes
\end{itemize}
A newborn child independently receives one eye-color gene from each of its parents, and the gene it receives from a parent is equally likely to be either of the two eye-color genes of that parent. Suppose William and both of his parents have brown eyes, but William's sister (Claire) has blue eyes. (We assume that blue and brown are the only eye-color genes.)
\begin{enumerate}[label=\alph*.]
\item What is the probability that William possesses a blue-eyed gene?
\item Suppose that William's wife has blue eyes. What is the probability that their first child will have blue eyes?
\end{enumerate}
\begin{shaded}
\begin{answer}
\end{answer}
\end{shaded}
\newpage
% 13
\item Your colleagues in a comp-bio lab have sequenced DNA from a large population in order to understand how a gene ($G$) influences two particular traits ($T_1$ and $T_2$). They find that $P(G) = 0.6$, $P(T_1\|G) = 0.7$, and $P(T_2\|G) = 0.9$. They also observe that if a subject does not have the gene $G$, they express neither $T_1$ nor $T_2$. The probability of a patient having both $T_1$ and $T_2$ given that they have the gene $G$ is 0.63.
\begin{enumerate}[label=\alph*.]
\item Are $T_1$ and $T_2$ conditionally independent given $G$?
\item Are $T_1$ and $T_2$ conditionally independent given $G^C$?
\item What is $P(T_1)$?
\item What is $P(T_2)$?
\item Are $T_1$ and $T_2$ independent?
\end{enumerate}
\begin{shaded}
\begin{answer}
\end{answer}
\end{shaded}
\newpage
% 14
\item See Problem Set \#2 for a full write-up of this problem. You will submit parts a and b as code in Gradescope and write up answers to c and d (and optionally e) in this document.
\begin{shaded}
\begin{answer}
\end{answer}
\end{shaded}
\newpage
\end{enumerate}
\end{document}