% This template was designed by Victor Lin (victorlin@cs.stanford.edu) with edits by Lisa Yan.
% You should compile this with PDFLaTeX (default), not XeLaTeX.

% This is a comment!

% This is the document declaration
\documentclass[12pt]{article}

% these are some useful packages. these add functionality to your document
\usepackage{newtxtext} % sets font
\usepackage{amssymb}
\usepackage{amsthm}
\usepackage{amssymb}
\usepackage{mathdots}
\usepackage[pdftex]{graphicx}
\usepackage{fancyhdr}
%\usepackage[margin=1in]{geometry}
\usepackage{multicol}
\usepackage{bm}
\usepackage{listings}
\PassOptionsToPackage{usenames,dvipsnames}{color}  %% Allow color names
\usepackage{pdfpages}
\usepackage{algpseudocode}
\usepackage{tikz}
\usepackage{enumitem}
\usepackage[T1]{fontenc}
\usepackage{inconsolata}
\usepackage{framed}
\usepackage[thinlines]{easytable}
\usepackage{minted}
\def\code#1{\texttt{#1}} % add some code in-line
\usepackage{mdframed}
\usepackage{colortbl}
\usepackage{makecell}
\usepackage{amsmath}  % math symbols
\usepackage{wrapfig}  % in case you want to use a figure
\usepackage{hyperref} % add hyper links

% This package controls the margins of the page.
\usepackage[top=1in, bottom=1in, left=0.8in, right=1in]{geometry}

\usepackage{multicol} % in case you want to use multiple columns
\setlength{\columnsep}{0.1pc}

% document headers!
\title{CS 109: Probability for Computer Scientists\\Problem Set\#6}
\author{author} % Change this to your name!
\date{\today} % today does the right thing

\lhead{author}
\chead{Problem Set \#6}
\rhead{\today}

\newcommand{\abs}[1]{\lvert #1 \rvert}
\newcommand{\absfit}[1]{\left\lvert #1 \right\rvert}
\newcommand{\norm}[1]{\left\lVert #1 \right\rVert}
\newcommand{\eval}[3]{\left[#1\right]_{#2}^{#3}}
\renewcommand{\(}{\left(}
\renewcommand{\)}{\right)}
\newcommand{\floor}[1]{\left\lfloor#1\right\rfloor}
\newcommand{\ceil}[1]{\left\lceil#1\right\rceil}
\newcommand{\pd}[1]{\frac{\partial}{\partial #1}}
\newcommand{\inner}[1]{\langle#1\rangle}
\newcommand{\cond}{\bigg|}
\newcommand{\rank}[1]{\mathbf{rank}(#1)}
\newcommand{\range}[1]{\mathbf{range}(#1)}
\newcommand{\nullsp}[1]{\mathbf{null}(#1)}
\newcommand{\repr}[1]{\left\langle#1\right\rangle}

\DeclareMathOperator{\Var}{Var}
\newcommand{\SD}{\operatorname{SD}}
\newcommand{\Cov}{\operatorname{Cov}}
\DeclareMathOperator{\tr}{tr}
\DeclareMathOperator{\Tr}{\mathbf{Tr}}
\DeclareMathOperator{\diag}{\mathbf{diag}}
\DeclareMathOperator{\dist}{\mathbf{dist}}
\DeclareMathOperator{\prob}{\mathbf{prob}}
\DeclareMathOperator{\dom}{\mathbf{dom}}
\DeclareMathOperator{\E}{\mathbf{E}}
\DeclareMathOperator{\R}{\mathbb{R}}
\DeclareMathOperator{\var}{\mathbf{var}}
\DeclareMathOperator{\quartile}{\mathbf{quartile}}
\DeclareMathOperator{\conv}{\mathbf{conv}}
\DeclareMathOperator{\VC}{VC}
\DeclareMathOperator*{\argmax}{arg\,max}
\DeclareMathOperator*{\argmin}{arg\,min}
\newcommand{\Uni}{\operatorname{Uni}}
\newcommand{\Ber}{\operatorname{Ber}}
\newcommand{\Bin}{\operatorname{Bin}}
\newcommand{\Geo}{\operatorname{Geo}}
\newcommand{\NegBin}{\operatorname{NegBin}}
\newcommand{\Zipf}{\operatorname{Zipf}}
\newcommand{\HypG}{\operatorname{HypG}}
\newcommand{\Poi}{\operatorname{Poi}}
\newcommand{\Beta}{\operatorname{Beta}}
\newcommand{\Normal}{\operatorname{\mathcal{N}}}
\newcommand{\N}{\operatorname{\mathcal{N}}}
\newcommand{\Exp}{\operatorname{Exp}}
\DeclareMathOperator{\NP}{\mathbf{NP}}
\DeclareMathOperator{\coNP}{\mathbf{coNP}}
\DeclareMathOperator{\TIME}{\mathsf{TIME}}
\DeclareMathOperator{\polytime}{\mathbf{P}}
\DeclareMathOperator{\PH}{\mathbf{PH}}
\DeclareMathOperator{\SIZE}{\mathbf{SIZE}}
\DeclareMathOperator{\ATIME}{\mathbf{ATIME}}
\DeclareMathOperator{\SPACE}{\mathbf{SPACE}}
\DeclareMathOperator{\ASPACE}{\mathbf{ASPACE}}
\DeclareMathOperator{\NSPACE}{\mathbf{NSPACE}}
\DeclareMathOperator{\Z}{\mathbb{Z}}
\DeclareMathOperator{\EXP}{\mathbf{EXP}}
\DeclareMathOperator{\NEXP}{\mathbf{NEXP}}
\DeclareMathOperator{\NTIME}{\mathbf{NTIME}}
\DeclareMathOperator{\DTIME}{\mathbf{DTIME}}
\DeclareMathOperator{\poly}{poly}
\DeclareMathOperator{\BPP}{\mathbf{BPP}}
\DeclareMathOperator{\ZPP}{\mathbf{ZPP}}
\DeclareMathOperator{\RP}{\mathbf{RP}}
\DeclareMathOperator{\coRP}{\mathbf{coRP}}
\DeclareMathOperator{\BPL}{\mathbf{BPL}}
\DeclareMathOperator{\IP}{\mathbf{IP}}
\DeclareMathOperator{\PSPACE}{\mathbf{PSPACE}}
\DeclareMathOperator{\NPSPACE}{\mathbf{NPSPACE}}
\DeclareMathOperator{\SAT}{\mathsf{SAT}}
\DeclareMathOperator{\NL}{\mathbf{NL}}
\DeclareMathOperator{\PCP}{\mathbf{PCP}}
\DeclareMathOperator{\PP}{\mathbf{PP}}
\DeclareMathOperator{\cost}{cost}
\let\Pr\relax
\DeclareMathOperator*{\Pr}{\mathbf{Pr}}

\usepackage{booktabs}
\usepackage{diagbox}
  % configure display of tables
\newcommand{\ra}[1]{\renewcommand{\arraystretch}{#1}}
\newcommand\tab[1][1cm]{\hspace*{#1}}
\newcommand\tabhead[1]{\small\textbf{#1}}

\theoremstyle{definition}
\newtheorem*{answer}{Answer}

\definecolor{shadecolor}{gray}{0.95}

\setlength{\parindent}{0pt}

\pagestyle{fancy}
\setlength{\headheight}{15pt}

\renewcommand{\thefootnote}{\fnsymbol{footnote}}

% begin the document
\begin{document}

% actually insert the title.
\maketitle
  
\begin{enumerate}
% 1
\item We are writing a WebMD program that is slightly larger than the one we worked through in class. In this program we  predict whether a user has a flu ($F=1$) or cold ($C = 1$) based on knowing any subset of 10 potential binary symptoms (e.g., headache, sniffles, fatigue, cough, etc) and a subset of binary risk factors (exposure, stress). 

\vspace{-3mm}
% to include this image with your code:
% 1) download the website image: http://web.stanford.edu/class/cs109/img/psets/pset5_webmd.png
% 2) save the image to the same folder as this file (e.g., upload to overleaf)
% otherwise just comment out everything from \begin{figure} to \end{figure}.
\begin{figure}[h]
\centering
\includegraphics[width=0.85\textwidth]{pset5_webmd.png}
\end{figure}

\vspace{-5mm}
\begin{itemize}
\item We know the prior probability for Stress is 0.5 and Exposure is 0.1. 
 
\item The functions \code{probCold(s, e)} and \code{probFlu(s, e)}  return the probability that a patient has a cold or flu, given the state of the risk factors stress (\code{s}) and exposure (\code{e}).
 
\item The function \code{probSymptom(i, f, c)} which returns the probability that the \code{i}th symptom ($X_i$) takes on value 1, given the state of cold (\code{c}) and flu (\code{f}): $P(X_i = 1 | F = f, C = c)$.
\end{itemize}

We would like to write pseudocode to calculate the probability of flu \emph{conditioned on observing} that the patient has had exposure to a sick friend and that they are experiencing Symptom 2 (sore throat). In terms of random variables $P$(Flu = 1 | Exposure = 1, X$_2$ = 1):
\begin{mdframed}
\code{def inferProbFlu()   \#}  $P$(Flu = 1 | Exposure = 1 and $X_2$ = 1)
\end{mdframed}

Write pseudocode that calculates \code{inferProbFlu()} using \textbf{Rejection Sampling}.

    \begin{shaded}
    \begin{answer}

    \end{answer}
    \end{shaded}
    \newpage


% 2
\item Consider the Exponential distribution. It is your friend \dots really. Specifically, consider a sample of I.I.D.\ exponential random variables $X_1, X_2, \dotsc, X_n$, where each $X_i \sim \Exp(\lambda)$. Derive the maximum likelihood estimate for the parameter $\lambda$ in the Exponential distribution.

    \begin{shaded}
    \begin{answer}

    \end{answer}
    \end{shaded}
    \newpage


% 3
\item Say you have a set of binary input features/variables $X_1, X_2, \dotsc, X_m$ that can be used to make a prediction about a discrete binary output variable $Y$ (i.e., each of the $X_i$ as well as $Y$ can only take on the values 0 or 1). Say that the first $k$ input variables $X_1, X_2, \dotsc, X_k$ are actually all identical copies of each other, so that when one has the value 0 or 1, they all do.  Explain informally, but precisely, why this may be problematic for the model learned by the Na\"{i}ve Bayes classifier.

    \begin{shaded}
    \begin{answer}

    \end{answer}
    \end{shaded}
    \newpage

% 4 SUBMIT YOUR CODE FOR THIS PROBLEM ON GRADESCOPE
\item Implement a Na\"{i}ve Bayes classifier. Detailed instructions are provided in the comments of the starter code.

\begin{enumerate}[label=\alph*.]
    \item \textbf{[Coding]} Implement the function \texttt{fit} in \texttt{naive\_bayes.py}.
    \item \textbf{[Coding]} Implement the function \texttt{predict} in \texttt{naive\_bayes.py}.
\end{enumerate}


% 5 SUBMIT YOUR CODE FOR THIS PROBLEM ON GRADESCOPE
\item Implement a Logistic Regression classifier. Specifically, you should implement the gradient ascent algorithm described in class. Detailed instructions are provided in the comments of the starter code.

\begin{enumerate}[label=\alph*.]
    \item \textbf{[Coding]} Implement the function \verb|fit| in \verb|logistic_regression.py|.
    \item \textbf{[Coding]} Implement the function \verb|predict| in \verb|logistic_regression.py|.
\end{enumerate}


\end{enumerate}


\end{document}
  