\documentclass[11pt]{article}
% Problem set: do not define TEMPLATE or SOL
% LaTeX template: define TEMPLATE but not SOL
% Solution: define both TEMPLATE and SOL
%\def\sol{1}
%\def\template{1}
\usepackage{fullpage,graphicx}
\usepackage{amsmath,amsfonts,amsthm,amssymb,multirow}
\usepackage{algorithmic,comment,url,enumerate}
\usepackage{tikz}
\usepackage{framed}
\usetikzlibrary{decorations.pathreplacing, shapes}
\usepackage[ruled,vlined,commentsnumbered,titlenotnumbered]{algorithm2e}
\newcommand{\expecting}[1]{\noindent{[\textbf{We are expecting:} \em #1\em]}}
\newcommand{\hint}[1]{\noindent{[\textbf{HINT:} \em #1 \em]}}
\newcommand{\pts}[1]{\textbf{(#1 pt.)}}
\newcommand{\sgn}{\mathrm{sgn}}
\definecolor{shadecolor}{gray}{0.95}
\newtheorem{assumption}{Assumption}
\newcommand{\eps}{\epsilon}
\newcommand{\Ex}[1]{\operatorname*{\mathbb{E}}\left[#1\right]}
\newcommand{\N}{\mathbb{N}}
\newcommand{\note}[1]{\noindent{[\textbf{NOTE:} \em #1 \em]}}
\newcommand{\pr}[1]{\Pr\left[#1\right]}
\newcommand{\R}{\mathbb{R}}
\begin{document}
\noindent
\textbf{Problem Set 3 \ifdefined\sol Solution \fi} \hfill CS265, Fall 2020
\ifdefined\sol\else
\newline
Due: October 9 (Friday) at 23:59 (Pacific Time)
\ifdefined\template
\newline
Group members: INSERT NAMES HERE
\fi
%\today
\vspace{.4cm}\noindent
Please follow the homework policies on the course website.
\fi
\noindent
\rule{\linewidth}{0.4pt}
\begin{enumerate}
\item \pts{7} \textbf{Sub-Gaussian Properties}
We say that a random variable $X$ is $\sigma$-sub-Gaussian for $\sigma > 0$ if the following holds:
\[\Ex{e^{t(X - \Ex{X})}} \le e^{\sigma^2t^2/2} \text{ for all } t \in \R,\]
i.e., the moment-generating function of $X - \Ex{X}$ is upper bounded by $e^{\sigma^2t^2/2}$ at every point. Recall from lecture videos that if $X$ follows the Gaussian distribution $N(\mu, \sigma^2)$, the moment-generating function of $X - \Ex{X}$ is exactly $e^{\sigma^2t^2/2}$, so the term ``sub-Gaussian'' makes sense.
\begin{enumerate}
\item \pts{2} Suppose that independent random variables $X$ and $Y$ are $\sigma_X$-sub-Gaussian and $\sigma_Y$-sub-Gaussian respectively. Prove that $X + Y$ is $\sigma$-sub-Gaussian for $\sigma = \sqrt{\sigma_X^2 + \sigma_Y^2}$.
\item \pts{3} Let $X$ be $\sigma$-sub-Gaussian. Prove the following concentration inequality for $X$:
\[
\Pr[|X - \Ex{X}| \ge \eps] \le 2\exp\left(-\frac{\eps^2}{2\sigma^2}\right) \text{ for any } \eps > 0.
\]
\hint{Mimic the proof of Chernoff bounds and upper bound the moment-generating function of $X - \Ex{X}$ using the sub-Gaussian property of $X$.}
\item \pts{2} \emph{Hoeffding's lemma} states that any random variable $X$ that satisfies $X \in [a, b]$ almost surely is $\frac{b-a}{2}$-sub-Gaussian. Using Hoeffding's lemma without proof, prove Hoeffding's Inequality: Suppose that $X_1, \ldots, X_n$ are independent random variables with $X_i \in [a_i, b_i]$ almost surely for each $i \in [n]$. Then for any $\eps > 0$,
\[
\Pr\left[\left|\sum_{i=1}^{n}X_i - \sum_{i=1}^{n}\Ex{X_i}\right| \ge \eps\right] \le 2\exp\left(\frac{-2\eps^2}{\sum_{i=1}^{n}(b_i - a_i)^2}\right).
\]
\end{enumerate}
\ifdefined\template
\begin{shaded}
\textbf{SOLUTION:}
\ifdefined\sol
\input{solution/sub-gaussian.tex}
\fi
\end{shaded}
\fi
\item \pts{11} \textbf{Concentration without Independence}
A computer system has $n$ different failure modes, each of which happens with a small probability. Fortunately, the system is designed to be sufficiently robust in the following sense: as long as less than half of the failures occur, things are fine; otherwise, a large-scale crash will happen. We want to make sure that the probability of crashing is small enough.
To model the above scenario, we define $n$ Bernoulli random variables $X_1, \ldots, X_n$. Each $X_i$ is the indicator of the $i$-th failure mode, i.e., $X_i = 1$ if failure $i$ occurs and $X_i = 0$ otherwise. Our goal is to upper bound the probability $\Pr\left[\sum_{i=1}^{n}X_i \ge n/2\right]$.
\begin{enumerate}
\item \pts{2} Let's first assume that the $n$ failure events are independent and the probability of each failure is at most $1/3$. Formally, we have:
\begin{assumption}\label{assump:independence}
$\Pr[X_i = 1] \le 1/3$ for every $i \in [n]$ and $X_1, \ldots, X_n$ are independent.
\end{assumption}
Prove that under Assumption~\ref{assump:independence}, for some constant $C > 0$ that does not depend on $n$,
\begin{equation}\label{eq:p2-concentration}
\Pr\left[\sum_{i=1}^{n}X_i \ge n/2\right] \le e^{-Cn}.
\end{equation}
Thus, the probability of a crash is exponentially small in $n$.
\hint{Feel free to use (without proof) any of the Chernoff bounds in lecture note \#5 (including Theorem 2 and Corollaries 5 and 6) and also the inequality $\frac{e^{\delta}}{(1+\delta)^{1+\delta}} \le e^{-\delta^2/3}$ for $\delta \in [0, 1]$.}
\item \label{part:counterexample} \pts{1} Now we decide that Assumption~\ref{assump:independence} is too unrealistic, since many of the failure modes are known to be strongly correlated. Show that only assuming $\Pr[X_i = 1] \le 1/3$ (and not the independence), the probability of crashing can be as large as $\Omega(1)$. In particular, prove that for any $n \ge 1$, there exist random variables $X_1, \ldots, X_n$ that satisfy: (1) $\Pr[X_i = 1] \le 1/3$ for every $i \in [n]$; (2) $\Pr\left[\sum_{i=1}^{n}X_i \ge n/2\right] \ge 1/3$.
\item \pts{2} Let's try the following relaxation of Assumption~\ref{assump:independence}, which states that the probability for $k$ different failures to occur simultaneously is exponentially small in $k$:
\begin{assumption}\label{assump:group}
For any $S \subseteq [n]$, $\Pr\left[X_i = 1\text{ for all } i \in S\right] \le (1/3)^{|S|}$.
\end{assumption}
Show that Assumption~\ref{assump:group} is strictly weaker than Assumption~\ref{assump:independence} by proving: (1) Assumption~\ref{assump:independence} implies Assumption~\ref{assump:group}; (2) the implication in the other direction does not hold, i.e., there exist some $n$ and $X_1, \ldots, X_n$ that satisfy Assumption~\ref{assump:group} but not Assumption~\ref{assump:independence}.
\hint{For (2), there exists a counterexample for $n=2$.}
\item \pts{6} Prove that under Assumption~\ref{assump:group}, inequality~\eqref{eq:p2-concentration} holds for some constant $C > 0$. In your proof, you can appeal to the proof of the Chernoff bounds from lecture videos/notes if you need to write it out verbatim at some point. For example, if you manage to upper bound $\Pr\left[\sum_{i=1}^{n}X_i \ge n/2\right]$ by an expression involving the moment-generating function of some random variable $Y$ that is the sum of $n$ independent Bernoulli random variables, you can simply say that ``the rest of the proof is exactly the proof of Theorem 2 from Lecture \#5''.
\hint{Consider independent Bernoulli random variables $Y_1, \ldots, Y_n$ with $\Pr[Y_i = 1] = 1/3$ for each $i \in [n]$. For distinct indices $i, j, \ell \in [n]$, does $\Ex{X_iX_jX_\ell} \le \Ex{Y_iY_jY_\ell}$ hold? Can you extend your proof of the inequality to the case with repeating indices?}
\hint{Let $X = \sum_{i=1}^{n}X_i$ and $Y = \sum_{i=1}^{n}Y_i$. What can we say about $\Ex{X^k}$ and $\Ex{Y^k}$ for integer $k \ge 0$? Considering the identity $e^z = \sum_{k=0}^{+\infty}\frac{z^k}{k!}$, what can we say about $\Ex{e^{tX}}$ and $\Ex{e^{tY}}$ for any $t > 0$?}
\item \pts{0} \textbf{[Optional: this won't be graded.]} Can you construct counterexamples for Part~\ref{part:counterexample} that satisfy \emph{pairwise independence} but have a crashing probability of $\Omega(1/n)$? Formally, prove that there exists $C > 0$ such that for any $n \ge 2$, there exist $X_1, \ldots, X_n$ that satisfy: (1) $\Pr[X_i = 1] \le 1/3$; (2) $X_i$ and $X_j$ are independent for distinct $i, j \in [n]$; (3) $\Pr\left[\sum_{i=1}^{n}X_i \ge n/2\right] \ge C/n$.
\note{This shows that unlike Chebyshev's inequality, Chernoff bounds do not hold if we only assume pairwise independence.}
\hint{Recall pairwise independent hash functions if you have seen them before. You can use the Bertrand-Chebyshev theorem, which states that for any integer $n \ge 1$, there exists a prime number $p$ with $n < p < 2n$.}
\end{enumerate}
\ifdefined\template
\begin{shaded}
\textbf{SOLUTION:}
\ifdefined\sol
\input{solution/concentration.tex}
\fi
\end{shaded}
\fi
\item \pts{8} \textbf{Poisson Approximation}
Suppose that $n$ balls are thrown into $n$ bins independently and uniformly at random. Let random variable $X_i$ denote the number of balls that end up in the $i$-th bin.
\begin{enumerate}
\item \pts{1}
Consider $\Pr[X_1 = \cdots = X_n = 1]$, the probability that each bin receives exactly one ball. Explain why this is equal to $n!/n^n$.
\item \pts{3} Alternatively, we can approximate the above probability by replacing $X_1, \ldots, X_n$ with independent Poisson random variables $Y_1, \ldots, Y_n \sim \mathrm{Poi}(1)$. Find $\Pr[Y_1 = \cdots = Y_n = 1]$, and prove that it is smaller than the actual probability by a factor of $\Theta(\sqrt{n})$, i.e., for some constants $C_1, C_2 > 0$:
\begin{equation}\label{eq:p3-special}
C_1\sqrt{n}
\le \frac{\Pr[X_1 = \cdots = X_n = 1]}{\Pr[Y_1 = \cdots = Y_n = 1]}
\le C_2\sqrt{n}.
\end{equation}
\hint{Stirling's approximation $\sqrt{2\pi}n^{n+1/2}e^{-n} \le n! \le en^{n+1/2}e^{-n}$ might be handy.}
\item \pts{0} While the above approximation is off by a $\Theta(\sqrt{n})$ factor, we will show in the remainder of this problem that the previous example is essentially the worst case: informally,
$\Ex{ \text{ \em any \em function of $X_1, \ldots, X_n$} }$ is within a $\Theta(\sqrt{n})$ factor of the corresponding expression for $Y_1, \ldots, Y_n$.
We'll state that formally (and you'll prove it!) in the next part, but we'll need the following two facts about $(X_i)_{i=1}^n$ and $(Y_i)_{i=1}^n$:
\begin{itemize}
\item $\Pr[Y_1 + \cdots + Y_n = n] \ge \frac{1}{e\sqrt{n}}$.
\item Conditioning on $Y_1 + \cdots + Y_n = n$, the distribution of $(Y_1, \ldots, Y_n)$ is the same as that of $(X_1, \ldots, X_n)$.
\end{itemize}
You may assume these facts without proof for the next part.
\note{There is no question here, we're just stating the above facts.}
\item \pts{4} Let $\N$ denote $\{0, 1, 2,\ldots \}$ and $f: \N^n \to [0, +\infty)$ be an arbitrary function that maps $n$ nonnegative integers to a nonnegative real number. Random variables $(X_i)_{i=1}^{n}$ and $(Y_i)_{i=1}^{n}$ are defined as above. Prove the following inequality:
\begin{equation}\label{eq:p3-general}
\frac{\Ex{f(X_1, \ldots, X_n)}}{\Ex{f(Y_1, \ldots, Y_n)}}
\le e\sqrt{n}.
\end{equation}
\note{This states that the Poisson approximation $\Ex{f(Y)}$ underestimates $\Ex{f(X)}$ by a factor of at most $O(\sqrt{n})$. When the function $f(t_1, \ldots, t_n)$ is chosen to be the indicator of $t_1=\cdots=t_n=1$, $\Ex{f(X)}$ and $\Ex{f(Y)}$ are exactly $\Pr[X_1 = \cdots = X_n = 1]$ and $\Pr[Y_1 = \cdots = Y_n = 1]$, and the bound \eqref{eq:p3-general} is consistent with inequality~\eqref{eq:p3-special}.}
\item \pts{0} \textbf{[Optional: this part won't be graded.]} Prove the facts from part (c).
\end{enumerate}
\ifdefined\template
\begin{shaded}
\textbf{SOLUTION:}
\ifdefined\sol
\input{solution/poisson.tex}
\fi
\end{shaded}
\fi
\item \pts{0} \textbf{[Optional: this won't be graded.] Moment vs Chernoff Bounds}
Let $X$ be a non-negative random variable and fix $\eps > 0$. So far we have seen two approaches to upper bounding the tail probability $\Pr[X \ge \eps]$. One is based on the moments of $X$: assuming that we know (either exactly or a good upper bound of) $\Ex{X^1}, \Ex{X^2}, \ldots$, for any integer $k \ge 1$ we have
$\Pr[X \ge \eps] = \Pr[X^k \ge \eps^k] \le \frac{\Ex{X^k}}{\eps^k}$.
Choosing the $k$ that minimizes the right-hand side gives us the best \emph{moment bound}:
\[
\inf_{k\in\mathbb{Z}, k\ge 1}\frac{\Ex{X^k}}{\eps^k}.
\]
Another approach is based on the moment-generating function of $X$: for any $t > 0$, we have
$\Pr[X \ge \eps] = \Pr[e^{tX} \ge e^{t\eps}] \le \frac{\Ex{e^{tX}}}{e^{t\eps}}$.
Similarly, the best \emph{Chernoff bound} is obtained by choosing $t$ optimally:
\[
\inf_{t > 0}\frac{\Ex{e^{tX}}}{e^{t\eps}}.
\]
Prove that the best \emph{moment bound} is always as good as the best \emph{Chernoff bound}, i.e.,
\[
\min\left\{\inf_{k\in\mathbb{Z}, k\ge 1}\frac{\Ex{X^k}}{\eps^k}, 1\right\}
\le
\inf_{t > 0}\frac{\Ex{e^{tX}}}{e^{t\eps}}.
\]
\ifdefined\template
\begin{shaded}
\textbf{SOLUTION:}
\ifdefined\sol
\input{solution/moment.tex}
\fi
\end{shaded}
\fi
\end{enumerate}
\end{document}