\documentclass[11pt]{article}
% Problem set: do not define TEMPLATE or SOL
% LaTeX template: define TEMPLATE but not SOL
% Solution: define both TEMPLATE and SOL
%
%\def\sol{1}
\def\template{1}
% STUDENTS:
% You can enter your solutions into this template by just
% typing them right after it says "SOLUTION".
% Ignore the \ifdefined\sol blocks; you can delete them if you want.
% (Note that it won't compile if you try to uncomment the
% solution flag :))
%
\usepackage{fullpage,graphicx}
\usepackage{amsmath,amsfonts,amsthm,amssymb,multirow}
\usepackage{algorithmic,comment,url,enumerate}
\usepackage{tikz}
\usepackage{framed}
\usetikzlibrary{decorations.pathreplacing, shapes}
\usepackage[ruled,vlined,commentsnumbered,titlenotnumbered]{algorithm2e}
\usepackage{mathtools}
\newtheorem{assumption}{Assumption}

\newcommand{\expecting}[1]{\noindent{[\textbf{We are expecting:} \em #1\em]}}
\newcommand{\hint}[1]{\noindent{[\textbf{HINT:} \em #1 \em]}}
\newcommand{\pts}[1]{\textbf{(#1 pt.)}}
\newcommand{\sgn}{\mathrm{sgn}}
\definecolor{shadecolor}{gray}{0.95}
\newcommand{\bE}{\ensuremath{\mathbb{E}}}
\newcommand{\Ex}[1]{\operatorname*{\mathbb{E}}\left[#1\right]}
\newcommand{\R}{\ensuremath{\mathbb{R}}}
\newcommand{\Var}{\operatorname*{Var}}
\newtheorem{theorem}{Theorem}
\newcommand{\Z}{\mathbb{Z}}
\newcommand{\note}[1]{\noindent{[\textbf{NOTE:} \em #1 \em]}}
\begin{document}
\noindent
\textbf{Problem Set 2} \hfill CS265, Winter 2025 \newline
Due:  1/31 (Friday) at 11:59pm on Gradescope
% \ifdefined\sol
% % \newline
% % Group members: INSERT NAMES HERE
% \fi
%\today

\vspace{.4cm}\noindent
Please follow the homework policies on the course website.  

\noindent
\rule{\linewidth}{0.4pt}
\begin{enumerate}

%%%%%%%%%%%%%%%% Graph Coloring

\item \pts{11} \textbf{[Graph Coloring]}

The vertices of a simple graph (a graph with no self loops or multiple edges) $G = (V,E)$ are each independently assigned one of three colors: red, green, or blue, chosen uniformly at random.
\begin{enumerate}
    \item \pts{3} Let $n = |V|$ be the number of vertices in the graph. Show that the probability that more than half of the vertices are red is $\exp(-\Omega(n))$.
    \item \pts{4} Let $m=|E|$ be the number of edges in the graph. Show that the probability that more than half of the edges are monochromatic (i.e., both endpoints have the same color) is $O(1/m)$.
    \item \pts{4} Suppose $G$ is a complete graph (that is, all of the ${n \choose 2}$ possible edges are in the graph). Show that the probability that more than half of the edges are monochromatic is $\exp(-\Omega(\sqrt{m}))$.
    \item \pts{0} \textbf{[Optional: This won't be graded.]} Improve the bound from part (b).
\end{enumerate}
\ifdefined\sol
\begin{shaded}
\textbf{SOLUTION:}
\ifdefined\sol
\input{solution/graphcol.tex}
\fi
\end{shaded}
\fi

%%%%%%%%%%%%%%%%% Markov and Chebyshev

\item \pts{12} \textbf{[Tightness of Markov's and Chebyshev's Inequalities]}
\begin{enumerate}
\item \pts{4} Show that Markov's inequality is tight. Specifically, for each value $c > 1$, describe a distribution $D_c$ supported on non-negative real numbers such that if the random variable $X$ is drawn according to $D_c$ then 
\begin{itemize}
    \item[(1)] $\bE[X] > 0$, and
    \item[(2)] $\Pr[X \ge c \bE[X]] = 1/c.$ 
\end{itemize}  
\item \pts{4} Show that Chebyshev's inequality is tight. Specifically, for each value $c > 1$, describe a distribution $D_c$ supported on real numbers such that if the random variable $X$ is drawn according to $D_c$ then 
\begin{itemize}
    \item[(1)] $\bE[X] = 0$ and $\Var[X]=1$, and 
    \item[(2)] $\Pr[|X-\bE[X]| \ge c \sqrt{\Var[X]}] = 1/c^2.$
\end{itemize}
\item \pts{4} \textbf{[One-sided version of Chebyshev's Inequality] }
    Consider the following statement: Let $X$ be a random variable with $\text{Var}[X] = 1$. For all $t \geq 0$, \[\Pr[X -\bE[X] \geq t] \leq \frac{1}{1 + t^2}.\] 
    \begin{enumerate}
        \item Prove this statement.
        \item Show that this is tight: For any $t\geq 0$, come up with a random variable $X$ with distribution $D_t$ and variance $1$ for which $\Pr[X - \bE[X] \geq t] = \frac{1}{1 + t^2}$.
    \end{enumerate}

\end{enumerate}

\ifdefined\sol
\begin{shaded}
\textbf{SOLUTION:}
\ifdefined\sol
\input{solution/MCtightness.tex}
\fi
\end{shaded}
\fi

\newpage
%%%%%%%%%%%%%%%%%%%% Concentration without Independence

\item \pts{11} \textbf{[Concentration without Independence]}

A computer system has $n$ different failure modes, each of which happens with a small probability. Fortunately, the system is designed to be robust in the following sense: As long as less than half of the failure modes occur, things are fine; otherwise, a large-scale crash will happen. We want to make sure that the probability of crashing is small enough.

To model the above scenario, we define $n$ Bernoulli random variables $X_1, \ldots, X_n$. Each $X_i$ is the indicator of the $i$-th failure mode, i.e., $X_i = 1$ if failure $i$ occurs and $X_i = 0$ otheriwse. Our goal is to upper bound the probability $\Pr\left[\sum_{i=1}^{n}X_i \ge n/2\right]$.

\begin{enumerate}
\item \pts{2} Let's first assume that the $n$ failure events are independent and the probability of each failure is at most $1/3$. Formally, we have:
\begin{assumption}\label{assump:independence}
    $\Pr[X_i = 1] \le 1/3$ for every $i \in [n]$ and $X_1, \ldots, X_n$ are independent.
\end{assumption}
Prove that under Assumption~\ref{assump:independence}, for some constant $C > 0$ that does not depend on $n$,
\begin{equation}\label{eq:p2-concentration}
    \Pr\left[\sum_{i=1}^{n}X_i \ge n/2\right] \le e^{-Cn}.
\end{equation}
Thus, the probability of a crash is exponentially small in $n$.

\hint{Feel free to use (without proof) any of the Chernoff bounds in lecture note \#5 (including Theorem 2 and Corollaries 5 and 6) and also the inequality $\frac{e^{\delta}}{(1+\delta)^{1+\delta}} \le e^{-\delta^2/3}$ for $\delta \in [0, 1]$.}

\item \label{part:counterexample} \pts{1} Now we decide that Assumption~\ref{assump:independence} is too unrealistic, since many of the failure modes are known to be strongly correlated. Show that only assuming $\Pr[X_i = 1] \le 1/3$ (and not the independence), the probability of crashing can be as large as $\Omega(1)$. In particular, prove that for any $n \ge 2$, there exist random variables $X_1, \ldots, X_n$ that satisfy: (1) $\Pr[X_i = 1] \le 1/3$ for every $i \in [n]$; (2) $\Pr\left[\sum_{i=1}^{n}X_i \ge n/2\right] \ge 1/3$.

\item \pts{2} Let's try the following relaxation of Assumption~\ref{assump:independence}, which states that the probability for $k$ different failures to occur simultaneously is exponentially small in $k$:
\begin{assumption}\label{assump:group}
    For any $S \subseteq [n]$, $\Pr\left[X_i = 1\text{ for all } i \in S\right] \le (1/3)^{|S|}$.
\end{assumption}

Show that Assumption~\ref{assump:group} is strictly weaker than Assumption~\ref{assump:independence} by proving: 
\begin{enumerate}
\item Assumption~\ref{assump:independence} implies Assumption~\ref{assump:group}; and
\item the implication on the other direction does not hold, i.e., there exist some $n$ and $X_1, \ldots, X_n$ that satisfy Assumption~\ref{assump:group} but not Assumption~\ref{assump:independence}.
\end{enumerate}

\hint{For (ii), there exists a counterexample for $n=2$.} 

\item \pts{6} Prove that under Assumption~\ref{assump:group}, inequality~\eqref{eq:p2-concentration} holds for some constant $C > 0$. In your proof, you can appeal to the proof of the Chernoff bounds from lecture videos/notes if you need to write it out verbatim at some point. For example, if you manage to upper bound $\Pr\left[\sum_{i=1}^{n}X_i \ge n/2\right]$ by an expression involving the moment-generating function of some random variable $Y$ that is the sum of $n$ independent Bernoulli random variables, you can simply say that ``the rest of the proof is exactly the proof of Theorem 2 from Lecture \#5''.

\hint{Consider independent Bernoulli random variables $Y_1, \ldots, Y_n$ with $Pr[Y_i = 1] = 1/3$ for each $i \in [n]$. For distinct indices $i, j, \ell \in [n]$, does $\Ex{X_iX_jX_\ell} \le \Ex{Y_iY_jY_\ell}$ hold? Can you extend your proof of the inequality to the case with repeating indices?}


\hint{Let $X = \sum_{i=1}^{n}X_i$ and $Y = \sum_{i=1}^{n}Y_i$. What can we say about $\Ex{X^k}$ and $\Ex{Y^k}$ for integer $k \ge 0$? Considering the identity $e^z = \sum_{k=0}^{+\infty}\frac{z^k}{k!}$, what can we say about $\Ex{e^{tX}}$ and $\Ex{e^{tY}}$ for any $t > 0$?}

\item \pts{0} \textbf{[Optional: this won't be graded.]} Can you construct counterexamples for Part~\ref{part:counterexample} that satisfy \emph{pairwise independence} but have a crashing probability of $\Omega(1/n)$? Formally, prove that there exists $C > 0$ such that for any $n \ge 2$, there exist $X_1, \ldots, X_n$ that satisfy: (1) $\Pr[X_i = 1] \le 1/3$; (2) $X_i$ and $X_j$ are independent for distinct $i, j \in [n]$; (3) $\Pr\left[\sum_{i=1}^{n}X_i \ge n/2\right] \ge C/n$.

\note{This shows that unlike Chebyshev's inequality, Chernoff bounds do not hold if we only assume pairwise independence.}

\hint{Recall pairwise independent hash functions if you have seen them before. You can use the Bertrand-Chebyshev theorem, which states that for any integer $n \ge 1$, there exists a prime number $p$ with $n < p < 2n$.}

\end{enumerate}
\ifdefined\sol
\begin{shaded}
\textbf{SOLUTION:}
\ifdefined\sol
\input{solution/concentration.tex}
\fi
\end{shaded}
\fi

\end{enumerate}
\end{document}
