\documentclass[11pt]{article}
% Problem set: define neither \template nor \sol
% LaTeX template: define \template but not \sol
% Solution: define both \template and \sol
%\def\sol{1}
%\def\template{1}
\usepackage{fullpage,graphicx}
\usepackage{mathtools,amsfonts,amsthm,amssymb,multirow}
\usepackage{algorithmic,comment,url,enumerate}
\usepackage{tikz}
\usepackage{framed}
\usetikzlibrary{decorations.pathreplacing, shapes}
\usepackage[ruled,vlined,commentsnumbered,titlenotnumbered]{algorithm2e}
% --- Problem-set macros ---------------------------------------------------
% Bracketed annotation boxes. \emph{} replaces the obsolete \em ... \em
% toggle, which relied on the second \em switching the font back and
% omitted the italic correction (see l2tabu).
\newcommand{\expecting}[1]{\noindent{[\textbf{We are expecting:} \emph{#1}]}}
\newcommand{\hint}[1]{\noindent{[\textbf{HINT:} \emph{#1}]}}
\newcommand{\note}[1]{\noindent{[\textbf{NOTE:} \emph{#1}]}}
% Point-value tag shown next to each problem part.
\newcommand{\pts}[1]{\textbf{(#1 pt.)}}
% sgn as a proper math operator (upright font plus correct operator
% spacing); \DeclareMathOperator is available since mathtools loads amsmath.
\DeclareMathOperator{\sgn}{sgn}
% Background color for the shaded SOLUTION boxes (framed package reads
% shadecolor; xcolor is available because tikz loads it).
\definecolor{shadecolor}{gray}{0.95}
\newtheorem{claim}{Claim}
\newcommand{\eps}{\epsilon}
% Expectation with automatically sized brackets: \Ex{X} -> E[X].
\newcommand{\Ex}[1]{\operatorname*{\mathbb{E}}\left[#1\right]}
% Minimum transition probability; \mathrm (not \textrm) is the correct
% command for an upright subscript label in math mode.
\newcommand{\pmin}{p_{\mathrm{min}}}
\newcommand{\R}{\mathbb{R}}
\begin{document}
\noindent
\textbf{Problem Set 7 \ifdefined\sol Solution \fi} \hfill CS265, Fall 2020
\ifdefined\sol\else
\newline
Due: November 13 (Friday) at 23:59 (Pacific Time)
\ifdefined\template
\newline
Group members: INSERT NAMES HERE
\fi
\vspace{.4cm}\noindent
Please follow the homework policies on the course website.
\fi
\noindent
\rule{\linewidth}{0.4pt}
\begin{enumerate}
\item \pts{4} \textbf{Coin Tosses Modulo $k$}
A biased coin with probability of heads $p \in (0, 1)$ is tossed repeatedly. Let $X_t$ denote the number of heads obtained in the first $t$ tosses. Prove that for any integer $k \ge 1$ and $0 \le j \le k-1$,
\[
\lim_{t\to+\infty}\Pr[(X_t \bmod k) = j] = \frac{1}{k}.
\]
\hint{If your proof applies the fundamental theorem of Markov chains at some point, make sure to prove that the Markov chain that you defined satisfies all the preconditions.}
\ifdefined\template
\begin{shaded}
\textbf{SOLUTION:}
\ifdefined\sol
\input{solution/coin.tex}
\fi
\end{shaded}
\fi
\item \pts{12} \textbf{Fundamental Theorem of Markov Chains: A Special Case}
Let $X_0, X_1, \ldots$ be a Markov chain over $n$ states (labeled $1, 2, \ldots, n$) with transition matrix $P \in \R^{n\times n}$, i.e., for any $t \ge 0$, $\Pr[X_{t+1} = j| X_t = i] = P_{ij}$. In addition, we assume that $P_{ij} > 0$ for all $i, j \in [n]$, and define $\pmin \coloneqq \min_{i,j\in[n]}P_{ij} > 0$. In this problem, we will prove part of the fundamental theorem of Markov chains for this special case. In particular, we will show that there exists a unique stationary distribution $\pi$ such that for all $i, j \in [n]$,
\[
\lim_{t\to+\infty}\Pr[X_t = j|X_0 = i] = \pi_j.
\]
\begin{enumerate}
\item \pts{1} As a warmup, show that the assumption $P_{ij} > 0$ for all $i, j \in [n]$ implies that the Markov chain is irreducible and aperiodic. Thus, the assumption that we made is not weaker than the one in the original theorem.
\item \pts{4} Suppose that vectors $a, b \in \R^n$ satisfy $\sum_{i=1}^{n}a_i = 0$ and $\min_{i\in[n]}b_i \ge \eps > 0$. Prove that $\left|\sum_{i=1}^na_ib_i\right|\le \sum_{i=1}^{n}|a_i|b_i - \frac{\eps}{2}\sum_{i=1}^{n}|a_i|$.
\hint{It might be useful to define $S^+ \coloneqq \{i \in [n]: a_i \ge 0\}$ and $S^- \coloneqq \{i \in [n]: a_i < 0\}$, and then relate $\sum_{i=1}^{n}a_ib_i$ to $\sum_{i\in S^+}|a_i|b_i$ and $\sum_{i\in S^-}|a_i|b_i$.}
\hint{It is possible to prove a stronger bound with the $\frac{\eps}{2}$ factor replaced by $\eps$.}
\item\label{part:1-norm} \pts{2} Let $a = \begin{bmatrix}a_1 & a_2 & \cdots & a_n\end{bmatrix}$ be a row vector that satisfies $\sum_{i=1}^{n}a_i = 0$. Prove that $\|aP\|_1 \le (1 - n\pmin /2)\|a\|_1$.
\hint{Use the previous part.}
\item\label{part:pi} \pts{3} Prove that there exists an $n$-dimensional row vector $\pi = \begin{bmatrix}\pi_1 & \pi_2 & \cdots & \pi_n\end{bmatrix}$ such that: (1) $\pi = \pi P$; (2) $\sum_{i=1}^{n}\pi_i = 1$.
\hint{First prove the existence of a non-zero vector $\pi$ satisfying $\pi = \pi P$, and then show that the second condition can be satisfied by scaling $\pi$. For the first step, you may use the following fact without proof: if $\lambda$ is an eigenvalue of a square matrix $A$, $\lambda$ is also an eigenvalue of $A^T$. Part~\ref{part:1-norm} might be helpful for the second step.}
\item \pts{2} Let $v = \begin{bmatrix}v_1 & v_2 & \cdots & v_n\end{bmatrix}$ be a row vector that satisfies $\sum_{i=1}^nv_i = 1$. Let $\pi$ be a vector chosen as in Part~\ref{part:pi}. Prove that $\lim_{t\to+\infty}vP^t = \pi$. Then, derive that for all $i, j \in [n]$,
\[\lim_{t\to+\infty}\Pr[X_t = j|X_0 = i] = \pi_j.\]
\hint{Apply Part~\ref{part:1-norm} to $(v - \pi), (v - \pi)P, (v - \pi)P^2, \ldots$.}
\item \pts{0} \textbf{[Optional: this won't be graded.]} Extend the proof to the general case, where the Markov chain is irreducible and aperiodic but $P_{ij} > 0$ might not hold.
\end{enumerate}
\ifdefined\template
\begin{shaded}
\textbf{SOLUTION:}
\ifdefined\sol
\input{solution/proof.tex}
\fi
\end{shaded}
\fi
\item \pts{14} \textbf{Coupon Collection on Markov Chains}
Let $X_1, X_2, \ldots$ be an irreducible and aperiodic Markov chain over $n \ge 2$ states (labeled $1, 2, \ldots, n$) with a uniformly distributed initial state and transition matrix $P \in \R^{n\times n}$, i.e., $\Pr[X_1 = i] = 1/n$ for all $i \in [n]$ and $\Pr[X_{t+1} = j|X_t = i] = P_{ij}$ for any $t \ge 1$ and $i, j \in [n]$.\footnote{Note that in this problem, the initial state is denoted by $X_1$ (instead of $X_0$, which is used in the lecture notes). This slight change makes the analogy to coupon collecting in Part~\ref{part:coupon} more natural.} Furthermore, we assume that $P$ is \emph{doubly stochastic}, which means that $\sum_{i=1}^{n}P_{ij} = 1$ for every $j \in [n]$, as well as
$\sum_{j=1}^{n}P_{ij} = 1$ for every $i \in [n]$. (Note that a general transition matrix need only satisfy the second of these.)
\begin{enumerate}
\item\label{part:uniform} \pts{1} Prove that the unique stationary distribution of this Markov chain is uniform over $[n]$. Moreover, show that $\Pr[X_t = i] = 1/n$ for every $t \ge 1$ and $i \in [n]$.
\item\label{part:coupon} \pts{5} Let $T$ denote the earliest time when all the states have been visited at least once and the chain returns to the initial state $X_1$. Formally, random variable $T$ is defined as
\[
T = \min\{t\ge 1: X_t = X_1\text{ and } \{X_1, X_2, \ldots, X_t\} = [n]\}.
\]
Consider the following argument for bounding $\Ex{T}$:
\begin{quote}\itshape
The conclusion of Part~\ref{part:uniform} says that each $X_t$ is uniformly distributed over $[n]$. Therefore, $X_1, X_2, \ldots$ can be viewed as the coupons that we draw in the coupon collector's problem. Then, $T$ is exactly the number of coupons needed for first collecting the $n$ coupons and then waiting for coupon $X_1$ to appear again. In expectation, the first part takes $n\sum_{i=1}^{n}\frac{1}{i}$ steps and the second part takes $\frac{1}{1/n} = n$ steps, so $\Ex{T} = n\sum_{i=1}^{n}\frac{1}{i} + n = \Theta(n\log n)$.
\end{quote}
Disprove the above by doing the following: (1) point out the flaw in the reasoning; (2) construct counterexamples to show that $\Ex{T}$ might be as small as $O(n)$; (3) construct counterexamples to show that $\Ex{T}$ might not be upper bounded by any function of $n$.
More concretely, for Part (2), show that for every $n \ge 2$ there exists a Markov chain with $n$ states satisfying all the assumptions and $\Ex{T} \le 2n$. (You will also get full credit if $\Ex{T} \le Cn$ for some other constant $C$.)
For Part (3), show that for every $M > 0$ there exists a Markov chain with $n = 2$ states satisfying all the assumptions and $\Ex{T} \ge M$. (You will also get full credit if the number of states is $n = C$ for some other constant $C$.)
\item \pts{3} Define $\pmin \coloneqq \min_{i,j\in[n]}P_{ij}$. Prove that if $\pmin > 0$, we have $\Ex{T} \le O\left(\frac{\log n}{\pmin}\right)$.
\hint{It can be proved that $\Ex{T} \le 1 + \frac{1}{\pmin}\left(1 + \sum_{i=1}^{n-1}\frac{1}{i}\right)$.}
\item \pts{5} Let $\mu \coloneqq \Ex{T}$. Show that $T$ has a \emph{sub-exponential tail}, i.e., that there exist constants $c_0, c_1 > 0$ (that do not depend on $n$ or any other parameters of the Markov chain) such that for every $\lambda \ge c_0$:
\[\Pr[T > \lambda\mu] \le e^{-c_1\lambda}.\]
You may use the following claim without proof:
\begin{claim}\label{claim}
There exists a constant $c_2 > 0$ such that the following holds for any Markov chain with $n$ states that satisfies all the assumptions, and for any $i \in [n]$: Let $M = \lfloor c_2\mu\rfloor$. Conditioning on $X_1 = i$, with probability at least $1 - 1/e$, there exists $t \in [M]$ such that $\{X_1, X_2, \ldots, X_t\} = \{X_{t+1}, X_{t+2}, \ldots, X_M\} = [n]$.
\end{claim}
\hint{You might find the argument at the end of the proof of Theorem 1 in Lecture Note \#13 useful.}
\item \pts{0} \textbf{[Optional: this won't be graded.]} Prove Claim~\ref{claim}.
\end{enumerate}
\ifdefined\template
\begin{shaded}
\textbf{SOLUTION:}
\ifdefined\sol
\input{solution/coupon.tex}
\fi
\end{shaded}
\fi
\end{enumerate}
\end{document}