\documentclass[11pt]{article}
% Problem set: do not define TEMPLATE or SOL
% LaTeX template: define TEMPLATE but not SOL
% Solution: define both TEMPLATE and SOL
%
% \def\sol{1}
\def\template{1}
% STUDENTS:
% You can enter your solutions into this template by just
% typing them right after it says "SOLUTION".
% Ignore the \ifdefined\sol blocks; you can delete them if you want.
% (Note that it won't compile if you try to uncomment the
% solution flag :))
%
\usepackage{fullpage,graphicx}
\usepackage{amsmath,amsfonts,amsthm,amssymb,multirow}
\usepackage{algorithmic,comment,url,enumerate}
\usepackage{tikz}
\usepackage{framed}
\usepackage{enumitem}
\usetikzlibrary{decorations.pathreplacing, shapes}
\usepackage[ruled,vlined,commentsnumbered,titlenotnumbered]{algorithm2e}

\newcommand{\expecting}[1]{\noindent{[\textbf{We are expecting:} \em #1\em]}}
\newcommand{\hint}[1]{\noindent{[\textbf{HINT:} \em #1 \em]}}
\newcommand{\pts}[1]{\textbf{(#1 pt.)}}
\newcommand{\pr}[1]{\Pr\left[#1\right]}
\definecolor{shadecolor}{gray}{0.95}
\newcommand{\note}[1]{\noindent{[\textbf{NOTE:} \em #1 \em]}}

\newcommand{\N}{\mathbb{N}}
\newcommand{\Ex}[1]{\operatorname*{\mathbb{E}}\left[#1\right]}
\newcommand{\R}{\ensuremath{\mathbb{R}}}
\newcommand{\Var}{\operatorname*{Var}}
\newcommand{\E}{\ensuremath{\mathcal{E}}}
\newcommand{\Z}{\mathbb{Z}}
\newcommand{\rank}{\ensuremath{\mathrm{rank}}}

\begin{document}
\noindent
\textbf{Problem Set 3 \ifdefined\sol Solution \fi} \hfill CS265/CME309, Winter 2025
\ifdefined\sol\else
\newline
Due: Friday 2/7, at 11:59pm on Gradescope
\ifdefined\template
\newline 
Group members: INSERT NAMES HERE
\fi

%\today
\vspace{.4cm}\noindent
Please follow the homework policies on the course website.
\fi

\noindent
\rule{\linewidth}{0.4pt}

\begin{enumerate}

\item \pts{8} \textbf{Poisson Approximation}

Suppose that $n$ balls are thrown into $n$ bins independently and uniformly at random. Let the random variable $X_i$ denote the number of balls that end up in the $i$-th bin.

\begin{enumerate}
\item \pts{1} 
Consider $\Pr[X_1 = \cdots = X_n = 1]$, the probability that each bin receives exactly one ball.  Explain why this is equal to $n!/n^n$.

\item \pts{3} Alternatively, we can approximate the above probability by replacing $X_1, \ldots, X_n$ with independent Poisson random variables $Y_1, \ldots, Y_n \sim \mathrm{Poi}(1)$. Find $\Pr[Y_1 = \cdots = Y_n = 1]$, and prove that it is smaller than the actual probability by a factor of $\Theta(\sqrt{n})$.  That is, show that there are constants $C_1, C_2 > 0$ so that:
\begin{equation}\label{eq:p3-special}
    C_1\sqrt{n}
\le \frac{\Pr[X_1 = \cdots = X_n = 1]}{\Pr[Y_1 = \cdots = Y_n = 1]}
\le C_2\sqrt{n}.
\end{equation}
\hint{Stirling's approximation $\sqrt{2\pi}n^{n+\frac{1}{2}}e^{-n} \le n! \le en^{n+\frac{1}{2}}e^{-n}$ might be handy.}
\end{enumerate}
While the above approximation is off by a $\Theta(\sqrt{n})$ factor, we will show in the remainder of this problem that the previous example is essentially the worst case: informally, 
$\Ex{ \text{\em any \em function of $X_1, \ldots, X_n$} }$ is within a $\Theta(\sqrt{n})$ factor of the corresponding expression for $Y_1, \ldots, Y_n$.

We'll state that formally (and you'll prove it!) in the next part, but we'll need the following two facts about $(X_1)_{i=1}^n$ and $(Y_i)_{i=1}^n$:
\begin{itemize}
\item $\Pr[Y_1 + \cdots + Y_n = n] \ge \frac{1}{e\sqrt{n}}$.
\item Conditioning on $Y_1 + \cdots + Y_n = n$, the distribution of $(Y_1, \ldots, Y_n)$ is the same as that of $(X_1, \ldots, X_n)$.
\end{itemize}
You may assume these facts without proof for the next part.

\begin{enumerate}[start=3]

\item \pts{4} Let $\N$ denote $\{0, 1, 2,\ldots \}$ and $f: \N^n \to [0, +\infty)$ be an arbitrary function that maps $n$ nonnegative integers to a nonnegative real number. Define the random variables $(X_i)_{i=1}^{n}$ and $(Y_i)_{i=1}^{n}$ as above. Prove the following inequality:
\begin{equation}\label{eq:p3-general}
    \frac{\Ex{f(X_1, \ldots, X_n)}}{\Ex{f(Y_1, \ldots, Y_n)}}
\le e\sqrt{n}.
\end{equation}

\note{This states that the Poisson approximation $\Ex{f(Y)}$ underestimates $\Ex{f(X)}$ by a factor of at most $O(\sqrt{n})$. When the function $f(t_1, \ldots, t_n)$ is chosen to be the indicator of $t_1=\cdots=t_n=1$, $\Ex{f(X)}$ and $\Ex{f(Y)}$ are exactly $\Pr[X_1 = \cdots = X_n = 1]$ and $\Pr[Y_1 = \cdots = Y_n = 1]$, and the bound \eqref{eq:p3-general} is consistent with inequality~\eqref{eq:p3-special}.}

\item \pts{0} \textbf{[Optional: this won't be graded]} Prove the facts listed before part (c).
\end{enumerate}

\ifdefined\template
\begin{shaded}
\textbf{SOLUTION:}
\ifdefined\sol
\input{solution/poisson.tex}
\fi
\end{shaded}
\fi

\item \pts{4} We showed that Bourgain's embedding allows us to embed an arbitrary metric space $(X, d)$ with $|X| = n$ into $(\R^k, \ell_1)$ with target dimension $k$ being $O((\log n)^2)$ and distortion being $O(\log n)$. Moreover, the embedding can be computed efficiently using a randomized algorithm. Prove that the exact same embedding computed by the randomized algorithm also achieves $O(\log n)$ distortion with high probability when the target metric is $\ell_p$ for $p > 1$.
We encourage you to emphasize only the differences from the proof in the lecture notes rather than copying the entire proof.\\
\hint{Let $f:X\rightarrow \R^k$ denote the relevant embedding. For any two points $x,y\in X$, we showed that $\|f(x) - f(y)\|_1 \leq k\cdot d(x,y)$. Can we say something similar about $\|f(x) - f(y)\|_p$?}\\
\hint{For any two points $a,b\in\R^k$ and $p>1$, it holds that $\|a - b\|_p \geq k^{(1/p) - 1}\|a - b\|_1$. This is a special case of H\"older's inequality.}

\ifdefined\template
\begin{shaded}
\textbf{SOLUTION:}
\ifdefined\sol
\input{solution/Bourgain.tex}
\fi
\end{shaded}
\fi

\item \pts{10}
The \emph{equilateral dimension} of a metric space is the maximum number of points in the space that are all at the same distance from each other.
In this problem, we will determine the equilateral dimension of the $d$-dimensional Euclidean space $\E^d = (\R^d, \ell_2)$, and also explore a relaxed version of the equilateral dimension where the pairwise distances are only required to be 
\emph{approximately} the same.
\begin{enumerate}
\item \pts{2} Let $X$ be a set of $d+1$ points in $\E^m$ for some arbitrary positive integer $m$. Show that $(X, \ell_2)$ can be isometrically embedded into $\E^{d}$.\\
\hint{Suppose $X = \{x_0,\ldots,x_{d}\}$. Consider the linear subspace spanned by the vectors $x_i - x_0$ for $i = 1,\ldots,d$.
How large can the dimension of the subspace be?}
\item \pts{2} For all positive integers $d$, use Part (a) to show that there exist $d+1$ points in $\E^d$ that are all at distance 1 from each other. This shows that the equilateral dimension of $\E^d$ is at least $d+1$.

\item \pts{0} \textbf{[Optional: this won't be graded.]}
For all positive integers $d$, show that one cannot find $d+2$ points in $\E^d$
that are all at distance 1 from each other. 
Together with Part (b), this shows that the equilateral dimension of $\E^d$ is exactly $d+1$.\\
\hint{Prove by contradiction. Suppose there are $d+2$ such points $x_0,x_1,\ldots,x_{d+1}$. Consider the vectors $v_i = x_i - x_0$ and the matrix $A = [v_1,\cdots,v_{d+1}]$ (with the $v_i$ as columns) of size $d\times (d+1)$. 
Explicitly compute the inner products $v_i^{\mathsf{T}}v_j$ and the matrix $A^{\mathsf{T}}A$. Use the fact that $\rank(A^{\mathsf{T}}A) = \rank(A)$ to derive a contradiction.}

\item \pts{3} Show that there exists a constant $c > 0$ such that for all positive integers $d$,
one can find at least $2^{cd}$ points in $\E^d$ that are all at distances between $1$ and $1.1$ from each other.

\item \pts{3} Show that there exists a constant $C > 0$ such that for all positive integers $d$,
one cannot find more than $2^{Cd}$ points in $\E^d$ that are all at distances between $1$ and $1.1$ from each other.\\
\hint{You can use the fact that the volume of a $d$-dimensional ball of radius $r$ is equal to $\gamma_d r^d$ for some constant $\gamma_d$ that depends on $d$ but not $r$. It may be helpful if you understand the proof of Lemma 3 in lecture notes \#9, but that is not required for working on this problem.}

\end{enumerate}

\ifdefined\template
\begin{shaded}
\textbf{SOLUTION:}
\ifdefined\sol
\input{solution/equilateral.tex}
\fi
\end{shaded}
\fi


\end{enumerate}
\end{document}
