\documentclass[11pt]{article}
% Problem set: do not define TEMPLATE or SOL
% LaTeX template: define TEMPLATE but not SOL
% Solution: define both TEMPLATE and SOL
%
% \def\sol{1}
 \def\template{1}
% STUDENTS:
% You can enter your solutions into this template by just
% typing them right after it says "SOLUTION".
% Ignore the \ifdefined\sol blocks; you can delete them if you want.
% (Note that it won't compile if you try to uncomment the
% solution flag :))
%
\usepackage{fullpage,graphicx}
\usepackage{amsmath,amsfonts,amsthm,amssymb,multirow}
\usepackage{algorithmic,comment,url,enumerate}
\usepackage{tikz}
\usepackage{framed}
\usetikzlibrary{decorations.pathreplacing, shapes}
\usepackage[ruled,vlined,commentsnumbered,titlenotnumbered]{algorithm2e}
\newcommand{\expecting}[1]{\noindent{[\textbf{We are expecting:} \em #1\em]}}
\newcommand{\hint}[1]{\noindent{[\textbf{HINT:} \em #1 \em]}}
\newcommand{\pts}[1]{\textbf{(#1 pt.)}}
\newcommand{\sgn}{\mathrm{sgn}}
\definecolor{shadecolor}{gray}{0.95}
\newcommand{\bE}{\mathbb{E}}
\newcommand{\Var}{\mathrm{Var}}
\newcommand{\Z}{\mathbb{Z}}
\newcommand{\eps}{\epsilon}
\newtheorem{theorem}{Theorem}


\begin{document}
\noindent
\textbf{Problem Set 2 \ifdefined\sol Solution \fi} \hfill CS265/CME309, Winter 2026
\ifdefined\sol\else
\newline
Due: Friday 1/23, 11:59pm on Gradescope
\ifdefined\template
\newline 
Group members: INSERT NAMES HERE
\fi

\vspace{.4cm}\noindent
Please follow the homework policies on the course website.
\fi

\noindent
\rule{\linewidth}{0.4pt}

\noindent
\textbf{Note:} Throughout this problem set (and in general in this class unless otherwise stated), you are allowed to use as a black box anything we have stated in the lecture notes.
\begin{enumerate}

%%%%%%%%% Approximating Frequencies %%%%%%%%%
\item \pts{10} \textbf{[Approximating Frequencies.]}
Suppose that $A$ is an list of length $n$, containing elements from a large universe $\mathcal{U}$.  Our goal is to estimate the frequencies of each element in $\mathcal{U}$: that is, for $x \in \mathcal{U}$, how often does $x$ appear in $A$?  
The catch is that $A$ is too big to look at all at once.  Instead, we see the elements of $A$ one at a time: $A[0], A[1], A[2], \ldots$.  Unfortunately, $\mathcal{U}$ is also really big, so we can't just keep a count of each element.

In this problem, we'll construct and analyze a randomized data structure that will keep a ``sketch'' of the list $A$, use small space, and will be able to efficiently answer queries of the form ``approximately how often did $x$ occur in $A$''? 

Specifically, our goal is the following: we would like a (small-space) data structure, which supports operations \texttt{update}$(x)$ and \texttt{count}$(x)$.  The \texttt{update} function inserts an item $x  \in \mathcal{U}$ into the data structure. 
 Given $\epsilon, \delta > 0$, the \texttt{count} function should have the following guarantee: After calling \texttt{update} $n$ times,  $\texttt{count}(x)$ should satisfy
\begin{equation}\label{eq:want}
C_x \le \texttt{count}(x) \le C_x + \epsilon n
\end{equation}
with probability at least $1 - \delta$, where $C_x$ is the number of times that $x$ occurs in $A$.

\begin{enumerate}
\item \pts{3} Consider the following strategy. We start with an array $R$ of length $b$ initialized to all zeroes, and a random hash function $h:\mathcal{U} \to \{0,1,\ldots, b - 1\}$ drawn from a \emph{universal hash family $\mathcal{H}$}: Assume that for any $x \neq y$, $\Pr[h(x) = h(y)] = 1/b$.\footnote{Recall from CS161 or elsewhere that a universal hash family $\mathcal{H}$ satisfies $\Pr_{h \in \mathcal{H}}[h(x) = h(y)] \leq 1/b$.  For simplicity, in this problem we are saying that this is an equality, although the inequality is fine for the data structure to work.}

Then the operations are:
\begin{itemize}
\item\texttt{update}$(x)$: Increment $R[h(x)]$ by $1$.
\item \texttt{count}$(x)$: Return $R[h(x)]$.
\end{itemize}

For each $i = 1, 2, \ldots, n$, suppose that we call \texttt{update}$(A[i])$.  (This inserts all of $A$ into the data structure).  After we do this, what is the expected value of $\texttt{count}(x)$?  Your answer should be in terms of $C_x, n$, and $b$, and you should justify your answer.

\item \pts{3} Show that there is a choice of $b$ that is $O(1/\eps)$ so that, 
for any fixed $x \in \mathcal{U}$, 
we have
\[ \Pr[ \texttt{count}(x) < C_x ] = 0 \]
and 
\[ \Pr[\texttt{count}(x) \geq C_x + \eps n] \leq \frac{1}{e}.\]
Note that this establishes \eqref{eq:want} with probability at least $1 - 1/e$.

\hint{The first of the requirements is true no matter what $b$ is.}

\hint{For the second requirement, use Markov's inequality and take advantage of the first requirement.}

\item \pts{4} In order to boost the success probability from $1 - 1/e$ to $1 - \delta$ for arbitrarily small $\delta$, we will repeat the strategy from part (a) $T$ times independently, so we keep $T$ arrays $R_1, \ldots, R_T$, and $T$ independently chosen hash functions $h_1, \ldots, h_T$. 
\begin{enumerate}
    \item Fill in the details: How do you implement \texttt{update} and \texttt{count} for this $T$-repeated version of the data structure? 
    \item How big do you need to take $T$ so that \eqref{eq:want} is satisfied with probability at least $1 - \delta$?
    \item With this value of $T$, how much space does the data structure use?  Asymptotic notation is fine.   You may assume that it takes $O(\log|\mathcal{U}|)$ bits to store a hash function $h$ from $\mathcal{H}$.  (And recall that it takes $O(\log|\mathcal{U}|)$ bits to store an element of $\mathcal{U}$, and $O(\log n)$ bits to store an integer in $\{0,1, \ldots, n\}$).
\end{enumerate}

\end{enumerate}

\ifdefined\sol
\begin{shaded}
\textbf{SOLUTION:}
\ifdefined\sol
\input{solution/sketching.tex}
\fi
\end{shaded}
\fi

%%%%%%%%% Median-of-means %%%%%%%%%
\item \pts{12} \textbf{[Median-of-means]}
Let $X_1,X_2, \ldots, X_n$ be i.i.d. samples of a random variable $X$ with $\bE[X] = \mu$ and $\Var[X] \leq 1$.  

Suppose that $k$ divides $n$, let $m = n/k$.  Consider the following algorithm for estimating $\mu$ from $n$ samples.  First, we divide the $n$ samples into $k$ groups of size $m$:
\begin{align*}
A_1 &= \{X_1, \ldots, X_m\} \\
A_2 &= \{X_{m+1}, \ldots, X_{2m} \}\\
&\vdots \\
A_k &= \{X_{(k-1)m + 1}, \ldots, X_n \}.
\end{align*}
Then let $Y_i$ be the mean of $A_i$ for each $i = 1, \ldots, k$.  Finally, let
\[ \hat{\mu} = \mathrm{Median}(Y_1, \ldots, Y_k).\]
Let $\epsilon, \delta \in (0,1)$.
Our goal in this problem will be to pick $k$ and $m$ (and hence $n = mk$) in terms of $\epsilon$ and $\delta$ so that
\[\Pr[|\hat{\mu} - \mu| > \epsilon] \leq \delta.\]
\begin{enumerate}
    \item \pts{3} Show that $\Pr[ |Y_j - \mu| > \epsilon] \leq \frac{1}{m \epsilon^2}$.
    \item \pts{6} Say that $j \in \{1, 2, \ldots, k\}$ is ``good'' if $|Y_j - \mu| \leq \epsilon$, and ``bad'' otherwise.  Let $B$ be the number of bad $j$'s.  Suppose that $m = 4/\epsilon^2$.  Use Chebyshev's inequality to show that
    \[ \Pr[ B \geq k/2 ] \leq \frac{4}{k}.\]
    \hint{First show that, with this choice of $m$, $\Pr[B \geq k/2] \leq \frac{16\Var(B)}{k^2}$}
    \item \pts{3} Using the previous parts, how should we pick $k$ and $m$ in terms of $\epsilon$ and $\delta$ so that 
    $ \Pr[|\hat{\mu} - \mu| > \epsilon] \leq \delta$\text{?}
    Explain why your choice is correct.  What is the final sample complexity (that is, $n$), in terms of $\epsilon$ and $\delta$?

    \item \pts{0} \textbf{[This part is optional and won't be graded.]}  What might be the advantages of taking a median of means as in this problem, rather than letting $\hat{\mu} = \frac{1}{n} \sum_{i=1}^n X_i$ be the mean of all the samples?  
\end{enumerate}


\ifdefined\sol
\begin{shaded}
\textbf{SOLUTION:}
\ifdefined\sol
\input{solution/medianOfMeans.tex}
\fi
\end{shaded}
\fi

%%%%%%%%% Tightness of Markov / Chebyshev %%%%%%%%%
\newpage
\item \pts{8} \textbf{[Tightness of Markov's and Chebyshev's Inequalities]}
\begin{enumerate}
\item \pts{4} Show that Markov's inequality is tight. Specifically, for each value $c > 1$, describe a distribution $D_c$ supported on non-negative real numbers such that if the random variable $X$ is drawn according to $D_c$ then 
\begin{itemize}
    \item[(1)] $\bE[X] > 0$, and
    \item[(2)] $\Pr[X \ge c \bE[X]] = 1/c.$ 
\end{itemize}  
\item \pts{4} Show that Chebyshev's inequality is tight. Specifically, for each value $c > 1$, describe a distribution $D_c$ supported on real numbers such that if the random variable $X$ is drawn according to $D_c$ then 
\begin{itemize}
    \item[(1)] $\bE[X] = 0$ and $\Var[X]=1$, and 
    \item[(2)] $\Pr[|X-\bE[X]| \ge c \sqrt{\Var[X]}] = 1/c^2.$
\end{itemize}
\item \pts{0} \textbf{[One-sided version of Chebyshev's Inequality] } \textbf{[Optional: this question won't be graded.]}
    Consider the following statement: Let $X$ be a random variable with $\text{Var}[X] = 1$. For all $t > 0$, \[\Pr[X -\bE[X] \geq t] \leq \frac{1}{1 + t^2}.\] 
    \begin{enumerate}
        \item Prove this statement.
        \item Show that this is tight: For any $t > 0$, come up with a random variable $X$ with distribution $D_t$ and variance $1$ for which $\Pr[X - \bE[X] \geq t] = \frac{1}{1 + t^2}$.
    \end{enumerate}

\end{enumerate}

\ifdefined\sol
\begin{shaded}
\textbf{SOLUTION:}
\ifdefined\sol
\input{solution/MCtightness.tex}
\fi
\end{shaded}
\fi

%%%%%%%%% Agrawal-Biswas test %%%%%%%%%
\newpage
\item \pts{0} \textbf{[This whole problem is optional and will not be graded.]}
In this problem, you'll analyze a different primality test than we saw in class.  This one is called the \em Agrawal-Biswas Primality test. \em 

Given a degree $d$ polynomial $p(x)$ with integer coefficients, for any polynomial $q(x)$ with integer coefficients, we say $q(x) \equiv t(x) \mod(p(x),n)$ if there exists some polynomial $s(x)$ such that $q(x)=s(x)\cdot p(x)+t(x) \mod n$.  
(Here, we say that $\sum_i c_i x^i = \sum_i c'_i x^i \mod n$ if and only if $c_i = c'_i \mod n$ for all $i$.)
%
For example, $x^5+6x^4+3x + 1 \equiv 3x +1 \mod(x^2+x,5)$, since $(x^3)(x^2+x) + (3x+1) = x^5 + x^4 +3x + 1 \equiv x^5 + 6x^4 +3x + 1 \mod 5.$

\bigskip

\boxed{
\begin{minipage}{\textwidth}
\noindent
\textbf{Agrawal-Biswas Primality Test}.
\newline
Given $n$:
\begin{itemize}
\item If $n$ is divisible by 2,3,5,7,11, or 13, or is a perfect power (i.e. $n=c^r$ for integers $c$ and $r$) then output \textbf{composite}. 
\item Set $d$ to be the smallest integer greater than $\log n$, and choose a random degree $d$ polynomial with leading coefficient 1: $$r(x)=x^d+c_{d-1}x^{d-1}+\ldots+c_1 x + c_0,$$ by choosing each coefficient $c_i$ uniformly at random from $\{0,1,\ldots,n-1\}.$
\item If $(x+1)^n \equiv x^n +1 \mod (r(x),n)$ then output \textbf{prime}, else output \textbf{composite}.
\end{itemize}
\end{minipage}
}

Consider the following theorem (you can assume this if you like, or for even more optional work, try to prove it!):
\begin{theorem}[Polynomial version of Fermat's little theorem]
\ 
\begin{itemize}
	\item If $n$ is prime, then for any integer $a$, $(x-a)^n=x^n-a \mod n.$ 
	\item If $n$ is not prime and is not a power of a prime, then for any $a$ s.t. $gcd(a,n)=1$ and any prime factor $p$ of $n$, $(x-a)^n \neq  x^n - a \mod p.$  %[Hint: you just need to show that if you were to expand the left side, there is one term other than $x^n$ and $-a$ that does not vanish, modulo $p$.]
\end{itemize}
\end{theorem}

First, show that if $n$ is prime, then the Agrawal-Biswas primality test will always return \textbf{prime}.

	Now, we will prove that if $n$ is composite, the probability over random choices of $r(x)$ that the algorithm successfully finds a witness to the compositeness of $n$ (and hence returns \textbf{composite}) is at least $\frac{1}{4d}$ .
\begin{enumerate}
	\item	Using the polynomial version of Fermat's Little Theorem, and the fact that, for prime $q$, every polynomial over $\Z_q$ that has leading coefficient 1 (i.e. that is ``monic'') has a unique factorization into irreducible monic polynomials, prove that the number of irreducible degree $d$ factors that the polynomial $(x+1)^n-(x^n+1)$ has over $\Z_p$ is at most $n/d$, where $p$ is any prime factor of $n$. (A polynomial is irreducible if it cannot be factored, for example $x^2 +1 = (x+1)(x+1) \mod 2$ is not irreducible over $\Z_2$, but $x^2+1$ is irreducible over $\Z_3$.) 

\hint{Even though this question sounds complicated, the proof is just one line...}
	\item Let $f(d,p)$ denote the number of irreducible monic degree $d$ polynomials over $\Z_p$.  Prove that if $n$ is composite, and not a power of a prime, the probability that $r(x)$ is a witness to the compositeness of $n$ is at least $\frac{f(d,p) - n/d}{p^d}$, where $p$ is a prime factor of $n$.  

\hint{ $p^d$ is the total number of monic degree $d$ polynomials over $\Z_p$.}
	\item  Now complete the proof, and prove that the algorithm succeeds with probability at least $1/(4d)$, leveraging the fact that the number of irreducible monic polynomials of degree $d$ over $\mathbb{Z}_p$ is at least $p^d/d-p^{d/2}.$   (You should be able to prove a much better bound, though $1/4d$ is fine.)  

\hint{You will also need to leverage the fact that we chose $d > \log n$ and also explicitly made sure that $n$ has no prime factors less than $17$.}
\end{enumerate}

\ifdefined\sol
\begin{shaded}
\textbf{SOLUTION:}
\ifdefined\sol
\input{solution/ab-primality.tex}
\fi
\end{shaded}
\fi


\end{enumerate}
\end{document}
