\documentclass[11pt]{article}
\usepackage{amsmath,amssymb,amsthm}
\usepackage{wasysym}
% (removed duplicate \usepackage{amssymb}; already loaded on line 2)
\usepackage{caption}
\usepackage{tikz}
\usepackage{xcolor}
\DeclareMathOperator*{\E}{\mathbb{E}}
\let\Pr\relax
\DeclareMathOperator*{\Pr}{\mathbb{P}}
\newcommand{\eps}{\epsilon}
\newcommand{\inprod}[1]{\left\langle #1 \right\rangle}
\newcommand{\R}{\mathbb{R}}
\newlength\myindent
\setlength\myindent{2em}
\newcommand\bindent{%
\begingroup
\setlength{\itemindent}{\myindent}
\addtolength{\algorithmicindent}{\myindent}
}
\newcommand\eindent{\endgroup}
\newcommand{\handout}[5]{
\noindent
\begin{center}
\framebox{
\vbox{
\hbox to 5.78in { {\bf CS 388R: Randomized Algorithms } \hfill #2 }
\vspace{4mm}
\hbox to 5.78in { {\Large \hfill #5 \hfill} }
\vspace{2mm}
\hbox to 5.78in { {\em #3 \hfill #4} }
\textcolor{red}{\textbf{NOTE:} THESE NOTES HAVE NOT BEEN EDITED OR CHECKED FOR CORRECTNESS}
}
}
\end{center}
\vspace*{4mm}
}
\newcommand{\lecture}[4]{\handout{#1}{#2}{#3}{Scribe: #4}{Lecture #1}}
\newtheorem{theorem}{Theorem}
\newtheorem{corollary}[theorem]{Corollary}
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{observation}[theorem]{Observation}
\newtheorem{proposition}[theorem]{Proposition}
\newtheorem{definition}[theorem]{Definition}
\newtheorem{claim}[theorem]{Claim}
\newtheorem{fact}[theorem]{Fact}
\newtheorem{assumption}[theorem]{Assumption}
% 1-inch margins, from fullpage.sty by H.Partl, Version 2, Dec. 15, 1988.
\topmargin 0pt
\advance \topmargin by -\headheight
\advance \topmargin by -\headsep
\textheight 8.9in
\oddsidemargin 0pt
\evensidemargin \oddsidemargin
\marginparwidth 0.5in
\textwidth 6.5in
\parindent 0in
\parskip 1.5ex
\usepackage{algorithm}
\usepackage{algorithmic}
\begin{document}
\lecture{1 --- August 31, 2017}{Fall 2017}{Prof.\ Eric Price}{Garrett Goble, Daniel Brown}
\section{Randomized Algorithms}
This course covers using randomness for algorithms.
\paragraph{Cost of Randomness}
\begin{itemize}
\item chance the answer is wrong (Monte Carlo Algorithm)
\item chance the algorithm takes a long time (Las Vegas Algorithm)
\end{itemize}
\paragraph{Benefits of Randomness}
\begin{itemize}
\item on average, with high probability gives a faster or simpler algorithm
\item some problems require randomness (e.g. Nash Equilibrium)
\end{itemize}
The main technique is avoiding the worst case or adversarial inputs.
\paragraph{Example Uses}
\begin{itemize}
\item Fingerprinting: compare two items by hashing down to a small fingerprint using randomness to avoid false positives
\item Load balancing by allocating randomly
\item Sampling: reducing a large collection of items to a representative subsample
\item Symmetry breaking: e.g., network protocols to elect a leader with random tie-breaking
\item Probabilistic method: if a random object satisfies a property with non-zero probability, we just proved that such an object exists
\end{itemize}
\section{Types of randomized algorithms}
\textbf{Las Vegas Algorithm (LV):}
\begin{itemize}
\item Always correct
\item Runtime is random (small time with good probability)
\item Examples : Quicksort, Hashing
\end{itemize}
\textbf{Monte Carlo Algorithm (MC):}
\begin{itemize}
\item Always bounded in runtime
\item Correctness is random
\item Examples : Karger's min-cut algorithm
\end{itemize}
It's easy to see the following:
\begin{enumerate}
\item LV $\implies$ MC. Fix a time $T$ and let the algorithm run for $T$ steps. If the algorithm terminates before $T$, we output the answer, otherwise we output 0.
\item MC does not always imply LV. The implications holds when verifying a solution can be done much faster than finding one. In that case, we test the output of MC algorithm and stop only when a correct solution is found.
\end{enumerate}
\section{Notation and General Bounds}
\begin{equation}
[n] = \{1,2,\ldots, n \}
\end{equation}
\subsection{Comparisons}
\begin{align}
f \lesssim g &\iff \exists \quad \text{a constant} \quad c > 0 \mid f \leq cg \\
f \gtrsim g &\iff g \lesssim f \\
f \eqsim g &\iff f \lesssim g \quad \text{and} \quad g \lesssim f
\end{align}
\subsection{General Bounds}
\begin{equation}
(1-a) \leq e^{-a}\;\; \forall a > 0
\end{equation}
\section{Quick Sort}
One of the most well known randomized algorithms is Quick Sort. The pseudo code is given in Algorithm~\ref{alg:quick_sort}.
\begin{algorithm}
\caption{Sort} \label{alg:quick_sort}
\begin{algorithmic}
\IF{$x==[]$}
\STATE return $[]$
\ENDIF
\STATE Choose random $t \in [n]$
\STATE return $Sort([x_i|x_i < x_t]) + [x_t] + Sort(x_i|x_i \geq x_t])$
\end{algorithmic}
\end{algorithm}
\paragraph{Question:}
Why should $t$ be random?
What if $t=1$ always? $\Theta(n^2)$ time for sorted input. Note, we don't need an adversary for bad inputs. In practice it is likely that lists that need to be sorted will consist of a mostly sorted list with a few new entries.
\paragraph{Question:}
How should we analyze the time complexity?
Let $Z_{ij} := $ event that ith largest element is compared to the jth largest element at any time during the algorithm. Because the pivot is removed, each comparison can happen at most once.
Time is proportional to the number of comparisons $=\sum_{i<j} Z_{ij}$, so by linearity of expectation the expected time is proportional to $\sum_{i<j} \Pr[Z_{ij}]$.
Consider the $i$th and $j$th largest elements with $i<j$. If the pivot is $<i$ or $>j$, then both elements go to the same recursive call and the decision is deferred. If the pivot is strictly between $i$ and $j$, then $i$ and $j$ aren't compared (they are separated into different recursive calls), while if the pivot is $i$ or $j$ itself they are compared. Since each of the $j-i+1$ elements between them (inclusive) is equally likely to be the first one chosen as a pivot,
$$\Pr[Z_{ij}] = \frac{2}{j-i+1}.$$
Therefore the expected number of comparisons is
$$\E\left[\sum_{i<j} Z_{ij}\right] = \sum_{i<j}\frac{2}{j-i+1} \leq 2n\sum_{k=2}^{n}\frac{1}{k} = O(n \log n).$$
\section{Karger's Min Cut}
Given an undirected graph $G=(V,E)$ with $|V|=n$ and $|E|=m$, a cut $(S,\bar{S})$ is a partition of $V$ into two non-empty sets, and its cost is the number of crossing edges $|E(S,\bar{S})|$. We want to find a cut of minimum cost.
\subsection{Algorithm 1}
\begin{algorithm}
\caption{Karger's contraction algorithm (``Algorithm 1'' in the text)}
\begin{algorithmic}
\WHILE{$|V| > 2$}
\STATE choose a random edge
\STATE contract into a single vertex
\ENDWHILE
\end{algorithmic}
\end{algorithm}
Contracting an edge means that we remove that edge and combine the two vertices into a super-node. We note that self-loops thus formed are removed but any resulting parallel edges are \emph{not} removed. This means at every step, we have a multi-graph without any self-loops.
This algorithm will yield a cut. The minimum cut? Probably not. But maybe! If the graph is somewhat dense, then choosing an edge in the minimum cut is rather unlikely at the beginning...and very likely at the end.
We will prove the following:
\paragraph{Lemma:} Algorithm 1 succeeds with $\geq \frac{2}{n^2}$ probability
\paragraph{}To get started, let's prove the following lemma.
\paragraph{Lemma 1:} The chance the algorithm fails in round 1 is $\leq \frac{2}{n}$
\paragraph{Proof:}
The algorithm ``fails'' in step $i$ if in that step, you choose to contract an edge in the minimum cut. We know that the chance of failure in the first step is $\frac{OPT}{m}$ where $OPT = \text{cost of true min cut} = |E(S,\bar{S})|$. We need a way to bound $|E(S,\bar{S})|$.
For all $u \in V$, let $d(u)$ be the degree of $u$. Let's use the node degrees to bound $OPT$.
\begin{align*}
OPT = |E(S,\bar{S})| &\leq \displaystyle\min_u d(u) \\
&\leq \frac{1}{n} \sum_{u \in V} d(u) \\ &
\leq \frac{2 \cdot m}{n} \\
\end{align*}
Dividing both sides by $m$ we have
\begin{equation}
\frac{|E(S,\bar{S})|}{m} \leq \frac{2}{n}
\end{equation}
\begin{flushright}
$\qed$
\end{flushright}
We can continue this analysis in each subsequent round. For example, in Round 2, we have the contracted graph $G' = (V', E')$ with $n-1$ (super-)vertices. Every cut $(S',\bar{S'})$ of $G'$ corresponds to a cut $(S,\bar{S})$ of $G$ with $cost(S')=cost(S)$, so as long as no min-cut edge has been contracted, the minimum cut survives with the same cost. So again the algorithm will fail with probability $\leq \frac{2}{n-1}$.
Thus, we have
\begin{align*}
\mathbb{P}(\text{fail in $1^{st}$ step}) &\leq \frac{2}{n} \\
\mathbb{P}(\text{fail in $2^{nd}$ step} \ | \ \text{success in $1^{st}$ step}) &\leq \frac{2}{n-1} \\
& \vdots \\
\mathbb{P}(\text{fail in $i^{th}$ step} \ | \ \text{success till $(i-1)^{th}$ step}) &\leq \frac{2}{n + 1 - i}
\end{align*}
We now proceed with the proof of the original Lemma.
\paragraph{Lemma:} Algorithm 1 succeeds with $\geq \frac{2}{n^2}$ probability
\paragraph{Proof:}
Let $Z_i :=$ the event that an edge from the cut set is picked in round $i$.
We have that $$\Pr[Z_i \mid \bar{Z}_1 \cap \bar{Z}_2 \cap \dots \cap \bar{Z}_{i-1}] \leq \frac{2}{n+1-i}$$
Thus the probability of success is given by
$$\Pr(\text{Success}) = \Pr[\bar{Z}_1 \cap \bar{Z}_2 \cap \dots \cap \bar{Z}_{n-2}] \geq \left(1-\frac{2}{n}\right)\left(1-\frac{2}{n-1}\right) \dots \left(1-\frac{2}{3}\right)$$
We can simplify this by turning things into simple fractions to get
\begin{align*}
\mathbb{P}(\text{Success}) &\geq \frac{n-2}{n} \cdot \frac{n-3}{n-1} \cdot \frac{n-4}{n-2} \cdot \frac{n-5}{n-3} \cdot ... \cdot \frac{2}{4} \cdot \frac{1}{3} \\
&= \frac{1}{n} \cdot \frac{1}{n-1} \cdot \frac{2}{1} \cdot \frac{1}{1} \\
&= \frac{2}{n(n-1)}\\
& \geq \frac{2}{n^2}
\end{align*}
\begin{flushright}
$\qed$
\end{flushright}
So how do we use this? This algorithm seems pretty likely to fail if run only once.
\subsection{Algorithm 2}
\paragraph{Question:}
Can we improve the success rate of Algorithm 1 by running it multiple times?
\begin{algorithm}
\caption{}
\begin{algorithmic}
\STATE $i = 0$
\WHILE{$i < k$}
\STATE Run Algorithm 1
\STATE $i = i + 1$
\ENDWHILE
\STATE pick set of min cost
\end{algorithmic}
\end{algorithm}
If we run Algorithm 1 $k$ times and pick the set of min cost, then
\begin{equation}
\Pr(\text{failure}) \leq \left(1 - \frac{2}{n^2}\right)^k \leq e^{\frac{-2k}{n^2}}
\end{equation}
To get this we use the useful fact that
\begin{equation}
(1-a) \leq e^{-a} \;\; \forall a > 0
\end{equation}
Plugging in $2/n^2$ for $a$ and raising both sides to the $k$th power we get the above result.
What's neat is that now we can set $k=\frac{n^2}{2} \log (\frac{1}{\delta})$ to get $1-\delta$ success probability.
\paragraph{Question} What is the run time of Algorithm 2?
The only real computation is the contraction. At every step of the algorithm, we have to contract an edge. This takes at most $\mathcal{O}(n)$ time. We do at most $n$ such contractions. Thus for every run, we need $\mathcal{O}(n^2)$ time and the total time is $O(n^2 \cdot k)$. If we set $k=\frac{n^2}{2} \log (\frac{1}{\delta})$, then we have complexity $O(n^4 \log (\frac{1}{\delta}))$.
We can choose $\delta$ to be, say $1/4$, and thus $k = \frac{n^2}{2}\log{4}$ and this will ensure that within $\mathcal{O}(n^4)$ time the algorithm succeeds with probability at least $3/4$.
\subsection{Algorithm 3}
\paragraph{Question:}
Can we make Algorithm 1 more efficient?
Initial stages of the algorithm are very likely to be correct. In particular, the first step is wrong with probability at most $2/n$. As we contract more edges, failure probability goes up. Moreover, earlier stages take more time compared to later ones.
\textit{Idea:} Let us redistribute our iterations. Since earlier ones are more accurate and slower, why not do fewer of them at the beginning, and increasingly more as the number of edges decreases?
\begin{algorithm}
\caption{}
\begin{algorithmic}
\STATE Repeat twice
\bindent
\STATE take $n-\frac{n}{\sqrt{2}}$ steps of contraction
\STATE recursively apply this algorithm
\eindent
\STATE take better result
\end{algorithmic}
\end{algorithm}
This results in the following runtime:
$$T(n) = O(n^2) + 2T\left(\frac{n}{\sqrt{2}}\right) = O(n^2 \log n)$$
The success probability is now given by:
\begin{align*}
p(n) &= 1 - (\text{failure probability of one branch})^2 \\
&= 1 - \left(1 - \Pr[\text{success in one branch}]\right)^2 \\
& = 1 - (1 - (\frac{\frac{n}{\sqrt{2}}}{n})^2 \cdot p(\frac{n}{\sqrt{2}}))^2 \\
&= 1 - \left(1 - \frac{1}{2} \ p\left( \frac{n}{\sqrt{2}}\right) \right)^2 \\
&= p\left( \frac{n}{\sqrt{2}}\right) - \frac{1}{4} \ p\left( \frac{n}{\sqrt{2}}\right)^2
\end{align*}
To solve this recursion, we let $x = \log_{\sqrt{2}}{n}$. Thus, setting $f(x) = p(n)$, we get
$$f(x) = f(x-1) - f(x-1)^2$$
We observe that setting $f(x) = \frac{1}{x}$ gives:
$$f(x-1) - f(x) = \frac{1}{x-1} - \frac{1}{x} = \frac{1}{x(x-1)} \approx \frac{1}{(x-1)^2} = f(x-1)^2$$
Hence, we have $p(n) = \Omega \left( \frac{1}{\log n} \right)$, i.e., a single run of Algorithm 3 succeeds with probability at least on the order of $1/\log n$.
\subsection{Algorithm 4}
We can now run Algorithm 3 multiple times.
Alg 4: Repeat algorithm $O(\log n \log \frac{1}{\delta})$ times.
\end{document}