\documentclass[11pt]{article}
\usepackage{amsmath,amssymb,amsthm}
\usepackage[linesnumbered,ruled]{algorithm2e}
\DeclareMathOperator*{\E}{\mathbb{E}}
\let\Pr\relax
\DeclareMathOperator*{\Pr}{\mathbb{P}}
\newcommand{\eps}{\epsilon}
\newcommand{\inprod}[1]{\left\langle #1 \right\rangle}
\newcommand{\R}{\mathbb{R}}
\newcommand{\handout}[5]{
\noindent
\begin{center}
\framebox{
\vbox{
\hbox to 5.78in { {\bf CS 388R: Randomized Algorithms } \hfill #2 }
\vspace{4mm}
\hbox to 5.78in { {\Large \hfill #5 \hfill} }
\vspace{2mm}
\hbox to 5.78in { {\em #3 \hfill #4} }
}
}
\end{center}
\vspace*{4mm}
}
\newcommand{\lecture}[4]{\handout{#1}{#2}{#3}{Scribe: #4}{Lecture #1}}
\newtheorem{theorem}{Theorem}
\newtheorem{corollary}[theorem]{Corollary}
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{observation}[theorem]{Observation}
\newtheorem{proposition}[theorem]{Proposition}
\newtheorem{definition}[theorem]{Definition}
\newtheorem{claim}[theorem]{Claim}
\newtheorem{fact}[theorem]{Fact}
\newtheorem{assumption}[theorem]{Assumption}
\newtheorem{remark}[theorem]{Remark}
% 1-inch margins, from fullpage.sty by H.Partl, Version 2, Dec. 15, 1988.
\topmargin 0pt
\advance \topmargin by -\headheight
\advance \topmargin by -\headsep
\textheight 8.9in
\oddsidemargin 0pt
\evensidemargin \oddsidemargin
\marginparwidth 0.5in
\textwidth 6.5in
\parindent 0in
\parskip 1.5ex
\begin{document}
\lecture{15 --- October 28, 2015}{Fall 2015}{Prof.\ Eric Price}{Kunal Lad, Abhishek Sinha}
\section{Overview}
In the last lecture, we looked at the all pairs shortest path problem. We saw the following:
\begin{enumerate}
\item An $O(mm(n)\log(n)) = O({n}^{\omega+\epsilon})$ algorithm which finds the shortest distance matrix for a graph $G$ by recursively finding the shortest distance matrix for the graph $G^2$.
\item The problem of determining the successor matrix for tripartite graphs.
\item An $O(n^\omega)$ time algorithm for finding the successor matrix in a simple tripartite graph when there is a unique successor for any 2 vertices.
\item An $O(mm(n)\log^2(n)) = O(n^{\omega+\epsilon})$ randomized algorithm for the case when there are an unknown number of successor vertices.
\end{enumerate}
In this lecture, we will see
\begin{enumerate}
\item How to find the successor matrix for general graphs by reducing the problem to the tripartite graph case.
\item Problem of finding matchings in bipartite graphs.
\item Sufficient and necessary condition for the existence of perfect matchings in bipartite graphs.
\item An algorithm for finding perfect matching in d-regular graphs when $d = 2^k$ which is based on the idea of Eulerian Tours and has a time complexity of $O(nd)$.
\item A Las Vegas algorithm for finding perfect matchings in general $d$ regular graphs which has an expected time complexity of $O(n\log n)$.
\end{enumerate}
\section{Calculating Successor Matrix for General Graphs}
We now wish to find a successor matrix $P$ for the all pairs shortest path problem in general graphs.
\begin{definition}[Successor Matrix]
The successor matrix $P$ is defined as
\begin{equation}
{P}_{ij} = k \text{ if } k\in N(i) \text{ and $k$ lies on a shortest path from $i$ to $j$ }
\end{equation}
where $N(i)$ denotes the neighborhood of $i$.
\end{definition}
In plain words, this means that $k$ should be a neighbor of $i$ and it should lie on some shortest path from $i$ to $j$.
We saw in the last lecture that finding the successor matrix for tripartite graphs is equivalent to the problem of finding witnesses for the product of the boolean matrices A and B where A is the adjacency matrix for the left half and B is the adjacency matrix for the right half.
\begin{definition}[Witness Matrix]
The witness matrix $W$ for the matrix $C = AB$ is an integer matrix W such that $W_{ij} = k$ if $k$ is a witness for $C_{ij}$ i.e. ${A}_{ik} = 1$ and ${B}_{kj} = 1$. $W_{ij} = 0$ if there is no witness i.e. $C_{ij} = 0$.
\end{definition}
To find the successor matrix in general graphs, we thus need to construct the boolean matrices $A$ and $B$. We begin by first discussing a naive way to do so.
\subsection{Naive Algorithm}
The initial idea that was discussed in class was to construct a pair of boolean matrices for each possible distance between 2 vertices and to find the successor matrix for each case separately. We formally describe the algorithm below (along the lines of \cite{MR}).
\begin{algorithm}
\SetKwInOut{Input}{Input}
\SetKwInOut{Output}{Output}
\Input{Adjacency Matrix $A$ and distance matrix $D$}
\Output{Successor Matrix $P$}
\For{l = $1$ to $n$}{
Compute $D^{(l)}$ where ${D^{(l)}}_{ij} = 1$ if and only if $D_{ij} = l-1$\;
Compute the witness matrix $W^{(l)}$ for $A$ and $D^{(l)}$
}
Compute successor matrix $P$ such that $P_{ij} = W^{(D_{ij})}_{ij}$
\caption{Naive Algorithm}
\end{algorithm}
\begin{lemma}
\label{Lemma1}
Let ${D}^{(l)}$ be defined as above. Let $i$ and $j$ be 2 vertices such that ${D}_{ij} = l$. Then ${P}_{ij} = k$ if and only if $k$ is one of the witnesses for $(A{D}^{(l)})_{ij}$.
\end{lemma}
\begin{proof}
First we prove the forward direction. By the definition of $P$, ${P}_{ij} = k$ implies that ${A}_{ik} = 1$ and that $k$ lies on a shortest path from $i$ to $j$. Since ${D}_{ij} = l$, we have ${D}_{kj} = l-1$, which is the same as ${{D}^{(l)}}_{kj} = 1$. Thus ${A}_{ik} = 1$ and ${{D}^{(l)}}_{kj} = 1$, i.e., $k$ is a witness for $(A{D}^{(l)})_{ij}$.
The converse can be proven in a similar way.
\end{proof}
This naive algorithm will have a time complexity of $O({n}^{1 + \omega + \epsilon})$ since the witness algorithm is being called $n$ times.
\subsection{Mod 3 Algorithm}
To improve the time complexity, we made the observation that for any 2 vertices $i$ and $j$, if ${D}_{ij} = l$, then the shortest distance from any neighbor of $i$ to $j$ lies in $\{l-1, l, l+1\}$. Formally, we state the following lemma.
\begin{lemma}
\label{Lemma2}
Let $i$ and $j$ be two vertices such that ${D}_{ij} = l$. Then for any $k \in N(i)$, ${D}_{kj} \in \{l-1, l, l+1\}$.
\end{lemma}
\begin{proof}
First we see that ${D}_{kj} \nless l-1$: if this were the case, then going from $i$ to $k$ and then to $j$ would give a path of length less than $l$ from $i$ to $j$, which is a contradiction.\\ Next we see that ${D}_{kj} \ngtr l+1$: if this were the case, then we could go from $k$ to $i$ and then to $j$ for a total path length of $l+1$ between $k$ and $j$, which is a contradiction.
\end{proof}
Hence, rather than looking at the actual distance from each neighbor of $i$ to $j$, we need only look at the distance modulo $3$. We formally describe the algorithm below (along the same lines as \cite{MR}).
\begin{algorithm}
\SetKwInOut{Input}{Input}
\SetKwInOut{Output}{Output}
\Input{Adjacency Matrix $A$ and distance matrix $D$}
\Output{Successor Matrix $P$}
\For{l = $\{0,1,2\}$}{
Compute $M^{(l)}$ where ${M^{(l)}}_{ij} = 1$ if and only if $D_{ij} + 1\equiv l \mod 3$\;
Compute the witness matrix $W^{(l)}$ for $A$ and $M^{(l)}$
}
Compute successor matrix $P$ such that $P_{ij} = W^{(D_{ij}\mod 3)}_{ij}$
\caption{Mod 3 Algorithm}
\end{algorithm}
\begin{lemma}
\label{Lemma3}
Let $i$ and $j$ be two vertices such that ${D}_{ij} = l$. Then ${P}_{ij} = k$ if and only if $k$ is one of the witnesses for $(AM^{(l \bmod 3)})_{ij}$.
\end{lemma}
\begin{proof}
If ${P}_{ij} = k$ then ${A}_{ik} = 1$ and ${D}_{kj} = l-1$, which implies that ${D}_{kj} \equiv l-1 \pmod 3$, i.e., $D_{kj} + 1 \equiv l \pmod 3$. Thus $k$ is one of the witnesses for $(AM^{(l \bmod 3)})_{ij}$.\\
Conversely, let $k$ be one of the witnesses for $(AM^{(l \bmod 3)})_{ij}$. Then $A_{ik} = 1$ and $D_{kj} \equiv l-1 \pmod 3$. By Lemma~\ref{Lemma2}, we have that ${D}_{kj} \in \{l-1, l, l+1\}$. But ${D}_{kj}$ cannot be $l$ or $l+1$, since these are not congruent to $l-1 \pmod 3$. Thus, by Lemma~\ref{Lemma1}, we have that ${P}_{ij} = k$.
\end{proof}
The time complexity of this algorithm is $O(n^{\omega +\epsilon})$ since the witness algorithm is only run a constant number of times.
\section{Matchings in Bipartite Graphs}
\begin{definition}[Bipartite Graph]
A graph $G = (V, E)$ is said to be bipartite if the vertex set $V$ can be partitioned into 2 disjoint sets $L$ and $R$ so that any edge has one vertex in $L$ and the other in $R$.
\end{definition}
\begin{definition}[Matching]
Given an undirected graph $G = (V, E)$, a matching is a subset of edges $M \subseteq E$ no two of which share an endpoint.
\end{definition}
\begin{definition}[Maximum Matching]
Given an undirected graph $G = (V, E)$, a maximum matching $M$ is a matching of maximum size. Thus for any other matching $M'$, we have that $|M| \geq |M'|$.
\end{definition}
The problem of finding maximum matchings in bipartite graphs is a well studied problem. We describe some of the commonly known techniques for the same.
\begin{itemize}
\item \textbf{Ford Fulkerson Algorithm}
\begin{itemize}
\item We add a source and sink node to the bipartite graph and then compute the max flow on the resulting $s-t$ network assuming unit capacity for all the edges.
\item Given the max flow, we output the intermediate edges on which there is a unit flow as the matching edges.
\item The time complexity of this algorithm is $O(|V||E|)$
\end{itemize}
\item \textbf{Hungarian Algorithm} This is used in those cases where each edge of the bipartite graph has a weight or a cost associated with it. As an example, we may want to match students to rooms and each student may have a certain maximum cost s/he is willing to pay for the room. Using the Hungarian Algorithm, we can find a minimum cost matching in time $O(|V|^3)$ time.
\item \textbf{Hopcroft--Karp Algorithm} This algorithm is also based on the idea of augmenting paths and has a time complexity of $O(|E|\sqrt{|V|})$.
\end{itemize}
Thus, we can see that for dense graphs none of these algorithms is asymptotically better than $O(|V|^{2.5})$. Later on in the lecture, we describe a randomized algorithm for finding a maximum matching in regular bipartite graphs (which is a perfect matching) which has an expected time complexity of $O(|V|\log(|V|))$.
\subsection{Perfect Matching in Bipartite Graphs}
\begin{definition}[Perfect Matching]
Given a bipartite graph $G = (V, E)$ with the bipartition $V= L \cup R$ where $|L| = |R| = n$, a perfect matching is a maximum matching of size $n$.
\end{definition}
We now prove \emph{Hall's Theorem} which gives both sufficient and necessary conditions for the existence of a perfect matching in a bipartite graph.
\begin{theorem}[Hall's Theorem]
\label{HT}
A bipartite graph $G = (V, E)$ , with the bipartition $V= L \cup R$ where $|L| = |R| = n$, has a perfect matching if and only if for every subset $A \subseteq L$ , $|N(A)| \geq |A|$ where $N(A)$
denotes the neighborhood of $A$.
\end{theorem}
\begin{proof}
We first prove the necessary condition. Consider any subset $A \subseteq L$. In the perfect matching, each vertex in $A$ will be connected to a distinct vertex of $R$. Hence $|N(A)| \geq |A|$.
We now prove the sufficient condition. We present the proof along the same lines as \cite{TR2}. We prove it by contraposition, i.e., given that there does not exist a perfect matching, we construct a set $A \subset L$ such that $|N(A)| < |A|$.\\
We analyze a maximum (integral) flow in the network $G'$ corresponding to the bipartite graph $G$ which by assumption must have a value less than $n$. Hence, by the max-flow min-cut theorem an $s-t$ min-cut $(S, S^c)$ of the graph also has a capacity less than $n$. \\
Let $L_1 = S \cap L$, $R_1 = S \cap R$, $L_2 = S^c \cap L$ and $R_2 = S^c \cap R$. Since all edges have unit capacity and we are looking at integral flows, the capacity of the cut is simply the number of edges going from $S$ to $S^c$. Hence,
\begin{align*}
\text{capacity}(S) &= |L_2| + |R_1| + \text{edges}(L_1, R_2) \\
&= n - |L_1| + |R_1| + \text{edges}(L_1, R_2).
\end{align*}
But we know that $\text{capacity}(S) \leq n-1$. Hence
\begin{equation}
n - |L_1| + |R_1| + \text{edges}(L_1, R_2) \leq n - 1.
\end{equation}
This implies that
\begin{equation}
1 + |R_1| + \text{edges}(L_1, R_2) \leq |L_1|.
\end{equation}
We can easily see that the quantity $|R_1| + \text{edges}(L_1, R_2)$ is an upper bound on $|N(L_1)|$, since we are overcounting by assuming that each edge from $L_1$ to $R_2$ has a distinct endpoint in $R_2$. Hence we have the result that
\begin{equation}
1 + |N(L_1)| \leq |L_1| \Leftrightarrow |N(L_1)| < |L_1|.
\end{equation}
Thus $L_1$ is the set we were looking for, and this completes the proof of the sufficient condition.
\end{proof}
Using Hall's Theorem, we now show that every $d$ regular bipartite graph has a perfect matching.
\begin{theorem}
\label{Regular-Perfect}
Every $d$ regular bipartite graph has a perfect matching.
\end{theorem}
\begin{proof}
Consider any set $A \subseteq L$. We count the number of edges from $A$ to $N(A)$ in 2 different ways. This number is exactly equal to $|A|d$, since each vertex in $A$ contributes $d$ outgoing edges. We also have that the number of edges incoming to $N(A)$ is at most $d|N(A)|$. This is an upper bound on the number of edges from $A$ to $N(A)$, since not all of the edges incoming to the set $N(A)$ need originate in $A$. Hence, we have that
\begin{equation}
d|A| \leq d|N(A)| \Leftrightarrow |A| \leq |N(A)|
\end{equation}
Hence, by Hall's theorem, the graph must have a perfect matching.
\end{proof}
\subsection{Matchings in d-regular Graphs for d = $2^k$}
In the next section, we describe an algorithm for finding perfect matchings in $d$ regular graphs where $d = 2^k$.
\begin{definition}[Euler Tour]
An Euler tour in an undirected graph is defined as a tour that traverses each edge of the graph exactly once.
Necessary and Sufficient Condition: A connected undirected graph has an Euler tour if and only if every vertex has even degree.
\end{definition}
Now, for a $d$-regular graph with $d = 2^k$, we can find a matching by the following recursive algorithm:
\begin{itemize}
\item $d = 1$ : Then it is a perfect matching precisely.
\item $d = 2$ : In this case the graph is a disjoint union of cycles. Choosing an orientation of each cycle and keeping, say, the edges directed from $L$ to $R$ gives us a perfect matching.
\item $d = 2^k$ : In this case we can get a matching by the following procedure:
\begin{itemize}
\item Walk along the edges and find an Eulerian tour of $G$ in $O(m)$ time.
\item Orient the edges by the direction used in the walk.
\item Consider all forward edges; these form a regular graph with degree $d/2 = 2^{k-1}$.
Thus the running time is given by:
\begin{align*}
T(m) &= O(m) + T(m/2) \\
&= O(m)
\end{align*}
\end{itemize}
\end{itemize}
\subsection{Matchings in general d-regular Bipartite Graphs}
In this section we look at a randomized algorithm proposed by Goel, Kapralov, and Khanna \cite{GKK} for finding matchings in $d$-regular bipartite graphs where $d$ may not be a power of 2.
\textbf{Intuition:} There are a large number of perfect matchings in $d$-regular bipartite graphs, so a random walk should succeed very quickly in finding a flow augmenting path in Ford--Fulkerson.
\textbf{Algorithm:}
\begin{itemize}
\item Run a modified Ford--Fulkerson algorithm which uses a random walk instead of BFS/DFS to find flow augmenting paths.
\item For each flow augmenting path found in the above run, let $x$ and $y$ be the vertices such that the edges $(s,x)$ and $(y,t)$ are in the flow augmenting path. Add $(x,y)$ to the matching.
\end{itemize}
\textbf{Note:} This algorithm assumes that we have $G$ in adjacency array format so that we can sample edges for the random walk in expected constant time.
\begin{lemma}
\label{Lemma4}
Let $k$ be the number of unmatched vertices after we have found a partial matching. Then:
\begin{align*}
E[\text{Time for random walk from s to t}] = O(n/k)
\end{align*}
\end{lemma}
\begin{proof}
Let $X$ and $Y$ be the two sides of the bipartition of the given graph $G$ and let $M$ be the partial matching between vertices in $X$ and $Y$. We define the following with respect to $M$:
$X_m$ : Set of matched vertices in $X$.\newline
$Y_m$ : Set of matched vertices in $Y$.\newline
$X_u$ : Set of unmatched vertices in $X$.\newline
$Y_u$ : Set of unmatched vertices in $Y$.\newline
$M(x) = y$ and $M(y) = x$ if $x \in X$ is matched to $y \in Y \text{ under } M$.
Let $b(v) = E[ \text{\#Back edges in a random walk starting at $v$ and ending at $t$}]$.
Our goal is to prove that $b(s) \leq n/k$.
By the above definitions we have the following:
\begin{enumerate}
\item If $y \in Y$
\begin{align*}
b(y) & = 0 \text{\hspace{15ex} if } y \in Y_u \\
& = 1 + b(M(y)) \text {\hspace{4ex} if } y \in Y_m
\end{align*}
\item If $x \in X$
\begin{itemize}
\item if $x \in X_u$ then
\begin{align*}
b(x) & = 1/d \sum_{y \in N(x)} b(y) \\
\implies d b(x) & = \sum_{y \in N(x)} b(y)
\end{align*}
\item if $x \in X_m$ then
\begin{align*}
b(x) & = 1/(d-1) \sum_{y \in \{N(x) - M(x)\}} b(y) \\
\implies (d-1) b(x) & = \sum_{y \in \{N(x) - M(x)\}} b(y) \\
\implies (d-1) b(x) & = -b(M(x)) + \sum_{y \in \{N(x)\}} b(y) \\
\implies (d-1) b(x) & = -(1 + b(x)) + \sum_{y \in \{N(x)\}} b(y) \\
\implies d b(x) & = -1 + \sum_{y \in \{N(x)\}} b(y)
\end{align*}
\end{itemize}
Thus from 2 we get:
\begin{align*}
d \sum_{x \in X} b(x) & = -(n-k) + \sum_{(x,y) \in E} b(y) \\
& = -(n-k) + d \sum_y b(y) \\
& = -(n-k) + d (|M| + \sum_{x \in X_m} b(x)) \\ \\
d \sum_{x \in X_u} b(x) & = (d-1) (n-k) \\ \\
b(s) & = \frac{1}{|X_u|} \sum_{x \in X_u} b(x) \\
& = \frac{1}{k} \frac{d-1}{d} (n-k) \\
& \leq \frac{n}{k}
\end{align*}
\end{enumerate}
\end{proof}
The above lemma implies:
\begin{align*}
E[\text{Running Time to find a matching}] \lesssim \sum_{k=1}^{n} (n/k) = nH_n \lesssim n \log n
\end{align*}
\bibliographystyle{alpha}
\begin{thebibliography}{42}
\bibitem[TR2]{TR2}
Trevisan, Luca.
\newblock Section 14.2, Combinatorial Optimization: Exact and Approximate Algorithms.
\newblock {Stanford University (2011)}
\bibitem[MR]{MR}
Rajeev~Motwani, Prabhakar~Raghavan
\newblock Randomized Algorithms.
\newblock {\em Cambridge University Press}, 0-521-47465-5, 1995.
\bibitem[GKK]{GKK}
Goel, Ashish, Michael Kapralov, and Sanjeev Khanna.
\newblock Perfect Matchings in $O(n \log n)$ Time in Regular Bipartite Graphs.
\newblock {SIAM Journal on Computing 42.3 (2013): 1392-1404.}
\end{thebibliography}
\end{document}