\documentclass[11pt]{amsart}
\textwidth 6.3in

\evensidemargin=-0.1cm \oddsidemargin=-0.1cm

\textheight 8.1in

\usepackage{tikz-cd}

\begin{document}


\begin{center}
\huge HW 1 \\
50 points \\

\Large Posted Tuesday, September 2

\Large Due Thursday, September 18 at midnight

\normalsize
\end{center}


Note: Lecture 2 covers material for 1-4 (except for 3(e)). Lectures 3 and 4 cover 5 and 6. Submit solutions in file \texttt{HW1Solutions.pdf}. Submission size limit in Submitty is set to 2.5MB.

\vspace{0.15in}

\noindent{\bf Problem 1.} Describe the languages denoted by the following regular expressions using the highest level characterization possible.


\begin{enumerate}

\item[(a)] (0.5pts) $\texttt{a(a|b)}^*\texttt{a}$

\item[(b)] (0.5pts) $\texttt{((}\epsilon \texttt{|a)b}^\texttt{*}\texttt{)}^\texttt{*}$

\item[(c)] (0.5pts) $\texttt{(a|b)}^*\texttt{a(a|b)(a|b)}$

\item[(d)] (0.5pts) $\texttt{a}^\texttt{*}\texttt{ba}^\texttt{*}\texttt{ba}^\texttt{*}$

\item[(e)] (3pts) $\texttt{(aa|bb)}^\texttt{*}\texttt{((ab|ba)(aa|bb)}^\texttt{*}\texttt{(ab|ba)(aa|bb)}^\texttt{*}\texttt{)}^\texttt{*}  \texttt{((ab|ba)(aa|bb)}^\texttt{*}\texttt{a|b)} $

\end{enumerate}

\vspace{0.15cm}

\noindent{\bf Problem 2}. The following grammar generates binary strings that have \emph{even} values. $S$ is the start symbol. 
 
\begin{enumerate}

\item[] $S\rightarrow A$ 

\item[] $A\rightarrow B\ \texttt{0} \mid A\ \texttt{0} \mid \texttt{0}$

\item[] $B\rightarrow A\ \texttt{1} \mid B\ \texttt{1} \mid \texttt{1}$

\end{enumerate}

\vspace{.05in}

Your task is to construct a similar grammar that generates strings with values divisible by 3. 

\vspace{.05in}

\begin{enumerate}

\item [(a)] (3pts) Fill in the blanks to complete the grammar:

\begin{enumerate}

\item[] $S\rightarrow A$

\item[] $A\rightarrow A\ \texttt{0} \mid ... $

\item[] $B\rightarrow ... $

\item[] ... 

\end{enumerate}


\item[(b)] (1pts) Using your grammar, show a derivation that generates 21 in binary notation.

\item[(c)] (4pts) Prove that all binary strings generated by your grammar have values divisible by 3. ({\it{Note: Use induction on the number of nodes in the parse tree.}})

\item[(d)] (4pts) Prove that your grammar generates all binary strings with values divisible by 3. 

\end{enumerate}

\vspace{.15in}

\noindent{\bf Problem 3.} Consider the grammar 

\vspace{0.05in}

\begin{enumerate}

\item[(1)] $S\rightarrow \texttt{a}\ S \ \texttt{b}\ S$

\item[(2)] $S\rightarrow \texttt{b}\ S \ \texttt{a}\ S$

\item[(3)] $S\rightarrow \epsilon $

\end{enumerate}

\vspace{0.05in}

\begin{enumerate}

\item[(a)] (1pts) Show that the grammar is ambiguous by constructing two different leftmost derivations for a string. Use the shortest string possible. 

\item[(b)] (1pts) Construct the corresponding rightmost derivations for the string from (a).

\item[(c)] (2pts) Construct the corresponding parse trees.

\item[(d)](3pts) One can see that the grammar generates strings with equal number of \texttt{a}'s and \texttt{b}'s. Does it generate \emph{all} such strings? ({\it{Note: You do not need to write a full formal proof. If you answer YES, outline an inductive argument. If you answer NO, show a string with equal number of \texttt{a}'s and \texttt{b}'s that cannot be generated by the grammar.}})

\item[(e)] (5pts) Finally, write a recursive descent parser with backtracking in Python. Include function \texttt{dfparse(s:str) -> list[int]} that takes a string and returns the list of productions the parser applied (if the parse succeeded). An unusual requirement is that your backtracking parser tries the $S$ productions in reverse order: it first tries (3), then (2), then (1). Include the function in file \texttt{backtrack.py} and turn in Submitty. You may assume that we'll test with string inputs only. A few sample runs are included:

\begin{verbatim}

python3 -i backtrack.py 
>>> dfparse('')
[3] 

python3 -i backtrack.py 
>>> dfparse('aba')
[]

python3 -i backtrack.py 
>>> dfparse('abab')
[1, 3, 1, 3, 3]

\end{verbatim}

\end{enumerate}

\vspace{.15in}

\noindent{\bf Problem 4.} 
The following is an ambiguous grammar that generates regular expressions over symbols \texttt{a} and \texttt{b}: 
\[R \rightarrow R \texttt{|} R \ \mid \ R \ R \ \mid \ R^\texttt{*} \ \mid \ \texttt{(} \ R \ \texttt{)}\ \mid \ \texttt{a}\ \mid\ \texttt{b} \]

\begin{enumerate}
\item [(a)] (2pts) How many parse trees are there for regular expression $\texttt{a|ab}^\texttt{*}$? {\it{Note: you do not need to draw the trees, just write the answer.}}
\end{enumerate}

Disambiguate the grammar so that $\texttt{a|ab}^\texttt{*}$ gives rise to a parse tree with this shape:

\begin{equation*}
\begin{tikzcd} [row sep=small]
		& R \arrow[ld,no head] \arrow[rd,no head] \arrow[d,no head] &                           &                          &   \\
\texttt{a}	& \texttt{|}      & ... \arrow[ld,no head] \arrow[rd,no head] &                          &   \\
   		& \texttt{a}     &                           & ... \arrow[d,no head] \arrow[rd,no head]          &   \\
     		&                   &                           & \texttt{b}  &   \texttt{*} \\
\end{tikzcd}
\end{equation*}

\begin{enumerate}

\item[(b)](3pts) Disambiguate into a \emph{left recursive} grammar.

\item[(c)](3pts) Disambiguate into a \emph{right recursive} grammar.

\end{enumerate}

\vspace{0.15in}
\noindent{\bf Problem 5.} Consider the following LL(1) grammar that generates S-expressions, an essential structure in Lisp-like languages. $S$ and $\mathit{Ss}$ are the nonterminals, \texttt{num} (number), \texttt{sym} (symbol), \texttt{(} and \texttt{)} are the terminals, i.e., the tokens.  
\[
\begin{array}{lll}
\mathit{Start} & \rightarrow & S\ \texttt{\$\$}\\
S & \rightarrow & \texttt{num} \\
S & \rightarrow & \texttt{sym} \\
S & \rightarrow & \texttt{(} \ Ss \ \texttt{)} \\
Ss & \rightarrow & S \  Ss \\
Ss & \rightarrow & \epsilon 
\end{array}
\]

\begin{enumerate}

\item[(a)] (2.5pts) What is FOLLOW($Ss$)? FOLLOW($S$)? PREDICT($Ss \rightarrow \epsilon$)?

\item[(b)] (2.5pts) Draw a parse tree for the string \texttt{( 5 ( a ) ) \$\$}. 

\item[(c)] (3pts) Consider a recursive descent parser running on the same input. At the point
where token \texttt{a} is matched, which recursive descent routines will be 
active (i.e., what routines will have a frame on the run-time stack)?

\end{enumerate}

\vspace{.15in}

\noindent{\bf Problem 6.} For each grammar below, determine if it is LL(1) or not and if it is ambiguous or not. You do not need to justify your answer, just write YES or NO. As an example of the format for answers we are looking for: (x) LL(1): YES, Ambiguous: YES.

\vspace{.05in}

\begin{enumerate}

\item[(a)] (1pts) $A \rightarrow \texttt{0}\ A\ \texttt{1} \mid \texttt{0}\ \texttt{1}$

\item[(b)] (1pts) $A \rightarrow \texttt{+}\ A\ A \mid \texttt{*}\ A\ A \mid \texttt{a}$

\item[(c)] (1pts) $A \rightarrow A\ \texttt{(}\ A\ \texttt{)}\ A \mid \epsilon$ 

\item[(d)] (1pts) $A \rightarrow B~\texttt{a}~|~\texttt{b}~B~\texttt{c}~|~\texttt{d}~\texttt{c}~|~\texttt{b}~\texttt{d}~\texttt{c} \quad \quad B \rightarrow \texttt{d}$ 

\item[(e)] (1pts) $S \rightarrow C\texttt{a}C\texttt{b}~|~D\texttt{b}D\texttt{a} \quad\quad C \rightarrow \epsilon \quad\quad D \rightarrow \epsilon$

\end{enumerate}


\end{document}