\section{Languages and Automata}
\begin{exercise}{}
Consider the following languages defined by regular expressions:
\begin{enumerate}
\item \(\{a,ab\}^*\)
\item \(\{aa\}^* \cup \{aaa\}^*\)
\item \(a^+b^+\)
\end{enumerate}
and the following languages defined in set-builder notation:
\begin{enumerate}
\renewcommand{\theenumi}{\Alph{enumi}}
\item \(\{w \mid \forall i. 0 \le i < |w| \land w_{(i)} = b \implies (i > 0 \land w_{(i - 1)} = a)\}\) % 1
\item \(\{w \mid \forall i. 0 \le i < |w| - 1 \implies w_{(i)} = b \implies w_{(i + 1)} = a\}\) % wrong
\item \(\{w \mid \exists i. 0 < i < |w| \land w_{(i)} = b \land w_{(i - 1)} = a\}\) % wrong
\item \(\{w \mid (|w| \equiv 0 \pmod{2} \lor |w| \equiv 0 \pmod{3}) \land \forall i. 0 \leq i < |w| \implies w_{(i)} = a\}\) % 2
\item \(\{w \mid \forall i. 0 \le i < |w| - 1 \land w_{(i)} = a \implies w_{(i + 1)} = b\}\) % wrong
\item \(\{w \mid \exists i. 0 \le i < |w| - 1 \land
(\forall y. 0 \leq y \leq i \implies w_{(y)} = a) \land (\forall y. i < y < |w| \implies w_{(y)} = b) \}\) % 3
\end{enumerate}
For each pair (e.g. 1-A), check whether the two languages are equal, providing
a proof if they are, and a counterexample word that is in one but not the
other if unequal.
\begin{solution}
Equal language pairs: \(1 \mapsto A, 2 \mapsto D, 3 \mapsto F\).
Counterexamples (a word marked \(\cdot^\star\) lies in the letter-labelled
language but not the number-labelled one; an unmarked word lies in the
number-labelled language but not the letter-labelled one):
\begin{center}
\begin{tabular}{c c c c c c c}
& A & B & C & D & E & F \\
1 & - & b\(^\star\) & a & a & aa & a \\
2 & ab\(^\star\) & ba\(^\star\)& ab\(^\star\)& - & ab\(^\star\)& aa \\
3 & abb & abb & aba\(^\star\) & aaabb & aab & - \\
\end{tabular}
\end{center}
We prove the first case as an example.
\begin{equation*}
\{a,ab\}^* = \{w \mid \forall i. 0 \le i < |w| \land w_{(i)} = b \implies (i > 0 \land w_{(i - 1)} = a)\}
\end{equation*}
We must prove both directions, i.e. that \(\{a,ab\}^* \subseteq \{w \mid
P(w)\}\) and that \(\{w \mid P(w)\} \subseteq \{a,ab\}^*\).
\noindent
\textbf{Forward}: \(\{a,ab\}^* \subseteq \{w \mid P(w)\}\):
We must show that for all \(w \in \{a,ab\}^*\), \(P(w)\) holds. For any \(i
\in \naturals\), given that \(0 \le i < |w| \land w_{(i)} = b\), we need
to show that \(i > 0 \land w_{(i - 1)} = a\).
From the definition of \(*\) on sets of words, we know that there must exist
\(n \le |w|\) words \(w_1, \ldots, w_n \in \{a, ab\}\) such that \(w = w_1
\ldots w_n\). The index \(i\) must be in the range of one of these words,
i.e. there exist \(1 \leq m \leq n\) and \(0 \leq j < |w_m|\) such that
\(w_{(i)} = w_{m(j)}\).
We know that \(w_{(i)} = b\) and \(w_{m} \in \{a, ab\}\) by assumption. The
case \(w_m = a\) is a contradiction, since it cannot contain \(b\). Thus,
\(w_m = ab\). We know that \(w_{(i)} = w_{m(j)} = b\), so \(j = 1\). Thus,
\(w_{(i - 1)} = w_{m(j - 1)} = w_{m(0)} = a\), as required. Since \(i - 1
\geq 0\), being an index into \(w\), \(i > 0\) holds as well. Hence,
\(P(w)\) holds.
\noindent
\textbf{Backward}: \(\{w \mid P(w)\} \subseteq \{a,ab\}^*\):
We must show that for all \(w\) such that \(P(w)\) holds, \(w \in
\{a,ab\}^*\). We know by definition of \(*\) again, that \(w \in \{a,
ab\}^*\) if and only if there exist \(n \le |w|\) words \(w_1, \ldots, w_n \in
\{a, ab\}\) such that \(w = w_1 \ldots w_n\). We attempt to show that if
\(P(w)\) holds, then \(w\) admits such a decomposition.
We proceed by induction on the length of \(w\).
\noindent
\textit{Base Case \(|w| = 0\)}: The empty word has a decomposition \(w =
\epsilon\) (with \(n = 0\) in the decomposition). QED.
\noindent
\textit{Base Case \(|w| = 1\)}: The word \(w\) is either \(a\) or \(b\). We know
that \(P(w)\) holds, so \(w = a\) (why?). The decomposition is \(w = a\),
with \(n = 1\) and \(w_1 = a\). QED.
\noindent
\textit{Induction Case \(|w| > 1\)}:
Induction hypothesis: for all words \(v\) such that \(|v| < |w|\) and
\(P(v)\) holds, \(v\) admits a decomposition into words in \(\{a, ab\}\),
and thus \(v \in \{a, ab\}^*\).
We need to show that if \(P(w)\) holds, then \(w\) admits such a
decomposition as well. Split the proof based on the first two characters of
\(w\). There are four possibilities. We give the name \(v\) to the rest of
\(w\).
\begin{enumerate}
\item \(w = aav\): let \(v' = av\), so that \(w = a v'\). We show that
\(P(v')\) holds. Suppose \(0 \le i < |v'|\) and \(v'_{(i)} = b\). Since
\(v'_{(0)} = a\), we must have \(i > 0\). Moreover, \(v'_{(i)} = w_{(i +
1)}\), so \(w_{(i + 1)} = b\), and \(P(w)\) gives \(w_{(i)} = a\). For \(i
\ge 1\) we have \(w_{(i)} = v'_{(i - 1)}\), so \(v'_{(i - 1)} = a\). This
is precisely the statement \(P(v')\).
(Note that \(P(v)\) itself need not hold: for \(w = aab\), \(P(w)\) holds,
but \(v = b\) violates \(P\). Keeping the leading \(a\) in \(v'\) avoids
this.)
Since \(|v'| = |w| - 1 < |w|\), the induction hypothesis applies: \(v'\)
has a decomposition into words in \(\{a, ab\}\), \(v' = v_1 \ldots v_m\)
for some \(m \le |v'|\) and \(v_i \in \{a, ab\}\).
We can now construct a decomposition for \(w\), \(w = w_1 \ldots w_{m+1}\)
such that \(w_1 = a\) and \(w_{i + 1} = v_i\) for \(1 \le i \le m\). Since
\(m \le |v'|\) and \(|v'| = |w| - 1\), \(m + 1 \le |w|\). QED.
\item \(w = abv\): we show that \(P(v)\) holds. Suppose \(0 \le i < |v|\)
and \(v_{(i)} = b\). Then \(w_{(i + 2)} = b\), so \(P(w)\) gives
\(w_{(i + 1)} = a\). Since \(w_{(1)} = b\), this forces \(i + 1 \ge 2\),
i.e. \(i > 0\), and then \(v_{(i - 1)} = w_{(i + 1)} = a\), as required.
By the induction hypothesis, \(v\) has a decomposition into words in
\(\{a, ab\}\), \(v = v_1 \ldots v_m\) for some \(m \le |v|\) and \(v_i \in
\{a, ab\}\).
We can similarly construct a decomposition for \(w\), \(w = w_1 \ldots
w_{m+1}\) such that \(w_1 = ab\) and \(w_{i + 1} = v_i\) for \(1 \le i \le
m\). Since \(m \le |v|\) and \(|v| = |w| - 2\), in particular \(m + 1 <
|w|\). QED.
\item \(w = bav\) or \(w = bbv\): \(P(w)\) cannot hold (set \(i = 0\)), so
the statement is vacuously true.
\end{enumerate}
\end{solution}
\end{exercise}
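As a quick sanity check of the claimed equality (independent of the proof), we
can enumerate all short words over \(\{a, b\}\) and compare the two
definitions directly. The following Scala sketch is illustrative; the helper
names are ours, not part of the exercise.
\begin{lstlisting}
// P(w): every occurrence of 'b' is preceded by an 'a'.
def p(w: String): Boolean =
  w.indices.forall(i => w(i) != 'b' || (i > 0 && w(i - 1) == 'a'))

// Membership in {a, ab}^*: greedy decomposition is safe here, since every
// block starts with 'a', so a 'b' can only be consumed as part of "ab".
def inStar(w: String): Boolean =
  if (w.isEmpty) true
  else if (w.startsWith("ab")) inStar(w.drop(2))
  else if (w.startsWith("a")) inStar(w.drop(1))
  else false

// Enumerate all words over {a, b} up to length 10 and compare.
val words = (0 to 10).flatMap(n =>
  (0 until (1 << n)).map(bits =>
    (0 until n).map(i => if (((bits >> i) & 1) == 0) 'a' else 'b').mkString))
assert(words.forall(w => p(w) == inStar(w)))
\end{lstlisting}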
% flavour text for lexer constructions
\section{Lexing}
% In the lectures, we have seen how to manually construct a lexer for small
% regular expressions. We often use tools that generate lexers from regular
% expressions. You will see one such tool, Silex, while building the Amy lexer.
% % about automata for lexing
% Lexing frameworks process the description of tokens for a given language, and
% may use a variety of techniques to construct the final lexer. The result is a
% program that accepts a string and returns a list of tokens. One way to do this
% automatically is by constructing and composing automata.
Consider a simple arithmetic language that allows you to compute one arithmetic
expression, construct conditionals, and let-bind expressions. An example program
is:
\begin{lstlisting}
let x = 3 in
let y = ite (x > 0) (x * x) 0 in
(2 * x) + y
\end{lstlisting}
The lexer for this language must recognize the following tokens:
\begin{align*}
\texttt{keyword}: &\quad \texttt{let} \mid \texttt{in} \mid \texttt{ite}\\
\texttt{op}: &\quad \texttt{+} \mid \texttt{-} \mid \texttt{*} \mid \texttt{/} \\
\texttt{comp}: &\quad \texttt{>} \mid \texttt{<} \mid \texttt{==} \mid \texttt{<=} \mid \texttt{>=} \\
\texttt{equal}: &\quad \texttt{=} \\
\texttt{lparen}: &\quad \texttt{(} \\
\texttt{rparen}: &\quad \texttt{)} \\
\texttt{id}: &\quad letter \cdot (letter \mid digit)^* \\
\texttt{number}: &\quad digit^+ \\
\texttt{skip}: &\quad \texttt{whitespace}
\end{align*}
For simplicity, \(letter\) is a shorthand for the set of all English lowercase
letters \(\{a, \ldots, z\}\) and \(digit\) is a shorthand for the set of all
decimal digits \(\{0, \ldots, 9\}\).
% \todo{if we allow an \texttt{ite} keyword with operators, we can ask them how
% chained operators would be parsed, eg: \texttt{<===>=<===}. Is this interesting?}
\begin{exercise}{}
For each of the tokens above, construct an NFA that recognizes strings matching
its regular expression.
\begin{solution}
The construction is similar in each case, following translation of regular
expressions to automata. For example:
\begin{itemize}
\item \texttt{keyword}: \texttt{let} $\mid$ \texttt{in} $\mid$ \texttt{ite}
\begin{center}
\begin{tikzpicture}[shorten >=1pt,node distance=2cm,on grid,auto]
\node[state,initial] (q_0) {$q_0$};
%
\node[state] (ql_1) [above right=of q_0] {$q_l$};
\node[state] (ql_2) [right=of ql_1] {$q_e$};
\node[state,accepting] (ql_3) [right=of ql_2] {$q_{let}$};
%
\node[state] (qin_1) [right=of q_0] {$q_{i1}$};
\node[state,accepting] (qin_2) [right=of qin_1] {$q_{in}$};
%
\node[state] (qite_1) [below right=of q_0] {$q_{i2}$};
\node[state] (qite_2) [right=of qite_1] {$q_t$};
\node[state,accepting] (qite_3) [right=of qite_2] {$q_{ite}$};
%
\path[->]
(q_0) edge node {\texttt{l}} (ql_1)
(ql_1) edge node {\texttt{e}} (ql_2)
(ql_2) edge node {\texttt{t}} (ql_3)
%
(q_0) edge node {\texttt{i}} (qin_1)
(qin_1) edge node {\texttt{n}} (qin_2)
%
(q_0) edge node {\texttt{i}} (qite_1)
(qite_1) edge node {\texttt{t}} (qite_2)
(qite_2) edge node {\texttt{e}} (qite_3)
;
\end{tikzpicture}
\end{center}
\item \texttt{id}: \texttt{letter} $\cdot$ (\texttt{letter} $\mid$ \texttt{digit})$^*$
\begin{center}
\begin{tikzpicture}[shorten >=1pt,node distance=3cm,on grid,auto]
\node[state,initial] (q_0) {$q_0$};
%
\node[state] (q1) [accepting, right=of q_0] {$q_1$};
%
\path[->]
(q_0) edge node {\texttt{letter}} (q1)
(q1) edge[loop above] node {\texttt{letter}} (q1)
(q1) edge[loop below] node {\texttt{digit}} (q1)
;
\end{tikzpicture}
\end{center}
\end{itemize}
The other cases are similar.
\end{solution}
\end{exercise}
A lexer is constructed by combining the NFAs for each of the tokens in
parallel. Each NFA matches the longest prefix of the remaining input that it
accepts (maximal munch); the token produced is that of the first NFA in the
token order above that accepts a non-empty prefix. Thus, tokens listed first
have higher priority. We then continue lexing the remaining string. You may
assume that the lexer drops any \texttt{skip} tokens.
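The following Scala sketch illustrates this combination scheme (it is
illustrative only, and in particular is not the Silex API): each token class
reports the length of the longest prefix it accepts, and the lexer emits the
token of the first class, in priority order, that accepts a non-empty prefix.
\begin{lstlisting}
type Matcher = String => Option[Int] // length of longest accepted prefix

val keyword: Matcher = s =>
  List("let", "in", "ite").filter(k => s.startsWith(k)).map(_.length).maxOption
val op: Matcher = s => if (s.nonEmpty && "+-*/".contains(s.head)) Some(1) else None
val comp: Matcher = s =>
  if (s.startsWith("==") || s.startsWith("<=") || s.startsWith(">=")) Some(2)
  else if (s.nonEmpty && "<>".contains(s.head)) Some(1)
  else None
val equal: Matcher  = s => if (s.startsWith("=")) Some(1) else None
val lparen: Matcher = s => if (s.startsWith("(")) Some(1) else None
val rparen: Matcher = s => if (s.startsWith(")")) Some(1) else None
val id: Matcher = s =>
  Some(s.takeWhile(c => c.isLower || c.isDigit).length)
    .filter(n => n > 0 && s.head.isLower)
val number: Matcher = s => Some(s.takeWhile(_.isDigit).length).filter(_ > 0)
val skip: Matcher   = s => Some(s.takeWhile(_.isWhitespace).length).filter(_ > 0)

// Token classes in priority order, as listed above.
val classes: List[(String, Matcher)] = List(
  "keyword" -> keyword, "op" -> op, "comp" -> comp, "equal" -> equal,
  "lparen" -> lparen, "rparen" -> rparen, "id" -> id, "number" -> number,
  "skip" -> skip)

def lexAll(s: String): List[(String, String)] =
  if (s.isEmpty) Nil
  else classes.collectFirst { case (name, m) if m(s).isDefined =>
    val n = m(s).get
    (name, s.take(n)) :: lexAll(s.drop(n))
  }.getOrElse(sys.error(s"lexing error at: $s"))

def lex(s: String): List[(String, String)] =
  lexAll(s).filterNot(_._1 == "skip") // the lexer drops skip tokens
\end{lstlisting}
Note that \texttt{lex("letx1")} yields \texttt{keyword("let")} followed by
\texttt{id("x1")}: the keyword class has priority, even though the
\texttt{id} class alone would match a longer prefix (compare with the final
exercise of this section).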
\begin{exercise}{}
For each of the following strings, write down the sequence of tokens that
would be produced by the constructed lexer, if it succeeds.
\begin{enumerate}
\item \texttt{let x = 5 in x + 3}
\item \texttt{let5x2}
\item \texttt{xin}
\item \texttt{==>}
\item \texttt{<===><==}
\end{enumerate}
\begin{solution}
\begin{enumerate}
\item \texttt{[keyword("let"), id("x"), equal("="), number("5"), keyword("in"), id("x"), op("+"), number("3")]}
\item \texttt{[keyword("let"), number("5"), id("x2")]}
\item \texttt{[id("xin")]}
\item \texttt{[comp("=="), comp(">")]}
\item \texttt{[comp("<="), comp("=="), comp(">"), comp("<="), equal("=")]}
\end{enumerate}
\end{solution}
\end{exercise}
\begin{exercise}{}
Construct a string that would be lexed differently if we ran the NFAs in parallel
and instead of using token priority, simply picked the longest match.
\begin{solution}
There are many possible solutions. The key is to notice which tokens have
overlapping prefixes.
An example is \texttt{letx1}, which would be lexed as
\texttt{[keyword("let"), id("x1")]} if we check acceptance in order of
priority, but as \texttt{[id("letx1")]} if we simply pick the longest match.
\end{solution}
\end{exercise}
\documentclass[a4paper]{article}
\input{../macro}
\ifdefined\ANSWERS
\if\ANSWERS1
\printanswers
\fi
\fi
\title{CS 320 \\ Computer Language Processing\\Exercise Set 1}
\author{}
\date{February 28, 2025}
\begin{document}
\maketitle
% languages as sets
\input{ex/languages.tex}
% regex
% automata
\input{ex/dfa.tex}
% regex to automata
% constructing lexers
\input{ex/lexer.tex}
\bibliographystyle{plain}
\bibliography{../biblio}
\end{document}
\begin{exercise}{}
For each of the following languages, give a context-free grammar that
generates it:
\begin{enumerate}
\item \(L_1 = \{a^nb^m \mid n, m \in \naturals \land n \geq 0 \land m \geq n\}\)
\item \(L_2 = \{a^nb^mc^{n+m} \mid n, m \in \naturals\}\)
\item \(L_3 = \{w \in \{a, b\}^* \mid \exists m \in \naturals.\; |w| = 2m +
1 \land w_{(m)} = a \}\) (\(w\) is of odd length, has \(a\) in the middle)
\end{enumerate}
\begin{solution}
\begin{enumerate}
\item \(L_1 = \{a^nb^m \mid n, m \in \naturals \land n \geq 0 \land m \geq n\}\)
\begin{align*}
S &::= aSb \mid B\\
B &::= bB \mid \epsilon
\end{align*}
\item \(L_2 = \{a^nb^mc^{n+m} \mid n, m \in \naturals\}\)
\begin{align*}
S &::= aSc \mid B\\
B &::= bBc \mid \epsilon
\end{align*}
A small tweak to \(L_1\)'s grammar allows us to keep track of addition
precisely here. Could we do something similar for \(\{a^nb^nc^n \mid n \in
\naturals\}\)? (open-ended discussion)
\item \(L_3 = \{w \in \{a, b\}^* \mid \exists m \in \naturals.\; |w| = 2m +
1 \land w_{(m)} = a \}\)
\begin{align*}
S &::= aSb \mid bSa \mid aSa \mid bSb \mid a
\end{align*}
Note that after each recursive step, the length of the inner string has
the same parity (i.e. odd).
\end{enumerate}
\end{solution}
\end{exercise}
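As a quick sanity check of the grammar for \(L_2\) (not a proof), we can
enumerate its derivations up to a depth bound and verify that every generated
word has the shape \(a^nb^mc^{n+m}\). The Scala helpers below are
illustrative.
\begin{lstlisting}
// Words derivable from B ::= bBc | epsilon within the given depth.
def wordsB(depth: Int): Set[String] =
  if (depth == 0) Set.empty
  else wordsB(depth - 1).map(w => "b" + w + "c") + ""

// Words derivable from S ::= aSc | B within the given depth.
def wordsS(depth: Int): Set[String] =
  if (depth == 0) Set.empty
  else wordsB(depth - 1) ++ wordsS(depth - 1).map(w => "a" + w + "c")

val ok = wordsS(8).forall { w =>
  val n = w.takeWhile(_ == 'a').length
  val m = w.drop(n).takeWhile(_ == 'b').length
  w == "a" * n + "b" * m + "c" * (n + m)
}
// ok == true: every sampled word is of the form a^n b^m c^(n+m)
\end{lstlisting}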
\begin{exercise}{}
Consider the following context-free grammar \(G\):
\begin{align*}
A &::= -A \\
A &::= A - \textit{id} \\
A &::= \textit{id} \\
\end{align*}
\begin{enumerate}
\item Show that \(G\) is ambiguous, i.e., there is a string that has two
different possible parse trees with respect to \(G\).
\item Make two different unambiguous grammars recognizing the same words,
\(G_p\), where prefix-minus binds more tightly, and \(G_i\), where
infix-minus binds more tightly.
\item Show the parse trees for the string you produced in (1) with respect
to \(G_p\) and \(G_i\).
\item Produce a regular expression that recognizes the same language as
\(G\).
\end{enumerate}
\begin{solution}
\begin{enumerate}
\item An example string is \(- \textit{id} - \textit{id}\). It can be
parsed as either \((- \textit{id}) - \textit{id}\) or \(-(\textit{id} -
\textit{id})\). The corresponding parse trees are:
\begin{center}
\begin{forest}
[\(A\)
[\(A\)
[\(-\)]
[\(\textit{id}\)]
]
[\(-\)]
[\(\textit{id}\)]
]
\end{forest}
\hspace{10ex}
\begin{forest}
[\(A\)
[\(-\)]
[\(A\)
[\(A\)
[\(\textit{id}\)]
]
[\(-\)]
[\(\textit{id}\)]
]
]
\end{forest}
\end{center}
Left: prefix binds tighter, right: infix binds tighter.
\item \(G_p\):
\begin{align*}
A &::= B \mid A - \textit{id} \\
B &::= -B \mid \textit{id}
\end{align*}
\(G_i\):
\begin{align*}
A &::= C \mid -A \\
C &::= \textit{id} \mid C - \textit{id}
\end{align*}
\item Parse trees for \(- \textit{id} - \textit{id}\) with respect to \(G_p\) (left)
and \(G_i\) (right):
\begin{center}
\begin{forest}
[\(A\)
[\(A\)
[\(B\)
[\(-\)]
[\(B\)
[\(\textit{id}\)]
]
]
]
[\(-\)]
[\(\textit{id}\)]
]
\end{forest}
\hspace{10ex}
\begin{forest}
[\(A\)
[\(-\)]
[\(A\)
[\(C\)
[\(C\)
[\(\textit{id}\)]
]
[\(-\)]
[\(\textit{id}\)]
]
]
]
\end{forest}
\end{center}
\item \(L(G) = L(-^*\textit{id} (-\textit{id})^*)\). Note: \(()\) are part
of the regular expression syntax, not parentheses in the string.
\end{enumerate}
\end{solution}
\end{exercise}
\begin{exercise}{}
Consider the two following grammars \(G_1\) and \(G_2\):
\begin{align*}
G_1: & \\
S &::= S(S)S \mid \epsilon \\
G_2: & \\
R &::= RR \mid (R) \mid \epsilon
\end{align*}
\noindent
Prove that:
\begin{enumerate}
\item \(L(G_1) \subseteq L(G_2)\), by showing that for every parse tree in
\(G_1\), there exists a parse tree yielding the same word in \(G_2\).
\item (Bonus) \(L(G_2) \subseteq L(G_1)\), by showing that there exist
equivalent parse trees or derivations.
\end{enumerate}
\begin{solution}
\begin{enumerate}
\item \(L(G_1) \subseteq L(G_2)\).
We give a recursive transformation of parse trees in \(G_1\) producing
parse trees in \(G_2\).
\begin{enumerate}
\item \textbf{Base case:} The smallest parse tree is the \(\epsilon\)
production, which can be transformed as (left to right):
\begin{center}
\begin{forest}
[\(S\)
[\(\epsilon\)]
]
\end{forest}
\hspace{8ex}
\begin{forest}
[\(R\)
[\(\epsilon\)]
]
\end{forest}
\end{center}
\item \textbf{Recursive case:} Rule \(S ::= S(S)S\). The parse tree transformation is:
\begin{center}
\begin{forest}
[\(S\)
[\(S_1\)]
[\((_2\)]
[\(S_3\)]
[\()_4\)]
[\(S_5\)]
]
\end{forest}
\hspace{10ex}
\begin{forest}
[\(R\)
[\(R_1\)]
[\(R\)
[\(R\)
[\((_2\)]
[\(R_3\)]
[\()_4\)]
]
[\(R_5\)]
]
]
\end{forest}
\end{center}
The nodes are numbered to check that the order of children (left to
right) does not change. This ensures that the word yielded by the tree
is the same. The transformation is applied recursively to the children
\(S_1, S_3, S_5\) to obtain \(R_1, R_3, R_5\).
Verify that the tree on the right is indeed a parse tree in \(G_2\).
\end{enumerate}
\item \(L(G_2) \subseteq L(G_1)\).
Straightforward induction on parse trees does not work easily. The rule
\(R ::= RR\) in \(G_2\) is not directly expressible in \(G_1\) by a simple
transformation of parse trees. However, we can note that, in fact, adding
this rule to \(G_1\) does not change the language!
Consider the grammar \(G_1'\) defined by \(S ::= SS \mid S(S)S \mid
\epsilon\). We must show that for every two words \(v\) and \(w\) in
\(L(G_1)\), \(vw\) is in \(L(G_1)\), and so adding the rule \(S ::= SS\)
does not change the language.
We induct on the length \(|v| + |w|\).
\begin{enumerate}
\item \textbf{Base case:} \(|v| + |w| = 0\). \(v = w = vw = \epsilon \in
L(G_1)\). QED.
\item \textbf{Inductive case:} \(|v| + |w| = n + 1\). The induction
hypothesis is that for every \(v', w'\) with \(|v'| + |w'| \le n\), \(v'w'
\in L(G_1)\).
From the grammar, we know that either \(v = \epsilon\) or \(v = x(y)z\)
for \(x, y, z \in L(G_1)\). If \(v = \epsilon\), then \(w = vw \in
L(G_1)\). In the second case, \(vw = x(y)zw\). However, \(zw \in
L(G_1)\) by the inductive hypothesis, as \(|z| + |w| < n \).
Thus, \(vw = x(y)z'\) for \(z' \in L(G_1)\). Finally, since \(x, y, z'
\in L(G_1)\), it follows from the grammar rules that \(vw = x(y)z' \in
L(G_1)\).
\end{enumerate}
Thus, \(L(G_1) = L(G_1')\). It can now be shown just as in the first part,
that \(L(G_2) \subseteq L(G_1')\).
\end{enumerate}
\end{solution}
\end{exercise}
\begin{exercise}{}
Consider a context-free grammar \(G = (A, N, S, R)\). Define the reversed
grammar \(rev(G) = (A, N, S, rev(R))\), where \(rev(R)\) is the set of rules
produced from \(R\) by reversing the right-hand side of each rule, i.e.,
for each rule \(n ::= p_1 \ldots p_k\) in \(R\), there is a rule \(n ::=
p_k \ldots p_1\) in \(rev(R)\), and vice versa. The terminals,
non-terminals, and start symbol of the grammar remain the same.
For example, \(S ::= abS \mid \epsilon\) becomes \(S ::= Sba \mid \epsilon\).
Is it the case that for every context-free grammar \(G\) defining a language
\(L\), the language defined by \(rev(G)\) is the same as the language of
reversed strings of \(L\), \(rev(L) = \{rev(w) \mid w \in L\}\)? Give a proof
or a counterexample.
\begin{solution}
Consider any word \(w\) in the original language. Looking at the definition
of a language \(L(G)\) defined by a grammar \(G\):
\begin{equation*}
w \in L(G) \iff \exists T.\; w = yield(T) \land isParseTree(G, T)
\end{equation*}
There must exist a parse tree \(T\) for \(w\) with respect to \(G\). We must
show that there exists a parse tree for \(rev(w)\) with respect to the
reversed grammar \(G_r = rev(G)\) as well.
We propose that this is precisely the tree \(T_r = mirror(T)\). Thus, we
need to show that \(rev(w) = yield(T_r)\) and that \(isParseTree(G_r,
T_r)\).
\begin{enumerate}
\item \(rev(w) = yield(T_r)\): \(yield(\cdot)\) of a tree is the word
obtained by reading its leaves from left to right. Thus, the yield of the
mirror of a tree \(yield(mirror(\cdot))\) is the word obtained by reading
the leaves of the original tree from right to left. Thus, \(yield(T_r) =
yield(mirror(T)) = rev(yield(T)) = rev(w)\).
\item \(isParseTree(G_r, T_r)\): We need to show that \(T_r\) is a parse
tree with respect to \(G_r\). Consider the definition of a parse tree:
\begin{enumerate}
\item The root of \(T_r\) is the start symbol of \(G_r\): the root of
\(T_r = mirror(T)\) is the same as that of \(T\). Since \(T\)'s root
node must be the start symbol of \(G\), it is also the root symbol of
\(T_r\). \(G\) and \(G_r\) share the same start symbol in our
transformation.
\item The leaves are labelled by the elements of \(A\): the mirror
transformation does not alter the set or the label of leaves, only their
order. This property transfers from \(T\) to \(T_r\) as well.
\item Each non-leaf node is labelled by a non-terminal symbol: the
mirror transformation does not alter the label of non-leaf nodes either,
so this property transfers from \(T\) to \(T_r\) as well.
\item If a non-leaf node has children that are labelled \(p_1, \ldots,
p_k\) left-to-right, then there is a rule \((n ::= p_1 \ldots p_k)\) in
the grammar: consider any non-leaf node in \(T_r\), labelled \(n\), with
children labelled left-to-right \(p_1, \ldots, p_k\). By the definition
of \(mirror\), the original tree \(T\) must have the same node labelled
\(n\), with the reversed list of children left-to-right, \(p_k, \ldots,
p_1\). Since \(T\) is a parse tree for \(G\), \(n ::= p_k \ldots p_1\)
is a valid rule in \(G\), and by the reverse transformation, \(n ::= p_1
\ldots p_k\) must be a rule in \(G_r\). Thus, the property is satisfied.
\end{enumerate}
\end{enumerate}
Thus, both properties are satisfied. Therefore, the language defined by the
reversed grammar is the reversed language of the original grammar.
\end{solution}
\end{exercise}
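The two facts about \(mirror\) used above are easy to state executably. A
minimal Scala sketch, with our own tree representation (not course code):
\begin{lstlisting}
sealed trait Tree
case class Node(label: String, children: List[Tree]) extends Tree
case class Leaf(terminal: String) extends Tree

// mirror reverses the order of the children at every node.
def mirror(t: Tree): Tree = t match {
  case Leaf(a)     => Leaf(a)
  case Node(n, cs) => Node(n, cs.reverse.map(mirror))
}

// yield reads the leaves from left to right ("yield" is reserved in Scala).
def yieldOf(t: Tree): List[String] = t match {
  case Leaf(a)     => List(a)
  case Node(_, cs) => cs.flatMap(yieldOf)
}

// Property 1 of the proof, as a testable identity:
//   yieldOf(mirror(t)) == yieldOf(t).reverse
\end{lstlisting}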
\begin{exercise}{}
Recall the pumping lemma for regular languages:
For any language \(L \subseteq \Sigma^*\), if \(L\) is regular, there exists a
strictly positive constant \(p \in \naturals\) such that every word \(w \in
L\) with \(|w| \geq p\) can be written as \(w = xyz\) such that:
\begin{itemize}
\item \(x, y, z \in \Sigma^*\)
\item \(|y| > 0\)
\item \(|xy| \leq p\), and
\item \(\forall i \in \naturals.\; xy^iz \in L\)
\end{itemize}
Consider the language \(L = \{w \in \{a\}^* \mid |w| \text{ is prime}\}\).
Show that \(L\) is not regular by using the pumping lemma.
\begin{solution}
\(L = \{w \in \{a\}^* \mid |w| \text{ is prime}\}\) is not a regular
language.
To the contrary, assume it is regular, and so there exists a constant
\(p\) such that the pumping conditions hold for this language.
Consider the word \(w = a^{n} \in L\), for some prime \(n \geq p\). By the
pumping lemma, we can write \(w = xyz\) such that \(|y| > 0\), \(|xy| \leq
p\), and \(xy^iz \in L\) for all \(i \geq 0\).
Let \(|xz| = m\) and \(|y| = k\) for some natural numbers \(m\) and \(k\),
so that \(m + k = n\) and \(|xy^iz| = m + ik\) for all \(i\). Since by the
pumping lemma \(xy^iz \in L\) for every \(i\), it follows that the length
\(m + ik\) is prime for every \(i\). Take \(i = n + 1\): then \(m + (n +
1)k = (m + k) + nk = n(1 + k)\). Since \(n \geq 2\) and \(1 + k \geq 2\)
(recall \(|y| = k > 0\)), this length is a product of two factors greater
than \(1\), and hence not prime. This is a contradiction.
Thus, this language is not regular.
\end{solution}
\end{exercise}
\documentclass[a4paper]{article}
\input{../macro}
\ifdefined\ANSWERS
\if\ANSWERS1
\printanswers
\fi
\fi
\title{CS 320 \\ Computer Language Processing\\Exercise Set 2}
\author{}
\date{March 7, 2025}
\begin{document}
\maketitle
\input{ex/pumping}
\input{ex/cfg}
\end{document}
% Compiler Design 3.9
\begin{exercise}{}
Compute \(\nullable\), \(\first\), and \(\follow\) for the non-terminals \(A\)
and \(B\) in the following grammar:
%
\begin{align*}
A &::= BAa \\
A &::= \\
B &::= bBc \\
B &::= AA
\end{align*}
Remember to extend the grammar with an extra start production for the
computation of \(\follow\).
\begin{solution}
\begin{enumerate}
\item \(\nullable\): we get the constraints
\begin{gather*}
\nullable(A) = \nullable(BAa) \lor \nullable(\epsilon) \\
\nullable(B) = \nullable(bBc) \lor \nullable(AA)
\end{gather*}
We can solve these to get \(\nullable(A) = \nullable(B) = true\).
\item \(\first\): we get the constraints (given that both \(A\) and \(B\)
are nullable, the rule \(BAa\) contributes \(\first(B)\), \(\first(A)\),
and \(\first(a)\)):
\begin{align*}
\first(A) &= \first(BAa) \cup \first(\epsilon) \\
&= \first(B) \cup \first(A) \cup \{a\} \cup \emptyset \\
&= \first(B) \cup \first(A) \cup \{a\} \\
\first(B) &= \first(bBc) \cup \first(AA) \\
&= \{b\} \cup \first(A) \cup \first(A) \\
&= \{b\} \cup \first(A)
\end{align*}
Starting from \(\first(A) = \first(B) = \emptyset\), we iteratively
compute the fixpoint to get \(\first(A) = \first(B) = \{a, b\}\).
\item \(\follow\): we add a production \(A' ::= A~\mathbf{EOF}\), and get
the constraints (in order of productions):
\begin{gather*}
\{\mathbf{EOF}\} \subseteq \follow(A) \\
\\
\first(A) \cup \{a\} \subseteq \follow(B) \\
\{a\} \subseteq \follow(A) \\
\\
\{c\} \subseteq \follow(B) \\
\\
\first(A) \subseteq \follow(A) \\
\follow(B) \subseteq \follow(A)
\end{gather*}
Substituting the computed \(\first\) sets, and computing a fixpoint, we
get \(\follow(A) = \{a, b, c,\mathbf{EOF}\}\) and \(\follow(B) = \{a, b,
c\}\).
\end{enumerate}
\end{solution}
\end{exercise}
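The fixpoint computations above can be written out directly. A minimal Scala
sketch for \(\nullable\) and \(\first\) of this grammar, with our own
representation (the nonterminals are the characters \texttt{A} and
\texttt{B}):
\begin{lstlisting}
val rules: Map[Char, List[List[Char]]] =
  Map('A' -> List(List('B', 'A', 'a'), Nil),
      'B' -> List(List('b', 'B', 'c'), List('A', 'A')))
val nonterminals = rules.keySet

// nullable: iterate the constraints until nothing changes.
def nullable: Set[Char] = {
  def step(nl: Set[Char]): Set[Char] =
    nonterminals.filter(n => rules(n).exists(_.forall(nl)))
  var cur = Set.empty[Char]; var next = step(cur)
  while (next != cur) { cur = next; next = step(cur) }
  cur
}

// first: scan each right-hand side while its symbols are nullable.
def first(nl: Set[Char]): Map[Char, Set[Char]] = {
  def firstOf(rhs: List[Char], fs: Map[Char, Set[Char]]): Set[Char] = rhs match {
    case Nil => Set.empty
    case s :: rest if nonterminals(s) =>
      fs(s) ++ (if (nl(s)) firstOf(rest, fs) else Set.empty)
    case s :: _ => Set(s)
  }
  var fs = nonterminals.map(n => n -> Set.empty[Char]).toMap
  var changed = true
  while (changed) {
    val nfs = nonterminals.map(n => n -> rules(n).flatMap(r => firstOf(r, fs)).toSet).toMap
    changed = nfs != fs; fs = nfs
  }
  fs
}

// nullable == Set('A', 'B'); first(nullable) maps both 'A' and 'B' to Set('a', 'b').
\end{lstlisting}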
% Compiler design 3.11
\begin{exercise}{}
Given the following grammar for arithmetic expressions:
\begin{align*}
S &::= Exp~\mathbf{EOF} \\
Exp &::= Term~ Add \\
Add &::= +~ Term~ Add \\
Add &::= -~ Term~ Add \\
Add &::= \\
Term &::= Factor~ Mul \\
Mul &::= *~ Factor~ Mul \\
Mul &::= /~ Factor~ Mul \\
Mul &::= \\
Factor &::= \mathbf{num} \\
Factor &::= (Exp) \\
\end{align*}
\begin{enumerate}
\item Compute \(\nullable\), \(\first\), \(\follow\) for each of the
non-terminals in the grammar.
\item Check if the grammar is LL(1). If not, modify the grammar to make it
so.
\item Build the LL(1) parsing table for the grammar.
\item Using your parsing table, parse or attempt to parse (till error) the
following strings, assuming that \(\mathbf{num}\) matches any natural
number:
\begin{enumerate}
\item \((3 + 4) * 5 ~\mathbf{EOF}\)
\item \(2 + + ~\mathbf{EOF}\)
\item \(2 ~\mathbf{EOF}\)
\item \(2 * 3 + 4 ~\mathbf{EOF}\)
\item \(2 + 3 * 4 ~\mathbf{EOF}\)
\end{enumerate}
\end{enumerate}
\begin{solution}
\begin{enumerate}
\item We can compute the \(\nullable\), \(\first\), and \(\follow\) sets as:
\begin{enumerate}
\item \(\nullable\):
%
\begin{align*}
\nullable(S) &= false \\
\nullable(Exp) &= false \\
\nullable(Add) &= true \\
\nullable(Term) &= false \\
\nullable(Mul) &= true \\
\nullable(Factor) &= false
\end{align*}
\item \(\first\): we have constraints:
%
\begin{align*}
\first(S) &= \first(Exp) \\
\first(Exp) &= \first(Term) \\
\first(Add) &= \{+\} \cup \{-\} \cup \emptyset \\
\first(Term) &= \first(Factor) \\
\first(Mul) &= \{*\} \cup \{/\} \cup \emptyset \\
\first(Factor) &= \{\mathbf{num}\} \cup \{(\}
\end{align*}
%
which can be solved to get:
%
\begin{align*}
\first(S) &= \{\mathbf{num}, (\} \\
\first(Exp) &= \{\mathbf{num}, (\} \\
\first(Add) &= \{+, -\} \\
\first(Term) &= \{\mathbf{num}, (\} \\
\first(Mul) &= \{*, /\} \\
\first(Factor) &= \{\mathbf{num}, (\}
\end{align*}
\item \(\follow\): we have constraints (for each rule, except
empty/terminal rules):
\begin{multicols}{2}
\allowdisplaybreaks
\begin{align*}
\{\mathbf{EOF}\} &\subseteq \follow(Exp) \\
&\\
\first(Add) &\subseteq \follow(Term) \\
\follow(Exp) &\subseteq \follow(Term) \\
\follow(Exp) &\subseteq \follow(Add) \\
&\\
\first(Add) &\subseteq \follow(Term) \\
\follow(Add) &\subseteq \follow(Term) \\
&\\
\first(Add) &\subseteq \follow(Term) \\
\follow(Add) &\subseteq \follow(Term) \\
&\\
\first(Mul) &\subseteq \follow(Factor) \\
\follow(Term) &\subseteq \follow(Factor) \\
\follow(Term) &\subseteq \follow(Mul) \\
&\\
\first(Mul) &\subseteq \follow(Factor) \\
\follow(Mul) &\subseteq \follow(Factor) \\
&\\
\first(Mul) &\subseteq \follow(Factor) \\
\follow(Mul) &\subseteq \follow(Factor) \\
&\\
\{)\} &\subseteq \follow(Exp) \\
\end{align*}
\end{multicols}
The fixpoint can again be computed to get:
\begin{align*}
\follow(S) &= \{\} \\
\follow(Exp) &= \{), \mathbf{EOF}\} \\
\follow(Add) &= \{), \mathbf{EOF}\} \\
\follow(Term) &= \{+, -, ), \mathbf{EOF}\} \\
\follow(Mul) &= \{+, -, ), \mathbf{EOF}\} \\
\follow(Factor) &= \{+, -, *, /, ), \mathbf{EOF}\}
\end{align*}
\end{enumerate}
\item The grammar is LL(1); there are no conflicts, as demonstrated by the
parsing table below.
\item LL(1) parsing table:
\begin{center}
\begin{tabular}{c|c|c|c|c|c|c|c|c}
& \(\mathbf{num}\) & \(+\) & \(-\) & \(*\) & \(/\) & \((\) & \()\) & \(\mathbf{EOF}\) \\
\hline
\(S\) & 1 & & & & & 1 & &\\
\(Exp\) & 1 & & & & & 1 & &\\
\(Add\) & & 1 & 2 & & & & 3 & 3 \\
\(Term\) & 1 & & & & & 1 & & \\
\(Mul\) & & 3 & 3 & 1 & 2 & & 3 & 3 \\
\(Factor\) & 1 & & & & & 2 & & \\
\end{tabular}
\end{center}
\item Parsing the strings:
\begin{enumerate}
\item \((3 + 4) * 5 ~\mathbf{EOF}\) \checkmark
\item \(2 + + ~\mathbf{EOF}\) --- fails on the second \(+\). The
corresponding error cell in the parsing table is \((Term, +)\).
\item \(2 ~\mathbf{EOF}\) \checkmark
\item \(2 * 3 + 4 ~\mathbf{EOF}\) \checkmark
\item \(2 + 3 * 4 ~\mathbf{EOF}\) \checkmark
\end{enumerate}
Example step-by-step LL(1) parsing state for \(2 * 3 + 4\):
\begin{center}
\begin{tabular}{c c c}
Lookahead & Stack & Next Rule \\
\hline
\(2\) & \(S\) & \(S ::= Exp ~\mathbf{EOF}\)\\
\(2\) & \(Exp ~ \mathbf{EOF}\) & \(Exp ::= Term~Add\)\\
\(2\) & \(Term ~ Add ~ \mathbf{EOF}\) & \(Term ::= Factor~Mul\)\\
\(2\) & \(Factor ~ Mul ~ Add ~ \mathbf{EOF}\) & \(Factor ::= \mathbf{num}\)\\
\(2\) & \(\mathbf{num} ~ Mul ~ Add ~ \mathbf{EOF}\) & \(match(\mathbf{num})\)\\
\(*\) & \(Mul ~ Add ~ \mathbf{EOF}\) & \(Mul ::= *~Factor~Mul\)\\
\(*\) & \(* ~Factor ~ Mul ~ Add ~ \mathbf{EOF}\) & \(match(*)\)\\
\(3\) & \(Factor ~ Mul ~ Add ~ \mathbf{EOF}\) & \(Factor ::= \mathbf{num}\)\\
\(3\) & \(\mathbf{num} ~ Mul ~ Add ~ \mathbf{EOF}\) & \(match(\mathbf{num})\)\\
\(+\) & \(Mul ~ Add ~ \mathbf{EOF}\) & \(Mul ::=\)\\
\(+\) & \(Add ~ \mathbf{EOF}\) & \(Add ::= +~Term~Add\)\\
\(+\) & \(+ ~Term ~Add ~ \mathbf{EOF}\) & \(match(+)\)\\
\(4\) & \(Term ~Add ~ \mathbf{EOF}\) & \(Term ::= Factor~Mul\)\\
\(4\) & \(Factor ~Mul ~Add ~ \mathbf{EOF}\) & \(Factor ::= \mathbf{num}\)\\
\(4\) & \(\mathbf{num} ~Mul ~Add ~ \mathbf{EOF}\) & \(match(\mathbf{num})\)\\
\(\mathbf{EOF}\) & \(Mul ~Add ~ \mathbf{EOF}\) & \(Mul ::= \)\\
\(\mathbf{EOF}\) & \(Add ~ \mathbf{EOF}\) & \(Add ::= \)\\
\(\mathbf{EOF}\) & \(\mathbf{EOF}\) & \(match(\mathbf{EOF})\)\\
\end{tabular}
\end{center}
\end{enumerate}
\end{solution}
\end{exercise}
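A table-driven LL(1) parser for this grammar is short enough to sketch in
Scala. The encoding below (plain strings for symbols, the table from part 3)
is illustrative:
\begin{lstlisting}
type Sym = String // terminals: "num" "+" "-" "*" "/" "(" ")" "EOF"
val nonterminals = Set("S", "Exp", "Add", "Term", "Mul", "Factor")

val sR = List("Exp", "EOF"); val eR = List("Term", "Add"); val tR = List("Factor", "Mul")
val table: Map[(Sym, Sym), List[Sym]] = Map(
  ("S", "num") -> sR, ("S", "(") -> sR,
  ("Exp", "num") -> eR, ("Exp", "(") -> eR,
  ("Add", "+") -> List("+", "Term", "Add"), ("Add", "-") -> List("-", "Term", "Add"),
  ("Add", ")") -> Nil, ("Add", "EOF") -> Nil,
  ("Term", "num") -> tR, ("Term", "(") -> tR,
  ("Mul", "*") -> List("*", "Factor", "Mul"), ("Mul", "/") -> List("/", "Factor", "Mul"),
  ("Mul", "+") -> Nil, ("Mul", "-") -> Nil, ("Mul", ")") -> Nil, ("Mul", "EOF") -> Nil,
  ("Factor", "num") -> List("num"), ("Factor", "(") -> List("(", "Exp", ")"))

def parse(input: List[Sym]): Boolean = {
  def loop(stack: List[Sym], rest: List[Sym]): Boolean = (stack, rest) match {
    case (Nil, Nil) => true
    case (top :: st, tok :: _) if nonterminals(top) =>
      table.get((top, tok)) match {
        case Some(rhs) => loop(rhs ++ st, rest) // expand, do not consume
        case None      => false                 // error cell, e.g. (Term, "+")
      }
    case (top :: st, tok :: ts) if top == tok => loop(st, ts) // match terminal
    case _ => false
  }
  loop(List("S"), input)
}

// parse(List("num", "*", "num", "+", "num", "EOF")) == true
// parse(List("num", "+", "+", "EOF")) == false, failing at cell (Term, "+")
\end{lstlisting}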
\begin{exercise}{}
If \(L\) is a regular language, then the set of prefixes of words in \(L\) is
also a regular language. Given this fact, from a regular expression for \(L\),
we should be able to obtain a regular expression for the set of all prefixes
of words in \(L\) as well.
We want to do this with a function \(\prefixes\) that is recursive over the
structure of the regular expression for \(L\), i.e. of the form:
%
\begin{align*}
\prefixes(\epsilon) &= \epsilon \\
\prefixes(a) &= a \mid \epsilon \\
\prefixes(r \mid s) &= \prefixes(r) \mid \prefixes(s) \\
\prefixes(r \cdot s) &= \ldots \\
\prefixes(r^*) &= \ldots \\
\prefixes(r^+) &= \ldots
\end{align*}
\begin{enumerate}
\item Complete the definition of \(\prefixes\) above by filling in the
missing cases.
\item Use this definition to find:
\begin{enumerate}
\item \(\prefixes(ab^*c)\)
\item \(\prefixes((a \mid bc)^*)\)
\end{enumerate}
\end{enumerate}
\begin{solution}
The computation for \(\prefixes(\cdot)\) is similar to the computation of
\(\first(\cdot)\) for grammars.
\begin{enumerate}
\item The missing cases:
\begin{enumerate}
\item \(\prefixes(r \cdot s) = \prefixes(r) \mid r \cdot \prefixes(s)\).
Either we have read \(r\) partially, or we have read all of \(r\), and a
part of \(s\).
\item \(\prefixes(r^*) = r^*\cdot\prefixes(r)\). We can
consider \(r^* = \epsilon \mid r \mid rr \mid \ldots\), and apply the
rules for union and concatenation. Intuitively, if the word has \(n \ge
0\) instances of \(r\), we can read \(m < n\) instances of \(r\), and
then a prefix of the next instance of \(r\).
\item \(\prefixes(r^+) = r^* \cdot \prefixes(r)\). Same as
previous. Why does the empty case still appear?
\end{enumerate}
\item The prefix computations are:
\begin{enumerate}
\item \(\prefixes(ab^*c) = \epsilon \mid a \mid ab^*(b \mid c \mid \epsilon)\). Computation:
\begin{align*}
\prefixes(ab^*c) &= \prefixes(a) \mid a\cdot\prefixes(b^*c) & [\text{concatenation}]\\
&= (a \mid \epsilon) \mid a\cdot\prefixes(b^*c) &[a]\\
&= (a \mid \epsilon) \mid a\cdot(\prefixes(b^*) \mid b^*\prefixes(c)) &[\text{concatenation}]\\
&= (a \mid \epsilon) \mid a\cdot(\prefixes(b^*) \mid b^*(c \mid \epsilon)) &[c]\\
&= (a \mid \epsilon) \mid a\cdot(b^*\prefixes(b) \mid b^*(c \mid \epsilon)) &[\text{star}]\\
&= (a \mid \epsilon) \mid a\cdot(b^*(b \mid \epsilon) \mid b^*(c \mid \epsilon)) &[b]\\
&= (a \mid \epsilon) \mid a\cdot(b^*(b \mid c \mid \epsilon)) &[\text{rewrite}]\\
&= \epsilon \mid a \mid a\cdot(b^*(b \mid c \mid \epsilon)) & [\text{rewrite}]\\
\end{align*}
\item \(\prefixes((a \mid bc)^*) = (a \mid bc)^*(\epsilon \mid a \mid b \mid bc)\).
\end{enumerate}
\end{enumerate}
\end{solution}
\end{exercise}
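The recursive definition of \(\prefixes\) translates directly into code. A
minimal Scala sketch over an assumed regular-expression ADT:
\begin{lstlisting}
sealed trait Regex
case object Eps extends Regex
case class Chr(c: Char) extends Regex
case class Union(l: Regex, r: Regex) extends Regex
case class Concat(l: Regex, r: Regex) extends Regex
case class Star(r: Regex) extends Regex
case class Plus(r: Regex) extends Regex

def prefixes(r: Regex): Regex = r match {
  case Eps           => Eps
  case Chr(c)        => Union(Chr(c), Eps)
  case Union(l, rr)  => Union(prefixes(l), prefixes(rr))
  case Concat(l, rr) => Union(prefixes(l), Concat(l, prefixes(rr)))
  case Star(rr)      => Concat(Star(rr), prefixes(rr))
  case Plus(rr)      => Concat(Star(rr), prefixes(rr))
}

// prefixes(Concat(Chr('a'), Concat(Star(Chr('b')), Chr('c')))) unfolds to
// (a | eps) | a ((b* (b | eps)) | b* (c | eps)), matching the computation above.
\end{lstlisting}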
% this language is not LL 1 actually, I think
% \begin{exercise}{}
% Consider the following grammar of \(\mathbf{if}-\mathbf{then}-\mathbf{else}\) expressions with assignments:
% %
% \begin{align*}
% stmt &::= \mathbf{if} ~id = id~ \mathbf{then} ~stmt ~optStmt \\
% &::= \{ stmt^* \} \\
% &::= id = id; \\
% optStmt &::= \epsilon \mid \mathbf{else} ~stmt \\
% \end{align*}
% \begin{enumerate}
% \item Show that the grammar is ambiguous.
% \item Is the grammar LL(1)?
% \end{enumerate}
% \end{exercise}
\begin{exercise}{}
Argue that the following grammar is \emph{not} LL(1). Produce an equivalent
LL(1) grammar.
\begin{equation*}
E ::= \mathbf{num} + E \mid \mathbf{num} - E \mid \mathbf{num}
\end{equation*}
\begin{solution}
The grammar is clearly not LL(1): on seeing a token \(\mathbf{num}\), we
cannot decide whether to continue parsing with \(\mathbf{num} + E\),
\(\mathbf{num} - E\), or \(\mathbf{num}\) alone.
The notable problem is the common prefix between the rules. We can separate
this out by introducing a new non-terminal \(T\). This is a transformation
known as \emph{left factorization}.
\begin{align*}
E &::= \mathbf{num} ~T \\
T &::= + E \mid - E \mid \epsilon
\end{align*}
% without changing the terms or the overall "structure" of the grammar, we
% have logically partitioned it to fit within our parsing schema.
\end{solution}
\end{exercise}
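The left-factored grammar admits a direct recursive-descent parser, where
every decision is made from a single lookahead token. A minimal Scala sketch
(the token representation is ours):
\begin{lstlisting}
class Parser(tokens: List[String]) {
  private var rest = tokens
  private def eat(t: String): Unit =
    if (rest.headOption.contains(t)) rest = rest.tail
    else sys.error(s"expected $t, got $rest")

  // E ::= num T
  def parseE(): Unit = { eat("num"); parseT() }

  // T ::= + E | - E | epsilon -- decided by the lookahead alone
  def parseT(): Unit = rest.headOption match {
    case Some("+") => eat("+"); parseE()
    case Some("-") => eat("-"); parseE()
    case _         => () // epsilon
  }
}

// new Parser(List("num", "+", "num", "-", "num")).parseE() succeeds
\end{lstlisting}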
\begin{exercise}{}
Consider the following grammar:
\begin{equation*}
S ::= S(S) \mid S[S] \mid () \mid [\;]
\end{equation*}
Check whether the same transformation as the previous case can be applied to
produce an LL(1) grammar. If not, argue why, and suggest a different
transformation.
\begin{solution}
Applying left factorization to the grammar, we get:
\begin{align*}
S &::= S ~T \mid () \mid [\;] \\
T &::= (S) \mid [S]
\end{align*}
This is still not LL(1): the rules \(S ::= S~T\) and \(S ::= ()\) both have
\((\) in their \(\first\) sets (and likewise \(S ::= S~T\) and \(S ::=
[\;]\) share \([\)), so on reading a token ``\((\)'' we cannot decide which
rule to apply.
The problem is that this version of the grammar is left-recursive. A
recursive-descent parser for this grammar would loop forever on the first
rule. This is caused by the fact that our parsers are top-down, left to
right. We can fix this by \emph{moving} the recursion to the right. This is
generally called \emph{left recursion elimination}.
The transformed grammar is as follows (explanation below):
\begin{align*}
S &::= ()S' \mid [\;]S' \\
S' &::= (S)S' \mid [S]S' \mid \epsilon
\end{align*}
To eliminate left-recursion in general, consider a non-terminal \(A ::=
A\alpha \mid \beta\), where \(\beta\) does not start with \(A\) (not
left-recursive). We can remove the left recursion by introducing a new
non-terminal, \(A'\), such that:
\begin{align*}
A &::= \beta A' \\
A' &::= \alpha A' \mid \epsilon
\end{align*}
i.e., for the left-recursive rule \(A\alpha\), we instead attempt to parse
an \(\alpha\) followed by the rest. In exchange, the base case \(\beta\) now
expects an \(A'\) to follow it.
%
Note that \(\beta\) can be empty as well.
Intuitively, we are shifting the direction in which we look for instances of
\(A\). Consider a partial derivation starting from \(\beta \alpha \alpha
\alpha\). The original version of the grammar would complete the parsing as:
\begin{center}
\begin{forest}
[\(A\)
[\(A\)
[\(A\)
[\(A\)
[\(\beta\)]
]
[\(\alpha\)]
]
[\(\alpha\)]
]
[\(\alpha\)]
]
\end{forest}
\end{center}
but with the new grammar, we parse it as:
\begin{center}
\begin{forest}
[\(A\)
[\(\beta\)]
[\(A'\)
[\(\alpha\)]
[\(A'\)
[\(\alpha\)]
[\(A'\)
[\(\alpha\)]
[\(A'\)
[\(\epsilon\)]
]
]
]
]
]
\end{forest}
\end{center}
There are two main pitfalls to remember with left-recursion elimination:
\begin{enumerate}
\item it may need to be applied several times till the grammar is
unchanged, as the first transformation may introduce new (indirect)
recursive rules (check \(A ::= AA\alpha \mid \epsilon\)).
\item it may require \emph{inlining} some non-terminals, when the left
recursion is \emph{indirect}. For example, consider \(A ::= B\alpha, B ::=
A\beta\), where there is no immediate reduction to do, but inlining \(B\),
we get \(A ::= A\beta\alpha\), where the elimination can be applied.
\end{enumerate}
\end{solution}
\end{exercise}
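The general transformation can itself be written as a program. A minimal
Scala sketch of direct left-recursion elimination (the rule representation is
ours):
\begin{lstlisting}
case class Rule(lhs: String, rhs: List[String]) // symbols as plain strings

// A ::= A a1 | ... | A am | b1 | ... | bn   becomes
// A ::= b1 A' | ... | bn A'   and   A' ::= a1 A' | ... | am A' | epsilon
def eliminateLeftRecursion(nt: String, rules: List[Rule]): List[Rule] = {
  val (rec, base) =
    rules.filter(_.lhs == nt).partition(_.rhs.headOption.contains(nt))
  if (rec.isEmpty) rules
  else {
    val fresh = nt + "'"
    val newBase = base.map(r => Rule(nt, r.rhs :+ fresh))
    val newRec  = rec.map(r => Rule(fresh, r.rhs.tail :+ fresh)) :+ Rule(fresh, Nil)
    rules.filterNot(_.lhs == nt) ++ newBase ++ newRec
  }
}
\end{lstlisting}
Applied to \(S ::= S~T \mid () \mid [\;]\), this yields \(S ::= ()S' \mid
[\;]S'\) and \(S' ::= T~S' \mid \epsilon\); inlining \(T\) gives the grammar
above. Note that a single pass may leave new left recursion behind (pitfall
1, e.g. \(A ::= AA\alpha \mid \epsilon\)), so the transformation may need to
be iterated.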
\documentclass[a4paper]{article}
\input{../macro}
\ifdefined\ANSWERS
\if\ANSWERS1
\printanswers
\fi
\fi
\DeclareMathOperator{\prefixes}{prefixes}
\DeclareMathOperator{\first}{first}
\DeclareMathOperator{\nullable}{nullable}
\DeclareMathOperator{\follow}{follow}
\title{CS 320 \\ Computer Language Processing\\Exercise Set 3}
\author{}
\date{March 19, 2025}
\begin{document}
\maketitle
% prefixes of regular expressions
\input{ex/prefix}
% compute nullable follow first for CFGs
\input{ex/compute}
% build ll1 parsing table, parse or attempt to parse some strings
\input{ex/table}
\end{document}
\begin{exercise}{}
For each of the following pairs of grammars, show that they are equivalent by
identifying them with inductive relations, and proving that the inductive
relations contain the same elements.
\begin{enumerate}
\item
\(A_1 : S ::= S + S \mid \num \) \\
\(A_2 : R ::= \num ~R' \text{ and } R' ::= + R~ R' \mid \epsilon\)
\item
\(B_1 : S ::= S(S)S \mid \epsilon \) \\
\(B_2 : R ::= RR \mid (R) \mid \epsilon\)
\end{enumerate}
\begin{solution}
\begin{enumerate}
\item \(A_2\) is the result of left-recursion elimination on \(A_1\).
First, expressing them as inductive relations, with rules named as on the
right:
%
\addtolength{\jot}{1ex}
\begin{gather*}
\AxiomC{\phantom{\(w_1 \in S\)}}
\RightLabel{\(S_{num}\)}
\UnaryInfC{\(\num \in S\)}
\DisplayProof
\quad
\AxiomC{\(w_1 \in S\)}
\AxiomC{\(w_2 \in S\)}
\RightLabel{\(S_+\)}
\BinaryInfC{\(w_1 + w_2 \in S\)}
\DisplayProof \\
%
\AxiomC{\(w \in S\)}
\RightLabel{\(A_{1}^{start}\)}
\UnaryInfC{\(w \in A_1\)}
\DisplayProof \\
%
\AxiomC{\(w \in R'\)}
\RightLabel{\(R_{num}\)}
\UnaryInfC{\(\num~ w\in R\)}
\DisplayProof \\
%
\AxiomC{\(w \in R\)}
\AxiomC{\(w' \in R'\)}
\RightLabel{\(R'_{+}\)}
\BinaryInfC{\(+w ~ w' \in R'\)}
\DisplayProof
\quad
\AxiomC{\phantom{\(w' \in R'\)}}
\RightLabel{\(R'_{\epsilon}\)}
\UnaryInfC{\(\epsilon \in R'\)}
\DisplayProof \\
%
\AxiomC{\(w \in R\)}
\RightLabel{\(A_{2}^{start}\)}
\UnaryInfC{\(w \in A_2\)}
\DisplayProof
\end{gather*}
We must show that for any word \(w\), \(w \in A_1\) if and only if \(w \in
A_2\). For this, it must be the case that there is a derivation tree for
\(w \in A_1\) (equivalently, \(w \in S\)) if and only if there is a
derivation tree for \(w \in A_2\) (equivalently, \(w \in R\)) according to
the inference rules above.
\begin{enumerate}
\item \(w \in S \implies w \in R\): we induct on the depth of the
derivation tree.
\begin{itemize}
\item Base case: derivation tree of depth 1. The tree must be
\begin{gather*}
\AxiomC{}
\RightLabel{\(S_{num}\)}
\UnaryInfC{\(\num \in S\)}
\DisplayProof
\end{gather*}
We can show that there is a corresponding derivation tree for \(w \in R\):
\begin{gather*}
\AxiomC{}
\RightLabel{\(R'_{\epsilon}\)}
\UnaryInfC{\(\epsilon \in R'\)}
\RightLabel{\(R_{num}\)}
\UnaryInfC{\(\num \in R\)}
\DisplayProof
\end{gather*}
\item Inductive case: derivation tree of depth \(n+1\), given that for
every derivation of depth \( \le n\) of \(w' \in S\) for any \(w'\), there
is a corresponding derivation of \(w' \in R\). The last rule applied
in the derivation must be \(S_+\):
\begin{gather*}
\AxiomC{\ldots}
\UnaryInfC{\(w_1 \in S\)}
\AxiomC{\ldots}
\UnaryInfC{\(w_2 \in S\)}
\RightLabel{\(S_+\)}
\BinaryInfC{\(w_1 + w_2 \in S\)}
\DisplayProof
\end{gather*}
By the inductive hypothesis, since \(w_1 \in S\) and \(w_2 \in S\)
have a derivation tree of smaller depth, there are derivation trees
for \(w_1 \in R\) and \(w_2 \in R\). In particular, the derivation for
\(w_1 \in R\) must end with the rule \(R_{num}\) (only case), so there
must be a derivation tree for \(\num ~w_1' \in R\) with \(w_1' \in R'\) and
\(\num ~w_1' = w_1\). We have the following pieces:
\begin{gather*}
\AxiomC{\ldots}
\UnaryInfC{\(w_1' \in R'\)}
\RightLabel{\(R_{num}\)}
\UnaryInfC{\(\num ~w_1' \in R\)}
\DisplayProof
\quad
\AxiomC{\ldots}
\UnaryInfC{\(w_2 \in R\)}
\DisplayProof
\end{gather*}
To show that \(w_1 + w_2 \in R\), i.e. \(\num ~w_1' + w_2 \in R\), we
must first show that \(w_1' + w_2 \in R'\), as required by the rule
\(R_{num}\). Note that words in \(R'\) are of the form \((+ \num)^*\).
We will prove this separately for all pairs of words at the end
(\(R'_{Lemma}\)). Knowing this, however, we can construct the
derivation tree for \(w_1 + w_2 \in R\):
\begin{gather*}
\AxiomC{\ldots}
\UnaryInfC{\(w_1' \in R'\)}
\AxiomC{\ldots}
\UnaryInfC{\(w_2 \in R\)}
\RightLabel{\(R'_{Lemma}\)}
\BinaryInfC{\(w_1' + w_2 \in R'\)}
\RightLabel{\(R_{num}\)}
\UnaryInfC{\(\num ~w_1' + w_2 \in R\)}
\DisplayProof
\end{gather*}
\(\num ~w_1' + w_2 = w_1 + w_2 = w\), as required.
Finally, we will show the required lemma. We will prove a stronger
property \(R'_{concat}\) first, that for any pair of words \(w_1, w_2
\in R'\), \(w_1 ~w_2 \in R'\) as well. We induct on the derivation of
\(w_1 \in R'\).
Base case: derivation ends with \(R'_\epsilon\). Then \(w_1 =
\epsilon\), and \(w_1 ~w_2 = w_2 \in R'\) by assumption.
Inductive case: derivation ends with \(R'_+\). Then \(w_1 = + v v'\)
for some \(v \in R\) and \(v' \in R'\):
\begin{gather*}
\AxiomC{\ldots}
\UnaryInfC{\(v \in R\)}
\AxiomC{\ldots}
\UnaryInfC{\(v' \in R'\)}
\RightLabel{\(R'_+\)}
\BinaryInfC{\(+ v ~v' \in R'\)}
\DisplayProof
\end{gather*}
Since \(v' \in R'\) has a smaller derivation tree than \(w_1\), by the
inductive hypothesis, we can prove that \(v'~w_2 \in R'\). We get:
\begin{gather*}
\AxiomC{\ldots}
\UnaryInfC{\(v \in R\)}
\AxiomC{\ldots}
\UnaryInfC{\(v' \in R'\)}
\AxiomC{\ldots}
\UnaryInfC{\(w_2 \in R'\)}
\RightLabel{\(R'_{concat}\)}
\BinaryInfC{\(v' ~w_2 \in R'\)}
\RightLabel{\(R'_+\)}
\BinaryInfC{\(+ v ~v' ~w_2 \in R'\)}
\DisplayProof
\end{gather*}
So, \(R'_{concat}\) is proven. We can show \(R'_{Lemma}\), i.e. \(w_1'
+ w_2 \in R'\) if \(w_1' \in R'\) and \(w_2 \in R\) as:
\begin{gather*}
\AxiomC{\ldots}
\UnaryInfC{\(w_1' \in R'\)}
\AxiomC{\ldots}
\UnaryInfC{\(w_2 \in R\)}
\AxiomC{}
\RightLabel{\(R'_\epsilon\)}
\UnaryInfC{\(\epsilon \in R'\)}
\RightLabel{\(R'_+\)}
\BinaryInfC{\(+ w_2 \in R'\)}
\RightLabel{\(R'_{concat}\)}
\BinaryInfC{\(w_1' + w_2 \in R'\)}
\DisplayProof
\end{gather*}
Thus, the proof is complete.
\end{itemize}
\item \(w \in R \implies w \in S\): we induct on the depth of the
derivation tree for \(w \in R\). This direction is simpler than the other,
but the general method is similar.
\begin{itemize}
\item Base case: derivation tree of depth 2 (minimum). The tree must be
\begin{gather*}
\AxiomC{}
\RightLabel{\(R'_{\epsilon}\)}
\UnaryInfC{\(\epsilon \in R'\)}
\RightLabel{\(R_{num}\)}
\UnaryInfC{\(\num \in R\)}
\DisplayProof
\end{gather*}
We have the corresponding derivation tree for \(w \in S\):
\begin{gather*}
\AxiomC{}
\RightLabel{\(S_{num}\)}
\UnaryInfC{\(\num \in S\)}
\DisplayProof
\end{gather*}
\item Inductive case: derivation tree of depth \(n+1\), given that for
every derivation of depth \(\le n\) of \(w' \in R\) for any \(w'\),
there is a corresponding derivation of \(w' \in S\). The last rules
applied must be \(R_{num}\) and \(R'_{+}\) (otherwise the derivation
would be of the base case):
\begin{gather*}
\AxiomC{\ldots}
\UnaryInfC{\(w_1 \in R\)}
\AxiomC{\ldots}
\UnaryInfC{\(w_2 \in R'\)}
\RightLabel{\(R'_{+}\)}
\BinaryInfC{\(+ w_1 ~w_2 \in R'\)}
\RightLabel{\(R_{num}\)}
\UnaryInfC{\(\num + w_1~ w_2 \in R\)}
\DisplayProof
\end{gather*}
%
where \(w = \num + w_1 ~w_2\). However, we are somewhat stuck here, as
we have no way to relate \(R'\) and \(S\). We will separately show that
if \(+w' \in R'\), then there is a derivation of \(w' \in S\) (lemma
\(R'_{S}\)). This will allow us to complete the proof:
\begin{gather*}
\AxiomC{}
\RightLabel{\(S_{num}\)}
\UnaryInfC{\(\num \in S\)}
\AxiomC{\ldots}
\UnaryInfC{\(+w_1 ~w_2 \in R'\)}
\RightLabel{\(R'_{S}\)}
\UnaryInfC{\(w_1 ~w_2 \in S\)}
\RightLabel{\(S_{+}\)}
\BinaryInfC{\(\num + w_1 ~w_2 \in S\)}
\DisplayProof
\end{gather*}
The proof of the lemma \(R'_S\) is by induction again, and not shown
here. This completes the original proof.
\end{itemize}
\end{enumerate}
\item Argument similar to Exercise Set 2 Problem 4 (same pair of
grammars). \(B_1 \subseteq B_2\) as relations can be seen by producing a
derivation tree for each possible case in \(B_1\). For the other
direction, \(B_2 \subseteq B_1\), it is first convenient to prove
that \(B_1\) is closed under concatenation, i.e., if \(w_1, w_2 \in B_1\)
then there is a derivation tree for \(w_1 ~ w_2 \in B_1\).
\end{enumerate}
\end{solution}
\end{exercise}