diff --git a/info/exercises/Makefile b/info/exercises/Makefile index 47f52641ac6fe074292f38a574019220aa4e2a2f..f36de7314ef655dab4176376e2938509d607812f 100644 --- a/info/exercises/Makefile +++ b/info/exercises/Makefile @@ -7,20 +7,23 @@ DIRS := $(wildcard src/ex-??) EXPDFS := $(patsubst src/ex-%,ex-%.pdf,$(DIRS)) SOLPDFS := $(patsubst src/ex-%,ex-%-sol.pdf,$(DIRS)) +TEXARGS := -shell-escape -interaction=batchmode + all: $(EXPDFS) $(SOLPDFS) ex-%.pdf: src/ex-%/main.tex cd src/ex-$* && \ - lualatex -jobname=ex-$* "\def\ANSWERS{0}\input{main.tex}" && \ + lualatex $(TEXARGS) -jobname=ex-$* "\def\ANSWERS{0}\input{main.tex}" && \ cp ex-$*.pdf $(OUT_DIR)/ex-$*.pdf ex-%-sol.pdf: src/ex-%/main.tex cd src/ex-$* && \ - lualatex -jobname=ex-$*-sol "\def\ANSWERS{1}\input{main.tex}" && \ + lualatex $(TEXARGS) -jobname=ex-$*-sol "\def\ANSWERS{1}\input{main.tex}" && \ cp ex-$*-sol.pdf $(OUT_DIR)/ex-$*-sol.pdf +# Each source directory is cleaned inside a subshell, so the cd cannot leak into the next loop iteration and no bash-only pushd/popd is required. clean: rm -f $(EXPDFS) $(SOLPDFS) for d in $(DIRS); do \ - cd $$d && rm -f *.aux *.log *.out main.pdf; \ + (cd $$d && rm -f *.aux *.log *.out main.pdf); \ done diff --git a/info/exercises/ex-01-sol.pdf b/info/exercises/ex-01-sol.pdf index 3c674aa25cd281494275c9d400f1dc0124618807..93662d1e647e8072f1ab46dc7070f31f9464331d 100644 Binary files a/info/exercises/ex-01-sol.pdf and b/info/exercises/ex-01-sol.pdf differ diff --git a/info/exercises/ex-01.pdf b/info/exercises/ex-01.pdf index eadbca2227fad05f0517a7f16a08e757988bcbb5..bea9f40a05dfe49f34e1e0d7852628c1e738789d 100644 Binary files a/info/exercises/ex-01.pdf and b/info/exercises/ex-01.pdf differ diff --git a/info/exercises/ex-02-sol.pdf b/info/exercises/ex-02-sol.pdf new file mode 100644 index 0000000000000000000000000000000000000000..addf36672f1968cb314e7f400a22ce842087b1d8 Binary files /dev/null and b/info/exercises/ex-02-sol.pdf differ diff --git a/info/exercises/ex-02.pdf b/info/exercises/ex-02.pdf new file mode 100644 index 0000000000000000000000000000000000000000..6ce4b0365c1cdfab35c32af5518286f4f58fdb77 Binary 
files /dev/null and b/info/exercises/ex-02.pdf differ diff --git a/info/exercises/src/ex-02/ex/cfg.tex b/info/exercises/src/ex-02/ex/cfg.tex new file mode 100644 index 0000000000000000000000000000000000000000..d4b33a341fde72140a8346a95df5e1b08db6a521 --- /dev/null +++ b/info/exercises/src/ex-02/ex/cfg.tex @@ -0,0 +1,343 @@ + +\begin{exercise}{} + + For each of the following languages, give a context-free grammar that + generates it: + + \begin{enumerate} + \item \(L_1 = \{a^nb^m \mid n, m \in \naturals \land n > 0 \land m > n\}\) + \item \(L_2 = \{a^nb^mc^{n+m} \mid n, m \in \naturals\}\) + \item \(L_3 = \{w \in \{a, b\}^* \mid \exists m \in \naturals.\; |w| = 2m + + 1 \land w_{(m+1)} = a \}\) (\(w\) is of odd length, has \(a\) in the middle) + \end{enumerate} + + \begin{solution} + \begin{enumerate} + \item \(L_1 = \{a^nb^m \mid n, m \in \naturals \land n > 0 \land m > n\}\) + \begin{align*} + S &::= aSb \mid aBb\\ + B &::= bB \mid b + \end{align*} + \item \(L_2 = \{a^nb^mc^{n+m} \mid n, m \in \naturals\}\) + \begin{align*} + S &::= aSc \mid B\\ + B &::= bBc \mid \epsilon + \end{align*} + + A small tweak to \(L_1\)'s grammar allows us to keep track of addition + precisely here. Could we do something similar for \(\{a^nb^nc^n \mid n \in + \naturals\}\)? (open-ended discussion) + + \item \(L_3 = \{w \in \{a, b\}^* \mid \exists m \in \naturals.\; |w| = 2m + + 1 \land w_{(m+1)} = a \}\) + \begin{align*} + S &::= aSb \mid bSa \mid aSa \mid bSb \mid a + \end{align*} + + Note that after each recursive step, the length of the inner string has + the same parity (i.e. odd). + \end{enumerate} + \end{solution} + +\end{exercise} + +\begin{exercise}{} + + Consider the following context-free grammar \(G\): + + \begin{align*} + A &::= -A \\ + A &::= A - \textit{id} \\ + A &::= \textit{id} \\ + \end{align*} + + \begin{enumerate} + \item Show that \(G\) is ambiguous, i.e., there is a string that has two + different possible parse trees with respect to \(G\). 
+ \item Make two different unambiguous grammars recognizing the same words, + \(G_p\), where prefix-minus binds more tightly, and \(G_i\), where + infix-minus binds more tightly. + \item Show the parse trees for the string you produced in (1) with respect + to \(G_p\) and \(G_i\). + \item Produce a regular expression that recognizes the same language as + \(G\). + \end{enumerate} + + \begin{solution} + \begin{enumerate} + \item An example string is \(- \textit{id} - \textit{id}\). It can be + parsed as either \((- \textit{id}) - \textit{id}\) or + \(-(\textit{id} - \textit{id})\). The corresponding parse trees are: + + \begin{center} + \begin{forest} + [\(A\) + [\(A\) + [\(-\)] + [\(A\) [\(\textit{id}\)]] + ] + [\(-\)] + [\(\textit{id}\)] + ] + \end{forest} + \hspace{10ex} + \begin{forest} + [\(A\) + [\(-\)] + [\(A\) + [\(A\) + [\(\textit{id}\)] + ] + [\(-\)] + [\(\textit{id}\)] + ] + ] + \end{forest} + \end{center} + + Left: prefix binds tighter, right: infix binds tighter. + + \item \(G_p\): + \begin{align*} + A &::= B \mid A - \textit{id} \\ + B &::= -B \mid \textit{id} + \end{align*} + + \(G_i\): + \begin{align*} + A &::= C \mid -A \\ + C &::= \textit{id} \mid C - \textit{id} + \end{align*} + + \item Parse trees for \(- \textit{id} - \textit{id}\) with respect to \(G_p\) (left) + and \(G_i\) (right): + + \begin{center} + \begin{forest} + [\(A\) + [\(A\) + [\(B\) + [\(-\)] + [\(B\) + [\(\textit{id}\)] + ] + ] + ] + [\(-\)] + [\(\textit{id}\)] + ] + \end{forest} + \hspace{10ex} + \begin{forest} + [\(A\) + [\(-\)] + [\(A\) [\(C\) + [\(C\) + [\(\textit{id}\)] + ] + [\(-\)] + [\(\textit{id}\)] + ]] + ] + \end{forest} + \end{center} + + \item \(L(G) = L(-^*\textit{id} (-\textit{id})^*)\). Note: \(()\) are part + of the regular expression syntax, not parentheses in the string. 
+ + \end{enumerate} + \end{solution} + +\end{exercise} + + +\begin{exercise}{} + + Consider the two following grammars \(G_1\) and \(G_2\): + + \begin{align*} + G_1: & \\ + S &::= S(S)S \mid \epsilon \\ + G_2: & \\ + R &::= RR \mid (R) \mid \epsilon + \end{align*} + + \noindent + Prove that: + \begin{enumerate} + \item \(L(G_1) \subseteq L(G_2)\), by showing that for every parse tree in + \(G_1\), there exists a parse tree yielding the same word in \(G_2\). + \item (Bonus) \(L(G_2) \subseteq L(G_1)\), by showing that there exist + equivalent parse trees or derivations. + \end{enumerate} + + \begin{solution} + + \begin{enumerate} + \item \(L(G_1) \subseteq L(G_2)\). + + We give a recursive transformation of parse trees in \(G_1\) producing + parse trees in \(G_2\). + + \begin{enumerate} + \item \textbf{Base case:} The smallest parse tree is the \(\epsilon\) + production, which can be transformed as (left to right): + \begin{center} + \begin{forest} + [\(S\) + [\(\epsilon\)] + ] + \end{forest} + \hspace{8ex} + \begin{forest} + [\(R\) + [\(\epsilon\)] + ] + \end{forest} + \end{center} + \item \textbf{Recursive case:} Rule \(S ::= S(S)S\). The parse tree transformation is: + \begin{center} + \begin{forest} + [\(S\) + [\(S_1\)] + [\((_2\)] + [\(S_3\)] + [\()_4\)] + [\(S_5\)] + ] + \end{forest} + \hspace{10ex} + \begin{forest} + [\(R\) + [\(R_1\)] + [\(R\) + [\(R\) + [\((_2\)] + [\(R_3\)] + [\()_4\)] + ] + [\(R_5\)] + ] + ] + \end{forest} + \end{center} + + The nodes are numbered to check that the order of children (left to + right) does not change. This ensures that the word yielded by the tree + is the same. The transformation is applied recursively to the children + \(S_1, S_3, S_5\) to obtain \(R_1, R_3, R_5\). + + Verify that the tree on the right is indeed a parse tree in \(G_2\). + \end{enumerate} + + \item \(L(G_2) \subseteq L(G_1)\). + + Straightforward induction on parse trees does not work easily. 
The rule + \(R ::= RR\) in \(G_2\) is not directly expressible in \(G_1\) by a simple + transformation of parse trees. However, we can note that, in fact, adding + this rule to \(G_1\) does not change the language! + + Consider the grammar \(G_1'\) defined by \(S ::= SS \mid S(S)S \mid + \epsilon\). We must show that for every two words \(v\) and \(w\) in + \(L(G_1)\), \(vw\) is in \(L(G_1)\), and so adding the rule \(S ::= SS\) + does not change the language. + + We use strong induction on the length \(|v| + |w|\). + + \begin{enumerate} + \item \textbf{Base case:} \(|v| + |w| = 0\). \(v = w = vw = \epsilon \in + L(G_1)\). QED. + \item \textbf{Inductive case:} \(|v| + |w| = n + 1\). The induction + hypothesis is that for every \(v', w' \in L(G_1)\) with \(|v'| + |w'| \leq n\), \(v'w' + \in L(G_1)\). + + From the grammar, we know that either \(v = \epsilon\) or \(v = x(y)z\) + for \(x, y, z \in L(G_1)\). If \(v = \epsilon\), then \(w = vw \in + L(G_1)\). In the second case, \(vw = x(y)zw\). However, \(zw \in + L(G_1)\) by the inductive hypothesis, as \(|z| + |w| \leq |v| + |w| - 2 < n\). + + Thus, \(vw = x(y)z'\) for \(z' \in L(G_1)\). Finally, since \(x, y, z' + \in L(G_1)\), it follows from the grammar rules that \(vw = x(y)z' \in + L(G_1)\). + \end{enumerate} + + Thus, \(L(G_1) = L(G_1')\). It can now be shown just as in the first part, + that \(L(G_2) \subseteq L(G_1')\). + \end{enumerate} + + \end{solution} + +\end{exercise} + +\begin{exercise}{} + + Consider a context-free grammar \(G = (A, N, S, R)\). Define the reversed + grammar \(rev(G) = (A, N, S, rev(R))\), where \(rev(R)\) is the set of rules + produced from \(R\) by reversing the right-hand side of each rule, i.e., + for each rule \(n ::= p_1 \ldots p_n\) in \(R\), there is a rule \(n ::= + p_n \ldots p_1\) in \(rev(R)\), and vice versa. The terminals, + non-terminals, and start symbol of the grammar remain the same. + + For example, \(S ::= abS \mid \epsilon\) becomes \(S ::= Sba \mid \epsilon\). 
+ + Is it the case that for every context-free grammar \(G\) defining a language + \(L\), the language defined by \(rev(G)\) is the same as the language of + reversed strings of \(L\), \(rev(L) = \{rev(w) \mid w \in L\}\)? Give a proof + or a counterexample. + + \begin{solution} + + Consider any word \(w\) in the original language. Looking at the definition + of a language \(L(G)\) defined by a grammar \(G\): + \begin{equation*} + w \in L(G) \iff \exists T.\; w = yield(T) \land isParseTree(G, T) + \end{equation*} + + There must exist a parse tree \(T\) for \(w\) with respect to \(G\). We must + show that there exists a parse tree for \(rev(w)\) with respect to the + reversed grammar \(G_r = rev(G)\) as well. + + We propose that this is precisely the tree \(T_r = mirror(T)\). Thus, we + need to show that \(rev(w) = yield(T_r)\) and that \(isParseTree(G_r, + T_r)\). + + \begin{enumerate} + \item \(rev(w) = yield(T_r)\): \(yield(\cdot)\) of a tree is the word + obtained by reading its leaves from left to right. Thus, the yield of the + mirror of a tree \(yield(mirror(\cdot))\) is the word obtained by reading + the leaves of the original tree from right to left. Thus, \(yield(T_r) = + yield(mirror(T)) = rev(yield(T)) = rev(w)\). + + \item \(isParseTree(G_r, T_r)\): We need to show that \(T_r\) is a parse + tree with respect to \(G_r\). Consider the definition of a parse tree: + \begin{enumerate} + \item The root of \(T_r\) is the start symbol of \(G_r\): the root of + \(T_r = mirror(T)\) is the same as that of \(T\). Since \(T\)'s root + node must be the start symbol of \(G\), it is also the root symbol of + \(T_r\). \(G\) and \(G_r\) share the same start symbol in our + transformation. + \item The leaves are labelled by the elements of \(A\): the mirror + transformation does not alter the set or the label of leaves, only their + order. This property transfers from \(T\) to \(T_r\) as well. 
+ \item Each non-leaf node is labelled by a non-terminal symbol: the + mirror transformation does not alter the label of non-leaf nodes either, + so this property transfers from \(T\) to \(T_r\) as well. + \item If a non-leaf node has children that are labelled \(p_1, \ldots, + p_n\) left-to-right, then there is a rule \((n ::= p_1 \ldots p_n)\) in + the grammar: consider any non-leaf node in \(T_r\), labelled \(n\), with + children labelled left-to-right \(p_1, \ldots, p_n\). By the definition + of \(mirror\), the original tree \(T\) must have the same node labelled + \(n\), with the reversed list of children left-to-right, \(p_n, \ldots, + p_1\). Since \(T\) is a parse tree for \(G\), \(n ::= p_n \ldots p_1\) + is a valid rule in \(G\), and by the reverse transformation, \(n ::= p_1 + \ldots p_n\) must be a rule in \(G_r\). Thus, the property is satisfied. + \end{enumerate} + \end{enumerate} + + Thus, both properties are satisfied, so \(rev(L(G)) \subseteq L(rev(G))\). Since \(rev(rev(G)) = G\), applying the same argument to \(rev(G)\) gives the converse inclusion. Therefore, the language defined by the + reversed grammar is the reversed language of the original grammar. + + \end{solution} + +\end{exercise} + diff --git a/info/exercises/src/ex-02/ex/pumping.tex b/info/exercises/src/ex-02/ex/pumping.tex new file mode 100644 index 0000000000000000000000000000000000000000..51bdf0636b18404a3ea8cefbad882213d984d036 --- /dev/null +++ b/info/exercises/src/ex-02/ex/pumping.tex @@ -0,0 +1,42 @@ + +\begin{exercise}{} + + Recall the pumping lemma for regular languages: + + For any language \(L \subseteq \Sigma^*\), if \(L\) is regular, there exists a + strictly positive constant \(p \in \naturals\) such that every word \(w \in + L\) with \(|w| \geq p\) can be written as \(w = xyz\) such that: + + \begin{itemize} + \item \(x, y, z \in \Sigma^*\) + \item \(|y| > 0\) + \item \(|xy| \leq p\), and + \item \(\forall i \in \naturals.\; xy^iz \in L\) + \end{itemize} + + Consider the language \(L = \{w \in \{a\}^* \mid |w| \text{ is prime}\}\). + Show that \(L\) is not regular by using the pumping lemma. 
+ + \begin{solution} + \(L = \{w \in \{a\}^* \mid |w| \text{ is prime}\}\) is not a regular + language. + + To the contrary, assume it is regular, and so there exists a constant + \(p\) such that the pumping conditions hold for this language. + + Consider the word \(w = a^{n} \in L\), for some prime \(n \geq p\). By the + pumping lemma, we can write \(w = xyz\) such that \(|y| > 0\), \(|xy| \leq + p\), and \(xy^iz \in L\) for all \(i \geq 0\). + + Assume that \(|xz| = m\) and \(|y| = k\) for some natural numbers \(m\) + and \(k\), where \(k > 0\). Thus, \(|xy^iz| = m + ik\) for all \(i\). Since by the pumping + lemma \(xy^iz \in L\) for every \(i\), it follows that for every \(i\), + the length \(m + ik\) is prime. Taking \(i = 0\) shows that \(m\) is prime, so \(m \geq 2\). Taking \(i = m\) + then gives the length \(m + mk = m(1 + k)\), which is a product of two factors that are each at least \(2\) and hence + not prime. This is a contradiction. + + Thus, this language is not regular. + + \end{solution} + +\end{exercise} diff --git a/info/exercises/src/ex-02/main.tex b/info/exercises/src/ex-02/main.tex new file mode 100644 index 0000000000000000000000000000000000000000..ad9d334d324b410c8b7457dc9955f63eb29a978b --- /dev/null +++ b/info/exercises/src/ex-02/main.tex @@ -0,0 +1,22 @@ +\documentclass[a4paper]{article} + +\input{../macro} + +\ifdefined\ANSWERS + \if\ANSWERS1 + \printanswers + \fi +\fi + +\title{CS 320 \\ Computer Language Processing\\Exercises: Week 3} +\author{} +\date{March 7, 2025} + +\begin{document} +\maketitle + + \input{ex/pumping} + + \input{ex/cfg} + +\end{document}