Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • shchen/cs320
  • raveendr/cs320
  • mwojnaro/cs320
3 results
Show changes
Showing 649 additions and 0 deletions
package amyc
package parsing
import amyc.utils._
import java.io.File
import silex._
import amyc.utils.Position
// The lexer for Amy.
// Pipeline stage: takes the list of input files and produces one flat token iterator.
object AmyLexer extends Pipeline[List[File], Iterator[Token]]
  with Lexers {

  /** Tiny Silex reference:
   * ==============================
   * Silex's lexer essentially allows you to define a list of regular expressions
   * in their order of priority. To tokenize a given input stream of characters, each
   * individual regular expression is applied in turn. If a given expression matches, it
   * is used to produce a token of maximal length. Whenever a regular expression does not
   * match, the expression of next-highest priority is tried.
   * The result is a stream of tokens.
   *
   * Regular expressions `r` can be built using the following operators:
   *   - `word("abc")` matches the sequence "abc" exactly
   *   - `r1 | r2` matches either expression `r1` or expression `r2`
   *   - `r1 ~ r2` matches `r1` followed by `r2`
   *   - `oneOf("xy")` matches either "x" or "y"
   *     (i.e., it is a shorthand of `word` and `|` for single characters)
   *   - `elem(c)` matches character `c`
   *   - `elem(f)` matches any character for which the boolean predicate `f` holds
   *   - `opt(r)` matches `r` or nothing at all
   *   - `many(r)` matches any number of repetitions of `r` (including none at all)
   *   - `many1(r)` matches any non-zero number of repetitions of `r`
   *
   * To define the token that should be output for a given expression, one can use
   * the `|>` combinator with an expression on the left-hand side and a function
   * producing the token on the right. The function is given the sequence of matched
   * characters and the source-position range as arguments.
   *
   * For instance,
   *
   * `elem(_.isDigit) ~ word("kg") |> {
   *   (cs, range) => WeightLiteralToken(cs.mkString).setPos(range._1)) }`
   *
   * will match a single digit followed by the characters "kg" and turn them into a
   * "WeightLiteralToken" whose value will be the full string matched (e.g. "1kg").
   */

  // Type of characters consumed.
  type Character = Char

  // Type of positions.
  type Position = SourcePosition

  // Type of tokens produced.
  type Token = parsing.Token

  import Tokens._

  // Rules are tried in declaration order: earlier rules have higher priority,
  // so keywords must appear before the (TODO) identifier rule.
  val lexer = Lexer(
    // Keywords,
    word("abstract") | word("case") | word("class") |
    word("def") | word("else") | word("extends") |
    word("if") | word("match") | word("object") |
    word("val") | word("error") | word("_") | word("end")
      |> { (cs, range) => KeywordToken(cs.mkString).setPos(range._1) },

    // Primitive type names,
    // TODO
    // Boolean literals,
    // TODO
    // Operators,
    // NOTE: You can use `oneOf("abc")` as a shortcut for `word("a") | word("b") | word("c")`
    // TODO
    // Identifiers,
    // TODO
    // Integer literal,
    // NOTE: Make sure to handle invalid (e.g. overflowing) integer values safely by
    // emitting an ErrorToken instead.
    // TODO
    // String literal,
    // TODO
    // Delimiters,
    // TODO
    // Whitespace,
    // TODO

    // Single line comment,
    // Consumes "//" and everything up to (but not including) the newline.
    // NOTE(review): unlike the keyword rule, this `|>` takes only the matched
    // characters and never calls setPos — confirm Silex accepts this arity
    // and that comment tokens really need no position here.
    word("//") ~ many(elem(_ != '\n'))
      |> { cs => CommentToken(cs.mkString("")) },

    // Multiline comments,
    // NOTE: Amy does not support nested multi-line comments (e.g. `/* foo /* bar */ */`).
    //       Make sure that unclosed multi-line comments result in an ErrorToken.
    // TODO
  ) onError {
    // We also emit ErrorTokens for Silex-handled errors.
    (cs, range) => ErrorToken(cs.mkString).setPos(range._1)
  } onEnd {
    // Once all the input has been consumed, we emit one EOFToken.
    pos => EOFToken().setPos(pos)
  }

  /** Lexes every input file in order and lazily concatenates the token streams. */
  override def run(ctx: amyc.utils.Context)(files: List[File]): Iterator[Token] = {
    var it = Seq[Token]().iterator

    for (file <- files) {
      val source = Source.fromFile(file.toString, SourcePositioner(file))

      // `it ++= x` rebinds the var to the lazy concatenation `it ++ x`.
      it ++= lexer.spawn(source).filter {
        token =>
          // TODO: Remove all whitespace and comment tokens
          ???
      }.map {
        // Any error token that survives filtering aborts compilation immediately.
        case token@ErrorToken(error) => ctx.reporter.fatal("Unknown token at " + token.position + ": " + error)
        case token => token
      }
    }
    it
  }
}
/** Debugging stage: prints every token from the input stream, one per line. */
object DisplayTokens extends Pipeline[Iterator[Token], Unit] {
  override def run(ctx: Context)(tokens: Iterator[Token]): Unit =
    for (token <- tokens) println(token)
}
package amyc
package parsing
import scala.language.implicitConversions
import amyc.ast.NominalTreeModule._
import amyc.utils._
import Tokens._
import TokenKinds._
import scallion._
// The parser for Amy
// Pipeline stage: consumes the lexer's token iterator and builds a nominal AST.
object Parser extends Pipeline[Iterator[Token], Program]
  with Parsers {

  type Token = amyc.parsing.Token
  type Kind = amyc.parsing.TokenKind

  import Implicits._

  // Scallion matches tokens by kind, not by concrete value.
  override def getKind(token: Token): TokenKind = TokenKind.of(token)

  val eof: Syntax[Token] = elem(EOFKind)
  def op(string: String): Syntax[Token] = elem(OperatorKind(string))
  def kw(string: String): Syntax[Token] = elem(KeywordKind(string))

  // Lets a bare string literal such as "(" stand for the matching delimiter syntax.
  implicit def delimiter(string: String): Syntax[Token] = elem(DelimiterKind(string))

  // An entire program (the starting rule for any Amy file).
  // One or more modules followed by EOF; the program's position is the first module's.
  lazy val program: Syntax[Program] = many1(many1(module) ~<~ eof).map(ms => Program(ms.flatten.toList).setPos(ms.head.head))

  // A module (i.e., a collection of definitions and an initializer expression)
  lazy val module: Syntax[ModuleDef] = (kw("object") ~ identifier ~ many(definition) ~ opt(expr) ~ kw("end") ~ identifier).map {
    case obj ~ id ~ defs ~ body ~ _ ~ id1 =>
      // The trailing `end <name>` must repeat the module's own name.
      if id == id1 then
        ModuleDef(id, defs.toList, body).setPos(obj)
      else
        throw new AmycFatalError("Begin and end module names do not match: " + id + " and " + id1)
  }

  // An identifier.
  val identifier: Syntax[String] = accept(IdentifierKind) {
    case IdentifierToken(name) => name
  }

  // An identifier along with its position.
  val identifierPos: Syntax[(String, Position)] = accept(IdentifierKind) {
    case id@IdentifierToken(name) => (name, id.position)
  }

  // A definition within a module.
  lazy val definition: Syntax[ClassOrFunDef] =
    ???

  // A list of parameter definitions.
  lazy val parameters: Syntax[List[ParamDef]] = repsep(parameter, ",").map(_.toList)

  // A parameter definition, i.e., an identifier along with the expected type.
  lazy val parameter: Syntax[ParamDef] =
    ???

  // A type expression.
  lazy val typeTree: Syntax[TypeTree] = primitiveType | identifierType

  // A built-in type (such as `Int`). An optional width annotation is parsed,
  // but only `Int(32)` is accepted — every other combination is rejected below.
  val primitiveType: Syntax[TypeTree] = (accept(PrimTypeKind) {
    case tk@PrimTypeToken(name) => TypeTree(name match {
      case "Unit" => UnitType
      case "Boolean" => BooleanType
      case "Int" => IntType
      case "String" => StringType
      case _ => throw new java.lang.Error("Unexpected primitive type name: " + name)
    }).setPos(tk)
  } ~ opt("(" ~ literal ~ ")")).map {
    case (prim@TypeTree(IntType)) ~ Some(_ ~ IntLiteral(32) ~ _) => prim
    case TypeTree(IntType) ~ Some(_ ~ IntLiteral(width) ~ _) =>
      throw new AmycFatalError("Int type can only be used with a width of 32 bits, found : " + width)
    case TypeTree(IntType) ~ Some(_ ~ lit ~ _) =>
      throw new AmycFatalError("Int type should have an integer width (only 32 bits is supported)")
    case TypeTree(IntType) ~ None =>
      throw new AmycFatalError("Int type should have a specific width (only 32 bits is supported)")
    case prim ~ Some(_) =>
      throw new AmycFatalError("Only Int type can have a specific width")
    case prim ~ None => prim
  }

  // A user-defined type (such as `List`).
  lazy val identifierType: Syntax[TypeTree] =
    ???

  // An expression.
  // HINT: You can use `operators` to take care of associativity and precedence
  lazy val expr: Syntax[Expr] = recursive {
    ???
  }

  // A literal expression.
  lazy val literal: Syntax[Literal[?]] =
    ???

  // A pattern as part of a match case.
  lazy val pattern: Syntax[Pattern] = recursive {
    ???
  }

  lazy val literalPattern: Syntax[Pattern] =
    ???

  lazy val wildPattern: Syntax[Pattern] =
    ???

  // HINT: It is useful to have a restricted set of expressions that don't include any more operators on the outer level.
  lazy val simpleExpr: Syntax[Expr] =
    literal.up[Expr] | variableOrCall | ???

  lazy val variableOrCall: Syntax[Expr] = ???

  // TODO: Other definitions.
  //       Feel free to decompose the rules in whatever way convenient.

  // Ensures the grammar is in LL(1)
  lazy val checkLL1: Boolean = {
    if (program.isLL1) {
      true
    } else {
      // Set `showTrails` to true to make Scallion generate some counterexamples for you.
      // Depending on your grammar, this may be very slow.
      val showTrails = false
      debug(program, showTrails)
      false
    }
  }

  /** Parses the token stream into a Program, or reports a fatal error. */
  override def run(ctx: Context)(tokens: Iterator[Token]): Program = {
    import ctx.reporter._
    // Scallion can only drive LL(1) grammars; bail out early otherwise.
    if (!checkLL1) {
      ctx.reporter.fatal("Program grammar is not LL1!")
    }

    val parser = Parser(program)

    parser(tokens) match {
      case Parsed(result, rest) => result
      case UnexpectedEnd(rest) => fatal("Unexpected end of input.")
      case UnexpectedToken(token, rest) => fatal("Unexpected token: " + token + ", possible kinds: " + rest.first.map(_.toString).mkString(", "))
    }
  }
}
\ No newline at end of file
package amyc
package parsing
import amyc.utils.Positioned
sealed trait Token extends Positioned with Product {
  /** Renders e.g. `KeywordToken(if)(1:3)`: case-class name, fields, position. */
  override def toString = {
    val fields = productIterator.mkString("(", ",", ")")
    s"$productPrefix$fields(${position.withoutFile})"
  }
}
// All concrete token classes produced by the lexer.
object Tokens {
  final case class KeywordToken(value: String) extends Token     // e.g. keyword "if"
  final case class IdentifierToken(name: String) extends Token   // e.g. variable name "x"
  final case class PrimTypeToken(value: String) extends Token    // e.g. primitive type "Int"
  final case class IntLitToken(value: Int) extends Token         // e.g. integer literal "123"
  final case class StringLitToken(value: String) extends Token   // e.g. string literal contents
  final case class BoolLitToken(value: Boolean) extends Token    // e.g. boolean literal "true"
  final case class DelimiterToken(value: String) extends Token   // .,:;(){}[]= and =>
  final case class OperatorToken(name: String) extends Token     // e.g. "+"
  final case class CommentToken(text: String) extends Token      // e.g. "// this is a comment"
  final case class SpaceToken() extends Token                    // e.g. "\n "
  final case class ErrorToken(content: String) extends Token     // lexing failure; carries the offending text
  final case class EOFToken() extends Token                      // special token at the end of file
}
// A token kind abstracts over a token's concrete value so the parser can
// match on categories (identifier, literal, ...) or on exact lexemes.
sealed abstract class TokenKind(representation: String) {
  override def toString: String = representation
}
object TokenKinds {
  // Keyword/delimiter/operator kinds keep the exact lexeme, so e.g.
  // KeywordKind("if") and KeywordKind("else") are distinct kinds.
  final case class KeywordKind(value: String) extends TokenKind(value)
  case object IdentifierKind extends TokenKind("<Identifier>")
  case object PrimTypeKind extends TokenKind("<Primitive Type>")
  // All literals (int, string, boolean) share one kind.
  case object LiteralKind extends TokenKind("<Literal>")
  final case class DelimiterKind(value: String) extends TokenKind(value)
  final case class OperatorKind(value: String) extends TokenKind(value)
  case object EOFKind extends TokenKind("<EOF>")
  // Fallback for tokens the parser never consumes (comments, spaces, errors).
  case object NoKind extends TokenKind("<???>")
}
object TokenKind {
  import Tokens._
  import TokenKinds._

  /** Maps a concrete token to the kind the parser matches on. */
  def of(token: Token): TokenKind = token match {
    // Value-carrying kinds keep the lexeme so exact words can be matched.
    case KeywordToken(value)   => KeywordKind(value)
    case DelimiterToken(value) => DelimiterKind(value)
    case OperatorToken(value)  => OperatorKind(value)
    // Every literal flavour collapses into the single literal kind.
    case BoolLitToken(_) | IntLitToken(_) | StringLitToken(_) => LiteralKind
    case IdentifierToken(_) => IdentifierKind
    case PrimTypeToken(_)   => PrimTypeKind
    case EOFToken()         => EOFKind
    case _                  => NoKind
  }
}
\ No newline at end of file
package amyc.utils
case class AmycFatalError(msg: String) extends Exception(msg)
package amyc.utils
// Contains a reporter and configuration for the compiler
// NOTE(review): the boolean flags are presumably set from command-line options
// by a driver that is not visible in this file — confirm their exact semantics there.
case class Context(
  reporter: Reporter,            // sink for all diagnostics emitted by pipeline stages
  files: List[String],           // input file paths to compile
  printTokens: Boolean = false,
  printTrees: Boolean = false,
  printNames: Boolean = false,
  interpret: Boolean = false,
  typeCheck: Boolean = false,
  help: Boolean = false
)
package amyc.utils
// A structured document to be printed with nice indentation
abstract class Document {

  /** Concatenates two documents on the same line. */
  def <:>(other: Document) = Lined(List(this, other))

  /** Renders this document to a string.
   *
   * `ind` is the current indentation depth (one space appended per level, see
   * the Raw case) and `first` is true when we are at the start of a fresh
   * line, i.e. the next raw text still needs its indentation emitted.
   */
  def print: String = {
    val sb = new StringBuffer()

    def rec(d: Document)(implicit ind: Int, first: Boolean): Unit = d match {
      case Raw(s) =>
        // Indent only at the start of a line, and never for empty text.
        if (first && s.nonEmpty) sb.append((" " * ind))
        sb.append(s)
      case Indented(doc) =>
        rec(doc)(ind + 1, first)
      case Unindented(doc) =>
        assume(ind > 0)
        rec(doc)(ind - 1, first)
      case Lined(Nil, _) => // skip
      case Lined(docs, sep) =>
        // The head may still start a line; everything after it is mid-line.
        rec(docs.head)
        docs.tail foreach { doc =>
          rec(sep)(ind, false)
          rec(doc)(ind, false)
        }
      case Stacked(Nil, _) => // skip
      case Stacked(docs, emptyLines) =>
        rec(docs.head)
        docs.tail foreach { doc =>
          sb.append("\n")
          // Optionally separate stacked entries with a blank line.
          if (emptyLines) sb.append("\n")
          // A newline was just emitted, so the next raw text starts a line.
          rec(doc)(ind, true)
        }
    }

    rec(this)(0, true)
    sb.toString
  }
}
// Renders its content one indentation level deeper.
case class Indented(content: Document) extends Document
// Renders its content one indentation level shallower (depth must stay positive).
case class Unindented(content: Document) extends Document
// Vertical composition: one document per line, optionally blank-line separated.
case class Stacked(docs: List[Document], emptyLines: Boolean = false) extends Document
// Horizontal composition with an optional separator between elements.
case class Lined(docs: List[Document], separator: Document = Raw("")) extends Document
// Literal text.
case class Raw(s: String) extends Document

object Stacked {
  // Convenience vararg constructor.
  def apply(docs: Document*): Stacked = Stacked(docs.toList)
}
\ No newline at end of file
package amyc.utils
object Env {
  trait OS
  object Linux extends OS
  object Windows extends OS
  object Mac extends OS

  /** Best-effort OS detection from the `os.name` system property.
   * Falls back to Linux when the property is missing or unrecognized. */
  lazy val os = {
    Option(System.getProperty("os.name")).map(_.toLowerCase()) match {
      case Some(name) if name.contains("linux") => Linux
      case Some(name) if name.contains("win")   => Windows
      case Some(name) if name.contains("mac")   => Mac
      case _                                    => Linux
    }
  }
}
package amyc.utils
// A sequence of operations to be run by the compiler,
// with interruption at every stage if there is an error
abstract class Pipeline[-F, +T] {
  self =>

  /** Chains two stages; aborts (via the reporter) between them if the first
   * stage reported any errors. */
  def andThen[G](thenn: Pipeline[T, G]): Pipeline[F, G] = new Pipeline[F,G] {
    def run(ctx : Context)(v : F) : G = {
      val first = self.run(ctx)(v)
      // Stop the whole pipeline before the next stage if errors occurred.
      ctx.reporter.terminateIfErrors()
      thenn.run(ctx)(first)
    }
  }

  /** Runs this stage on input `v`. */
  def run(ctx: Context)(v: F): T
}
// Identity stage: passes its input through unchanged.
case class Noop[T]() extends Pipeline[T, T] {
  def run(ctx: Context)(v: T) = v
}
package amyc.utils
import java.io.File
import silex._
// Decodes positions packed by silex into a single non-negative Int:
// the line number lives in the high bits, the column in the low COLUMN_BITS bits.
object Position {
  /** Number of bits used to encode the line number */
  private final val LINE_BITS = 20
  /** Number of bits used to encode the column number */
  private final val COLUMN_BITS = 31 - LINE_BITS // no negatives => 31
  /** Mask to decode the line number */
  private final val LINE_MASK = (1 << LINE_BITS) - 1
  /** Mask to decode the column number */
  private final val COLUMN_MASK = (1 << COLUMN_BITS) - 1

  private def lineOf(pos: Int): Int = (pos >> COLUMN_BITS) & LINE_MASK
  private def columnOf(pos: Int): Int = pos & COLUMN_MASK

  /** Decodes a packed line/column integer into a SourcePosition within file `f`. */
  def fromFile(f: File, i: Int) = {
    SourcePosition(f, lineOf(i), columnOf(i))
  }
}
// A position in a source file: file plus line and column
// (1-based — see SourcePositioner.start).
abstract class Position {
  val file: File
  val line: Int
  val col: Int

  /** False only for the NoPosition sentinel. */
  def isDefined: Boolean
  /** Renders "line:col" without the file path. */
  def withoutFile: String
}
// A concrete, known position inside `file`.
case class SourcePosition(file: File, line: Int, col: Int) extends Position {
  override def toString: String = s"${file.getPath}:$line:$col"
  def withoutFile = s"$line:$col"
  val isDefined = true
}
// Sentinel for "no position available". Note `file` is null here, so callers
// must check isDefined before touching it (as Reporter.report does).
case object NoPosition extends Position {
  val file = null
  val line = 0
  val col = 0
  override def toString: String = "?:?"
  def withoutFile = toString
  val isDefined = false
}
// A trait for entities which have a position in a file
trait Positioned {

  // Mutable so a position can be attached after construction; defaults to NoPosition.
  protected var pos_ : Position = NoPosition

  def hasPosition = pos_ != NoPosition

  def position = pos_

  /** Sets the position and returns `this`, enabling chaining like `tok.setPos(p)`. */
  def setPos(pos: Position): this.type = {
    pos_ = pos
    this
  }

  /** Copies the position from another positioned entity. */
  def setPos(other: Positioned): this.type = {
    setPos(other.position)
  }
}
/** Tracks 1-based line/column positions while silex consumes characters of `file`. */
case class SourcePositioner(file: File) extends Positioner[Char, SourcePosition] {

  override val start: SourcePosition = SourcePosition(file, 1, 1)

  /** Advances past one character: a newline moves to column 1 of the next
   * line; any other character moves one column to the right. */
  override def increment(position: SourcePosition, character: Char): SourcePosition =
    character match {
      case '\n' => position.copy(line = position.line + 1, col = 1)
      case _    => position.copy(col = position.col + 1)
    }
}
package amyc.utils
import java.io.File
import scala.io.Source
// Reports errors and warnings during compilation
class Reporter {

  /** Issues some information from the compiler */
  def info(msg: Any, pos: Position = NoPosition): Unit = {
    report("[ Info ]", msg, pos)
  }

  /** Issues a warning from the compiler */
  def warning(msg: Any, pos: Position = NoPosition): Unit = {
    report("[Warning]", msg, pos)
  }

  // Set as soon as error() is called; checked by terminateIfErrors().
  private var hasErrors = false

  /** Issues a recoverable error message */
  def error(msg: Any, pos: Position = NoPosition): Unit = {
    hasErrors = true
    report("[ Error ]", msg, pos)
  }

  /** Used for an unrecoverable error: Issues a message, then exits the compiler */
  def fatal(msg: Any, pos: Position = NoPosition): Nothing = {
    report("[ Fatal ]", msg, pos)
    // Despite printing the message, we store it in the error for testing
    val errMsg = s"$pos: $msg"
    throw AmycFatalError(errMsg)
  }

  // Versions for Positioned
  def info(msg: Any, pos: Positioned): Unit = info(msg, pos.position)
  def warning(msg: Any, pos: Positioned): Unit = warning(msg, pos.position)
  def error(msg: Any, pos: Positioned): Unit = error(msg, pos.position)
  def fatal(msg: Any, pos: Positioned): Nothing = fatal(msg, pos.position)

  /** Terminates the compiler if any errors have been detected. */
  def terminateIfErrors() = {
    if (hasErrors) {
      fatal("There were errors.")
    }
  }

  // All diagnostics go to stderr so they do not mix with program output.
  private def err(msg: String): Unit = {
    Console.err.println(msg)
  }

  /** Prints the message and, when the position is known, the offending source
   * line with a caret under the column. */
  private def report(prefix: String, msg: Any, pos: Position): Unit = {
    if (pos.isDefined) {
      err(s"$prefix $pos: $msg")
      val lines = getLines(pos.file)

      // Guard against positions past the end of the (cached) file contents.
      if (pos.line > 0 && pos.line-1 < lines.size) {
        err(s"$prefix ${lines(pos.line-1)}")
        err(prefix + " " + " "*(pos.col - 1)+"^")
      } else {
        err(s"$prefix <line unavailable in source file>")
      }
    } else {
      err(s"$prefix $msg")
    }
  }

  // Cache of file contents so each source file is read at most once.
  private var filesToLines = Map[File, IndexedSeq[String]]()

  private def getLines(f: File): IndexedSeq[String] = {
    filesToLines.get(f) match {
      case Some(lines) =>
        lines
      case None =>
        // NOTE(review): `withPositioning(true)` on the io source — confirm this
        // is the intended API here; the file is read eagerly and then closed.
        val source = Source.fromFile(f).withPositioning(true)
        val lines = source.getLines().toIndexedSeq
        source.close()
        filesToLines += f -> lines
        lines
    }
  }
}
package amyc.utils
import scala.collection.mutable
// Generates unique counters for each element of a type K
class UniqueCounter[K] {

  // Last id handed out per key; an absent key behaves as -1 so the first id is 0.
  private val lastIds = mutable.Map[K, Int]()

  /** Returns the next id (0, 1, 2, ...) for `key`. Thread-safe. */
  def next(key: K): Int = synchronized {
    val id = lastIds.getOrElse(key, -1) + 1
    lastIds(key) = id
    id
  }
}
999999999999999999999999999
/***
\ No newline at end of file