/*
Copyright 2024 Ondrej Lhotak. All rights reserved.
Permission is granted for private study use by students registered in
CS 241E in the Fall 2024 term.
The contents of this file may not be published, in whole or in part,
in print or electronic form.
The contents of this file may be included in work submitted for CS
241E assignments in Fall 2024. The contents of this file may not be
submitted, in whole or in part, for credit in any other course.
*/
package cs241e.scanparse
import DFAs.Token
object Grammars {
/** A representation of a production rule in a context-free grammar. */
case class Production(lhs: String, rhs: Seq[String]) {
override def toString: String = lhs + " " + rhs.mkString(" ")
}
/** A representation of a context-free grammar. */
case class Grammar(productions: Seq[Production]) {
val nonTerminals = productions.map(_.lhs).toSet
val symbols = nonTerminals ++ productions.flatMap(_.rhs).toSet
val terminals = symbols -- nonTerminals
val start = productions.head.lhs
/** Given a non-terminal symbol, returns the set of production rules that have the symbol as their left-hand side.
**/
val productionsExpanding: Map[String, Seq[Production]] =
productions.groupBy(_.lhs).withDefaultValue(Seq())
override def toString: String = productions.mkString("\n")
}
/** A representation of a node of a parse tree.
*
* The grammar symbol corresponding to the tree node is in `lhs.kind`. If the tree node is a leaf node representing
* a terminal (token), the lexeme of the token is in `lhs.lexeme`; otherwise, lhs.lexeme is the empty string.
*/
class Tree(val lhs: Token, val children: Seq[Tree] = Seq.empty) {
def production: String = lhs.kind + " " + children.map(_.lhs.kind).mkString(" ")
def show(indent: Int = 0): String = " " * indent + lhs + "\n" + children.map(_.show(indent + 1)).mkString
override def toString: String = show(0)
}
/** Generates a `Grammar` from a textual description. Each non-empty line of the input is considered a production
* rule of the grammar. The symbols of the production rule are space-separated words on the line. The first word
* is the lhs of the production rule, and the remaining words are the rhs. The lhs of the first production rule
* is considered the start symbol of the grammar.
*/
def parseGrammar(input: String): Grammar = {
def parseLine(line: String): Option[Production] = {
val tokens = line.trim.split(" ").filterNot(_.isEmpty)
if (tokens.isEmpty) None else Some(Production(tokens.head, tokens.tail))
}
val productions = input.split("\n").flatMap(parseLine)
Grammar(productions)
}
}