mirror of https://github.com/vitalif/lime
Initial commit
commit
6789f4c785
|
@ -0,0 +1,70 @@
|
||||||
|
Lime: An LALR(1) parser generator in and for PHP.
|
||||||
|
|
||||||
|
Interpreter pattern got you down? Time to use a real parser? Welcome to Lime.
|
||||||
|
|
||||||
|
If you're familiar with BISON or YACC, you may want to read the metagrammar.
|
||||||
|
It's written in the Lime input language, so you'll get a head-start on
|
||||||
|
understanding how to use Lime.
|
||||||
|
|
||||||
|
0. If you're not running Linux on an IA32 box, then you will have to rebuild
|
||||||
|
lime_scan_tokens for your system. It should be enough to erase it,
|
||||||
|
and then type "CFLAGS=-O2 make lime_scan_tokens" at the bash prompt.
|
||||||
|
|
||||||
|
1. Stare at the file lime/metagrammar to understand the syntax. You're seeing
|
||||||
|
slightly modified and tweaked Backus-Naur forms. The main differences
|
||||||
|
are that you get to name your components, instead of referring to them
|
||||||
|
by numbers the way that BISON demands. This idea was stolen from the
|
||||||
|
C-based "Lemon" parser from which Lime derives its name. Incidentally,
|
||||||
|
the author of Lemon disclaimed copyright, so you get a copy of the C
|
||||||
|
code that taught me LALR(1) parsing better than any book, despite the
|
||||||
|
obvious difficulties in understanding it. Oh, and one other thing:
|
||||||
|
symbols are terminal if the scanner feeds them to the parser. They
|
||||||
|
are non-terminal if they appear on the left side of a production rule.
|
||||||
|
Lime names semantic categories using strings instead of the numbers
|
||||||
|
that BISON-based parsers use, so you don't have to declare any list of
|
||||||
|
terminal symbols anywhere.
|
||||||
|
|
||||||
|
2. Look at the file lime/lime.php to see what pragmas are defined. To be more
|
||||||
|
specific, you might look at the method lime::pragma(), which at the
|
||||||
|
time of this writing, supports "%left", "%right", "%nonassoc",
|
||||||
|
"%start", and "%class". The first three are for operator precedence.
|
||||||
|
The last two declare the start symbol and the name of a PHP class to
|
||||||
|
generate which will hold all the bottom-up parsing tables.
|
||||||
|
|
||||||
|
3. Write a grammar file.
|
||||||
|
|
||||||
|
4. php /path/to/lime/lime.php list-of-grammar-files > my_parser.php
|
||||||
|
|
||||||
|
5. Read the function parse_lime_grammar() in lime.php to understand
|
||||||
|
how to integrate your parser into your program.
|
||||||
|
|
||||||
|
6. Integrate your parser as follows:
|
||||||
|
|
||||||
|
--------------- CUT ---------------
|
||||||
|
|
||||||
|
include_once "lime/parse_engine.php";
|
||||||
|
include_once "my_parser.php";
|
||||||
|
#
|
||||||
|
# Later:
|
||||||
|
#
|
||||||
|
$parser = new parse_engine(new my_parser());
|
||||||
|
#
|
||||||
|
# And still later:
|
||||||
|
#
|
||||||
|
try {
|
||||||
|
while (..something..) {
|
||||||
|
$parser->eat($type, $val);
|
||||||
|
# You figure out how to get the parameters.
|
||||||
|
}
|
||||||
|
# And after the last token has been eaten:
|
||||||
|
$parser->eat_eof();
|
||||||
|
} catch (parse_error $e) {
|
||||||
|
die($e->getMessage());
|
||||||
|
}
|
||||||
|
return $parser->semantic;
|
||||||
|
|
||||||
|
--------------- CUT ---------------
|
||||||
|
|
||||||
|
7. You now have the computed semantic value of whatever you parsed. Add salt
|
||||||
|
and pepper to taste, and serve.
|
||||||
|
|
|
@ -0,0 +1,335 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
|
||||||
|
DON'T EDIT THIS FILE!
|
||||||
|
|
||||||
|
This file was automatically generated by the Lime parser generator.
|
||||||
|
The real source code you should be looking at is in one or more
|
||||||
|
grammar files in the Lime format.
|
||||||
|
|
||||||
|
THE ONLY REASON TO LOOK AT THIS FILE is to see where in the grammar
|
||||||
|
file that your error happened, because there are enough comments to
|
||||||
|
help you debug your grammar.
|
||||||
|
|
||||||
|
If you ignore this warning, you're shooting yourself in the brain,
|
||||||
|
not the foot.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
class calc extends lime_parser {
|
||||||
|
var $qi = 0;
|
||||||
|
var $i = array (
|
||||||
|
0 =>
|
||||||
|
array (
|
||||||
|
'exp' => 's 1',
|
||||||
|
'var' => 's 15',
|
||||||
|
'num' => 's 3',
|
||||||
|
'\'(\'' => 's 12',
|
||||||
|
'stmt' => 's 18',
|
||||||
|
'\'start\'' => 'a \'start\'',
|
||||||
|
),
|
||||||
|
1 =>
|
||||||
|
array (
|
||||||
|
'\'+\'' => 's 2',
|
||||||
|
'\'-\'' => 's 6',
|
||||||
|
'\'*\'' => 's 8',
|
||||||
|
'\'/\'' => 's 10',
|
||||||
|
'#' => 'r 0',
|
||||||
|
),
|
||||||
|
2 =>
|
||||||
|
array (
|
||||||
|
'num' => 's 3',
|
||||||
|
'var' => 's 4',
|
||||||
|
'exp' => 's 5',
|
||||||
|
'\'(\'' => 's 12',
|
||||||
|
),
|
||||||
|
3 =>
|
||||||
|
array (
|
||||||
|
'\'+\'' => 'r 2',
|
||||||
|
'\'-\'' => 'r 2',
|
||||||
|
'\'*\'' => 'r 2',
|
||||||
|
'\'/\'' => 'r 2',
|
||||||
|
'\')\'' => 'r 2',
|
||||||
|
'#' => 'r 2',
|
||||||
|
),
|
||||||
|
4 =>
|
||||||
|
array (
|
||||||
|
'\'+\'' => 'r 3',
|
||||||
|
'\'-\'' => 'r 3',
|
||||||
|
'\'*\'' => 'r 3',
|
||||||
|
'\'/\'' => 'r 3',
|
||||||
|
'\')\'' => 'r 3',
|
||||||
|
'#' => 'r 3',
|
||||||
|
),
|
||||||
|
5 =>
|
||||||
|
array (
|
||||||
|
'\'+\'' => 'r 4',
|
||||||
|
'\'-\'' => 'r 4',
|
||||||
|
'\'*\'' => 's 8',
|
||||||
|
'\'/\'' => 's 10',
|
||||||
|
'\')\'' => 'r 4',
|
||||||
|
'#' => 'r 4',
|
||||||
|
),
|
||||||
|
6 =>
|
||||||
|
array (
|
||||||
|
'num' => 's 3',
|
||||||
|
'var' => 's 4',
|
||||||
|
'exp' => 's 7',
|
||||||
|
'\'(\'' => 's 12',
|
||||||
|
),
|
||||||
|
7 =>
|
||||||
|
array (
|
||||||
|
'\'+\'' => 'r 5',
|
||||||
|
'\'-\'' => 'r 5',
|
||||||
|
'\'*\'' => 's 8',
|
||||||
|
'\'/\'' => 's 10',
|
||||||
|
'\')\'' => 'r 5',
|
||||||
|
'#' => 'r 5',
|
||||||
|
),
|
||||||
|
8 =>
|
||||||
|
array (
|
||||||
|
'num' => 's 3',
|
||||||
|
'var' => 's 4',
|
||||||
|
'exp' => 's 9',
|
||||||
|
'\'(\'' => 's 12',
|
||||||
|
),
|
||||||
|
9 =>
|
||||||
|
array (
|
||||||
|
'\'+\'' => 'r 6',
|
||||||
|
'\'-\'' => 'r 6',
|
||||||
|
'\'*\'' => 'r 6',
|
||||||
|
'\'/\'' => 'r 6',
|
||||||
|
'\')\'' => 'r 6',
|
||||||
|
'#' => 'r 6',
|
||||||
|
),
|
||||||
|
10 =>
|
||||||
|
array (
|
||||||
|
'num' => 's 3',
|
||||||
|
'var' => 's 4',
|
||||||
|
'exp' => 's 11',
|
||||||
|
'\'(\'' => 's 12',
|
||||||
|
),
|
||||||
|
11 =>
|
||||||
|
array (
|
||||||
|
'\'+\'' => 'r 7',
|
||||||
|
'\'-\'' => 'r 7',
|
||||||
|
'\'*\'' => 'r 7',
|
||||||
|
'\'/\'' => 'r 7',
|
||||||
|
'\')\'' => 'r 7',
|
||||||
|
'#' => 'r 7',
|
||||||
|
),
|
||||||
|
12 =>
|
||||||
|
array (
|
||||||
|
'num' => 's 3',
|
||||||
|
'var' => 's 4',
|
||||||
|
'exp' => 's 13',
|
||||||
|
'\'(\'' => 's 12',
|
||||||
|
),
|
||||||
|
13 =>
|
||||||
|
array (
|
||||||
|
'\'+\'' => 's 2',
|
||||||
|
'\'-\'' => 's 6',
|
||||||
|
'\'*\'' => 's 8',
|
||||||
|
'\'/\'' => 's 10',
|
||||||
|
'\')\'' => 's 14',
|
||||||
|
),
|
||||||
|
14 =>
|
||||||
|
array (
|
||||||
|
'\'/\'' => 'r 8',
|
||||||
|
'\'*\'' => 'r 8',
|
||||||
|
'\'-\'' => 'r 8',
|
||||||
|
'\'+\'' => 'r 8',
|
||||||
|
'\')\'' => 'r 8',
|
||||||
|
'#' => 'r 8',
|
||||||
|
),
|
||||||
|
15 =>
|
||||||
|
array (
|
||||||
|
'\'=\'' => 's 16',
|
||||||
|
'\'+\'' => 'r 3',
|
||||||
|
'\'-\'' => 'r 3',
|
||||||
|
'\'*\'' => 'r 3',
|
||||||
|
'\'/\'' => 'r 3',
|
||||||
|
'#' => 'r 3',
|
||||||
|
),
|
||||||
|
16 =>
|
||||||
|
array (
|
||||||
|
'exp' => 's 17',
|
||||||
|
'num' => 's 3',
|
||||||
|
'var' => 's 4',
|
||||||
|
'\'(\'' => 's 12',
|
||||||
|
),
|
||||||
|
17 =>
|
||||||
|
array (
|
||||||
|
'\'+\'' => 's 2',
|
||||||
|
'\'-\'' => 's 6',
|
||||||
|
'\'*\'' => 's 8',
|
||||||
|
'\'/\'' => 's 10',
|
||||||
|
'#' => 'r 1',
|
||||||
|
),
|
||||||
|
18 =>
|
||||||
|
array (
|
||||||
|
'#' => 'r 9',
|
||||||
|
),
|
||||||
|
);
|
||||||
|
function reduce_0_stmt_1($tokens, &$result) {
|
||||||
|
#
|
||||||
|
# (0) stmt := exp
|
||||||
|
#
|
||||||
|
$result = reset($tokens);
|
||||||
|
echo " -> "; echo $tokens[0]; echo "\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
function reduce_1_stmt_2($tokens, &$result) {
|
||||||
|
#
|
||||||
|
# (1) stmt := var '=' exp
|
||||||
|
#
|
||||||
|
$result = reset($tokens);
|
||||||
|
$v =& $tokens[0];
|
||||||
|
$e =& $tokens[2];
|
||||||
|
|
||||||
|
echo "$v = $e\n";
|
||||||
|
set_variable($v, $e);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
function reduce_2_exp_1($tokens, &$result) {
|
||||||
|
#
|
||||||
|
# (2) exp := num
|
||||||
|
#
|
||||||
|
$result = reset($tokens);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
function reduce_3_exp_2($tokens, &$result) {
|
||||||
|
#
|
||||||
|
# (3) exp := var
|
||||||
|
#
|
||||||
|
$result = reset($tokens);
|
||||||
|
$result = get_variable($tokens[0]);
|
||||||
|
}
|
||||||
|
|
||||||
|
function reduce_4_exp_3($tokens, &$result) {
|
||||||
|
#
|
||||||
|
# (4) exp := exp '+' exp
|
||||||
|
#
|
||||||
|
$result = reset($tokens);
|
||||||
|
$result = $tokens[0] + $tokens[2];
|
||||||
|
}
|
||||||
|
|
||||||
|
function reduce_5_exp_4($tokens, &$result) {
|
||||||
|
#
|
||||||
|
# (5) exp := exp '-' exp
|
||||||
|
#
|
||||||
|
$result = reset($tokens);
|
||||||
|
$result = $tokens[0] - $tokens[2];
|
||||||
|
}
|
||||||
|
|
||||||
|
function reduce_6_exp_5($tokens, &$result) {
|
||||||
|
#
|
||||||
|
# (6) exp := exp '*' exp
|
||||||
|
#
|
||||||
|
$result = reset($tokens);
|
||||||
|
$result = $tokens[0] * $tokens[2];
|
||||||
|
}
|
||||||
|
|
||||||
|
function reduce_7_exp_6($tokens, &$result) {
|
||||||
|
#
|
||||||
|
# (7) exp := exp '/' exp
|
||||||
|
#
|
||||||
|
$result = reset($tokens);
|
||||||
|
$result = $tokens[0] / $tokens[2];
|
||||||
|
}
|
||||||
|
|
||||||
|
function reduce_8_exp_7($tokens, &$result) {
|
||||||
|
#
|
||||||
|
# (8) exp := '(' exp ')'
|
||||||
|
#
|
||||||
|
$result = $tokens[1];
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
function reduce_9_start_1($tokens, &$result) {
|
||||||
|
#
|
||||||
|
# (9) 'start' := stmt
|
||||||
|
#
|
||||||
|
$result = reset($tokens);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
var $method = array (
|
||||||
|
0 => 'reduce_0_stmt_1',
|
||||||
|
1 => 'reduce_1_stmt_2',
|
||||||
|
2 => 'reduce_2_exp_1',
|
||||||
|
3 => 'reduce_3_exp_2',
|
||||||
|
4 => 'reduce_4_exp_3',
|
||||||
|
5 => 'reduce_5_exp_4',
|
||||||
|
6 => 'reduce_6_exp_5',
|
||||||
|
7 => 'reduce_7_exp_6',
|
||||||
|
8 => 'reduce_8_exp_7',
|
||||||
|
9 => 'reduce_9_start_1',
|
||||||
|
);
|
||||||
|
var $a = array (
|
||||||
|
0 =>
|
||||||
|
array (
|
||||||
|
'symbol' => 'stmt',
|
||||||
|
'len' => 1,
|
||||||
|
'replace' => true,
|
||||||
|
),
|
||||||
|
1 =>
|
||||||
|
array (
|
||||||
|
'symbol' => 'stmt',
|
||||||
|
'len' => 3,
|
||||||
|
'replace' => true,
|
||||||
|
),
|
||||||
|
2 =>
|
||||||
|
array (
|
||||||
|
'symbol' => 'exp',
|
||||||
|
'len' => 1,
|
||||||
|
'replace' => true,
|
||||||
|
),
|
||||||
|
3 =>
|
||||||
|
array (
|
||||||
|
'symbol' => 'exp',
|
||||||
|
'len' => 1,
|
||||||
|
'replace' => true,
|
||||||
|
),
|
||||||
|
4 =>
|
||||||
|
array (
|
||||||
|
'symbol' => 'exp',
|
||||||
|
'len' => 3,
|
||||||
|
'replace' => true,
|
||||||
|
),
|
||||||
|
5 =>
|
||||||
|
array (
|
||||||
|
'symbol' => 'exp',
|
||||||
|
'len' => 3,
|
||||||
|
'replace' => true,
|
||||||
|
),
|
||||||
|
6 =>
|
||||||
|
array (
|
||||||
|
'symbol' => 'exp',
|
||||||
|
'len' => 3,
|
||||||
|
'replace' => true,
|
||||||
|
),
|
||||||
|
7 =>
|
||||||
|
array (
|
||||||
|
'symbol' => 'exp',
|
||||||
|
'len' => 3,
|
||||||
|
'replace' => true,
|
||||||
|
),
|
||||||
|
8 =>
|
||||||
|
array (
|
||||||
|
'symbol' => 'exp',
|
||||||
|
'len' => 3,
|
||||||
|
'replace' => true,
|
||||||
|
),
|
||||||
|
9 =>
|
||||||
|
array (
|
||||||
|
'symbol' => '\'start\'',
|
||||||
|
'len' => 1,
|
||||||
|
'replace' => true,
|
||||||
|
),
|
||||||
|
);
|
||||||
|
}
|
|
@ -0,0 +1,24 @@
|
||||||
|
%class calc
|
||||||
|
%start stmt
|
||||||
|
|
||||||
|
%left '+' '-'
|
||||||
|
%left '*' '/'
|
||||||
|
|
||||||
|
stmt = exp { echo " -> "; echo $1; echo "\n"; }
|
||||||
|
| var/v '=' exp/e {
|
||||||
|
echo "$v = $e\n";
|
||||||
|
set_variable($v, $e);
|
||||||
|
}
|
||||||
|
.
|
||||||
|
|
||||||
|
|
||||||
|
exp = num
|
||||||
|
| var { $$ = get_variable($1); }
|
||||||
|
| exp '+' exp { $$ = $1 + $3; }
|
||||||
|
| exp '-' exp { $$ = $1 - $3; }
|
||||||
|
| exp '*' exp { $$ = $1 * $3; }
|
||||||
|
| exp '/' exp { $$ = $1 / $3; }
|
||||||
|
| '(' exp/$ ')'
|
||||||
|
.
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,68 @@
|
||||||
|
This program is like a calculator. Type in lines of math, and it will
|
||||||
|
print the results. You can set a variable with:
|
||||||
|
foo = 12 + 7.3
|
||||||
|
and use it in another calculation like:
|
||||||
|
23.14 - foo
|
||||||
|
|
||||||
|
<?
|
||||||
|
|
||||||
|
include_once "../parse_engine.php";
|
||||||
|
include_once "calc.class";
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
function tokenize($line) {
    // Split one line of calculator input into a flat list of token strings.
    //
    // Numbers are tokens, as are all other non-whitespace characters.
    // A run of letters is one token (a variable name); a run of digits with
    // an optional fractional part is one token (a number); anything else is
    // emitted one character at a time.
    //
    // Note: This isn't a particularly efficient tokenizer, but it gets the
    // job done.
    $out = array();

    // Trim BEFORE testing the loop condition, so that input consisting only
    // of whitespace yields no tokens instead of falling into the
    // single-character branch with an empty string.
    $line = trim($line);
    while (strlen($line)) {
        if (preg_match('/^[0-9]+(\.[0-9]*)?/', $line, $regs)) {
            # It's a number
            $consumed = strlen($regs[0]);
            $out[] = $regs[0];
        } else if (preg_match('/^[A-Za-z]+/', $line, $regs)) {
            # It's a variable name
            $consumed = strlen($regs[0]);
            $out[] = $regs[0];
        } else {
            # It's some other character
            $consumed = 1;
            $out[] = $line[0];
        }
        # Drop what was just consumed plus any whitespace around the rest.
        $line = trim((string) substr($line, $consumed));
    }
    return $out;
}
|
||||||
|
|
||||||
|
$symbol_table = array();
|
||||||
|
function set_variable($v, $e) {
|
||||||
|
global $symbol_table;
|
||||||
|
$symbol_table[$v] = $e;
|
||||||
|
}
|
||||||
|
function get_variable($v) {
    // Look up a calculator variable in the global symbol table and return
    // its value as a float. A variable that was never set reads as 0.0;
    // the isset() guard avoids indexing a missing key.
    global $symbol_table;
    return isset($symbol_table[$v]) ? doubleval($symbol_table[$v]) : 0.0;
}
|
||||||
|
|
||||||
|
function calculate($line) {
|
||||||
|
global $parser;
|
||||||
|
if (!strlen($line)) return;
|
||||||
|
try {
|
||||||
|
$parser->reset();
|
||||||
|
foreach(tokenize($line) as $t) {
|
||||||
|
if (is_numeric($t)) $parser->eat('num', doubleval($t));
|
||||||
|
else if (ctype_alpha($t)) $parser->eat('var', $t);
|
||||||
|
else $parser->eat("'$t'", null);
|
||||||
|
}
|
||||||
|
$parser->eat_eof();
|
||||||
|
} catch (parse_error $e) {
|
||||||
|
echo $e->getMessage(), "\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
$parser = new parse_engine(new calc());
|
||||||
|
while ($line = fgets(STDIN)) calculate(trim($line));
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,34 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
abstract class flex_scanner {
    /*
    Let's face it: PHP is not up to lexical processing. GNU flex handles
    it well, so I've created a little protocol for delegating the work.
    Extend this class so that executable() gives a path to your lexical
    analyser program.

    The protocol, as implemented below: the scanner program reads the input
    file on stdin and its output is split on NUL bytes into token records;
    each record is split on "\1" into line number, token type and token text.
    */

    # Token records still to be handed out; iterated with the array's
    # internal pointer.
    public $tokens = array();
    # Line number taken from the most recently returned token record.
    public $lineno = 1;

    # Return the command used to run the lexical analyser.
    abstract function executable();

    # Run the scanner over the file at $path and buffer all of its tokens.
    # Throws Exception if $path is not readable.
    function __construct($path) {
        if (!is_readable($path)) throw new Exception("$path is not readable.");
        # The path travels through the environment and is expanded by the
        # shell inside double quotes, rather than being pasted into the
        # command line itself.
        putenv("PHP_LIME_SCAN_STDIN=$path");
        $scanner = $this->executable();
        # The backtick operator can return NULL; cast so explode() always
        # receives a string.
        $tokens = explode("\0", (string) `$scanner < "\$PHP_LIME_SCAN_STDIN"`);
        # Discard the piece after the final NUL separator.
        array_pop($tokens);
        $this->tokens = $tokens;
        $this->lineno = 1;
    }

    # Return array($type, $text) for the next token and update $this->lineno,
    # or NULL once every token has been consumed.
    function next() {
        # each() was removed in PHP 8.0, so walk the internal array pointer
        # explicitly. key() is NULL once the pointer has passed the last
        # element.
        if (key($this->tokens) === null) return null;
        $token = current($this->tokens);
        next($this->tokens);
        list($this->lineno, $type, $text) = explode("\1", $token);
        return array($type, $text);
    }

    # Push every remaining token into $parser via eat(), then signal end of
    # input and return whatever eat_eof() returns.
    function feed($parser) {
        while (list($type, $text) = $this->next()) {
            $parser->eat($type, $text);
        }
        return $parser->eat_eof();
    }
}
|
|
@ -0,0 +1,31 @@
|
||||||
|
There is nothing to see here. Go and look at the file called "metagrammar".
|
||||||
|
|
||||||
|
: $$ = new lime();
|
||||||
|
grammar pragma toklist stop : $$->pragma($2, $3);
|
||||||
|
grammar rewrite stop : $2->update($$);
|
||||||
|
to grammar
|
||||||
|
: {$$=array();}
|
||||||
|
toklist sym : $$[] = $2;
|
||||||
|
toklist lit : $$[] = $2;
|
||||||
|
to toklist
|
||||||
|
sym '=' rhs : $$ = new lime_rewrite($1); $$->add_rhs($3);
|
||||||
|
rewrite '|' rhs : $$->add_rhs($3);
|
||||||
|
to rewrite
|
||||||
|
list : $$ = new lime_rhs($1, '');
|
||||||
|
list action : $$ = new lime_rhs($1, $2);
|
||||||
|
to rhs
|
||||||
|
action : $$ = new lime_action($1, NULL);
|
||||||
|
action lambda : $$ = new lime_action($1, $2);
|
||||||
|
sym : $$ = new lime_glyph($1, NULL);
|
||||||
|
sym lambda : $$ = new lime_glyph($1, $2);
|
||||||
|
lit : $$ = new lime_glyph($1, NULL);
|
||||||
|
to slot
|
||||||
|
: $$ = new lime_rhs();
|
||||||
|
rhs slot : $$->add($2);
|
||||||
|
to rhs
|
||||||
|
'{' code '}' : $$ = $2;
|
||||||
|
to action
|
||||||
|
:
|
||||||
|
code php : $$.=$2;
|
||||||
|
code '{' code '}' : $$.='{'.$3.'}';
|
||||||
|
to code
|
|
@ -0,0 +1,911 @@
|
||||||
|
<?php
|
||||||
|
/*
|
||||||
|
* This program is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation; either version 2 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU Library General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
define('LIME_DIR', dirname(__FILE__));
|
||||||
|
|
||||||
|
function emit($str) { fputs(STDERR, $str."\n"); }
|
||||||
|
|
||||||
|
class Bug extends Exception {}
|
||||||
|
function bug($gripe='Bug found.') { throw new Bug($gripe); }
|
||||||
|
function bug_if($falacy, $gripe='Bug found.') { if ($falacy) throw new Bug($gripe); }
|
||||||
|
function bug_unless($assertion, $gripe='Bug found.') { if (!$assertion) throw new Bug($gripe); }
|
||||||
|
|
||||||
|
include_once(LIME_DIR.'/parse_engine.php');
|
||||||
|
include_once(LIME_DIR.'/set.so.php');
|
||||||
|
include_once(LIME_DIR.'/flex_token_stream.php');
|
||||||
|
|
||||||
|
# Produce the PHP source text that indexes the $tokens array at $pos.
function lime_token_reference($pos) {
    return sprintf('$tokens[%s]', $pos);
}
|
||||||
|
function lime_token_reference_callback($foo) { return lime_token_reference($foo[1]-1); }
|
||||||
|
|
||||||
|
class cf_action {
|
||||||
|
function __construct($code) { $this->code=$code; }
|
||||||
|
}
|
||||||
|
class step {
|
||||||
|
/*
|
||||||
|
Base class for parse table instructions. The main idea is to make the
|
||||||
|
subclasses responsible for conflict resolution among themselves. It also
|
||||||
|
forms a sort of interface to the parse table.
|
||||||
|
*/
|
||||||
|
function __construct($sym) {
|
||||||
|
bug_unless($sym instanceof sym);
|
||||||
|
$this->sym = $sym;
|
||||||
|
}
|
||||||
|
function glyph() { return $this->sym->name; }
|
||||||
|
}
|
||||||
|
class error extends step {
|
||||||
|
function sane() { return false; }
|
||||||
|
function instruction() { bug("This should not happen."); }
|
||||||
|
function decide($that) { return $this; /* An error shall remain one. */ }
|
||||||
|
}
|
||||||
|
class shift extends step {
|
||||||
|
function __construct($sym, $q) {
|
||||||
|
parent::__construct($sym);
|
||||||
|
$this->q = $q;
|
||||||
|
}
|
||||||
|
function sane() { return true; }
|
||||||
|
function instruction() { return "s $this->q"; }
|
||||||
|
function decide($that) {
|
||||||
|
# shift-shift conflicts are impossible.
|
||||||
|
# shift-accept conflicts are a bug.
|
||||||
|
# so we can infer:
|
||||||
|
bug_unless($that instanceof reduce);
|
||||||
|
|
||||||
|
# That being said, the resolution is a matter of precedence.
|
||||||
|
$shift_prec = $this->sym->right_prec;
|
||||||
|
$reduce_prec = $that->rule->prec;
|
||||||
|
|
||||||
|
# If we don't have defined precedence levels for both options,
|
||||||
|
# then we default to shifting:
|
||||||
|
if (!($shift_prec and $reduce_prec)) return $this;
|
||||||
|
|
||||||
|
# Otherwise, use the step with higher precedence.
|
||||||
|
if ($shift_prec > $reduce_prec) return $this;
|
||||||
|
if ($reduce_prec > $shift_prec) return $that;
|
||||||
|
|
||||||
|
# The "nonassoc" works by giving equal precedence to both options,
|
||||||
|
# which means to put an error instruction in the parse table.
|
||||||
|
return new error($this->sym);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
class reduce extends step {
|
||||||
|
function __construct($sym, $rule) {
|
||||||
|
bug_unless($rule instanceof rule);
|
||||||
|
parent::__construct($sym);
|
||||||
|
$this->rule = $rule;
|
||||||
|
}
|
||||||
|
function sane() { return true; }
|
||||||
|
function instruction() { return 'r '.$this->rule->id; }
|
||||||
|
function decide($that) {
|
||||||
|
# This means that the input grammar has a reduce-reduce conflict.
|
||||||
|
# Such things are considered an error in the input.
|
||||||
|
throw new RRC($this, $that);
|
||||||
|
#exit(1);
|
||||||
|
# BISON would go with the first encountered reduce thus:
|
||||||
|
# return $this;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
class accept extends step {
|
||||||
|
function __construct($sym) { parent::__construct($sym); }
|
||||||
|
function sane() { return true; }
|
||||||
|
function instruction() { return 'a '.$this->sym->name; }
|
||||||
|
}
|
||||||
|
class RRC extends Exception {
|
||||||
|
function __construct($a, $b) {
|
||||||
|
parent::__construct("Reduce-Reduce Conflict");
|
||||||
|
$this->a = $a;
|
||||||
|
$this->b = $b;
|
||||||
|
}
|
||||||
|
function make_noise() {
|
||||||
|
emit(sprintf(
|
||||||
|
"Reduce-Reduce Conflict:\n%s\n%s\nLookahead is (%s)",
|
||||||
|
$this->a->rule->text(),
|
||||||
|
$this->b->rule->text(),
|
||||||
|
$this->a->glyph()
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
class state {
|
||||||
|
function __construct($id, $key, $close) {
|
||||||
|
$this->id = $id;
|
||||||
|
$this->key = $key;
|
||||||
|
$this->close = $close; # config key -> object
|
||||||
|
ksort($this->close);
|
||||||
|
$this->action = array();
|
||||||
|
}
|
||||||
|
function dump() {
|
||||||
|
echo " * ".$this->id.' / '.$this->key."\n";
|
||||||
|
foreach ($this->close as $config) $config->dump();
|
||||||
|
}
|
||||||
|
function add_shift($sym, $state) {
|
||||||
|
$this->add_instruction(new shift($sym, $state->id));
|
||||||
|
}
|
||||||
|
function add_reduce($sym, $rule) {
|
||||||
|
$this->add_instruction(new reduce($sym, $rule));
|
||||||
|
}
|
||||||
|
function add_accept($sym) {
|
||||||
|
$this->add_instruction(new accept($sym));
|
||||||
|
}
|
||||||
|
function add_instruction($step) {
|
||||||
|
bug_unless($step instanceof step);
|
||||||
|
$this->action[] = $step;
|
||||||
|
}
|
||||||
|
function find_reductions($lime) {
|
||||||
|
# rightmost configurations followset yields reduce.
|
||||||
|
foreach($this->close as $c) {
|
||||||
|
if ($c->rightmost) {
|
||||||
|
foreach ($c->follow->all() as $glyph) $this->add_reduce($lime->sym($glyph), $c->rule);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
function resolve_conflicts() {
|
||||||
|
# For each possible lookahead, find one (and only one) step to take.
|
||||||
|
$table = array();
|
||||||
|
foreach ($this->action as $step) {
|
||||||
|
$glyph = $step->glyph();
|
||||||
|
if (isset($table[$glyph])) {
|
||||||
|
# There's a conflict. The shifts all came first, which
|
||||||
|
# simplifies the coding for the step->decide() methods.
|
||||||
|
try {
|
||||||
|
$table[$glyph] = $table[$glyph]->decide($step);
|
||||||
|
} catch (RRC $e) {
|
||||||
|
emit("State $this->id:");
|
||||||
|
$e->make_noise();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
# This glyph is yet unprocessed, so the step at hand is
|
||||||
|
# our best current guess at what the grammar indicates.
|
||||||
|
$table[$glyph] = $step;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Now that we have the correct steps chosen, this routine is oddly
|
||||||
|
# also responsible for turning that table into the form that will
|
||||||
|
# eventually be passed to the parse engine. (So FIXME?)
|
||||||
|
$out = array();
|
||||||
|
foreach ($table as $glyph => $step) {
|
||||||
|
if ($step->sane()) $out[$glyph] = $step->instruction();
|
||||||
|
}
|
||||||
|
return $out;
|
||||||
|
}
|
||||||
|
function segment_config() {
|
||||||
|
# Filter $this->close into categories based on the symbol_after_the_dot.
|
||||||
|
$f = array();
|
||||||
|
foreach ($this->close as $c) {
|
||||||
|
$p = $c->symbol_after_the_dot;
|
||||||
|
if (!$p) continue;
|
||||||
|
$f[$p->name][] = $c;
|
||||||
|
}
|
||||||
|
return $f;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
class sym {
|
||||||
|
function __construct($name, $id) {
|
||||||
|
$this->name=$name;
|
||||||
|
$this->id=$id;
|
||||||
|
$this->term = true; # Until proven otherwise.
|
||||||
|
$this->rule = array();
|
||||||
|
$this->config = array();
|
||||||
|
$this->lambda = false;
|
||||||
|
$this->first = new set();
|
||||||
|
$this->left_prec = $this->right_prec = 0;
|
||||||
|
}
|
||||||
|
function summary() {
|
||||||
|
$out = '';
|
||||||
|
foreach ($this->rule as $rule) $out .= $rule->text()."\n";
|
||||||
|
return $out;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
class rule {
    /*
    One production of the grammar: a left-hand symbol, the list of
    right-hand symbols, the action code, and the bookkeeping computed from
    them (precedence, per-dot FIRST sets, and the position from which the
    remainder of the right-hand side can derive nothing).
    */
    public $id;
    public $sym;
    public $rhs;
    public $code;
    public $look;
    public $replace;
    public $prec;
    public $first;
    public $epsilon;

    function __construct($id, $sym, $rhs, $code, $look, $replace) {
        $this->id = $id;
        $this->sym = $sym;
        $this->rhs = $rhs;
        $this->code = $code;
        $this->look = $look;
        bug_unless(is_int($look));
        $this->replace = $replace;
        #$this->prec_sym = $prec_sym;
        $this->prec = 0;
        # first[$dot] is filled in by find_first().
        $this->first = array();
        # Starts at the end of the rhs; find_first() moves it left across
        # a trailing run of lambda symbols.
        $this->epsilon = count($rhs);
    }

    # Name of the symbol on the left-hand side.
    function lhs_glyph() { return $this->sym->name; }

    function determine_precedence() {
        # We may eventually expand to allow explicit prec_symbol declarations.
        # Until then, we'll go with the rightmost terminal, which is what
        # BISON does. People probably expect that. The leftmost terminal
        # is a reasonable alternative behaviour, but I don't see the big
        # deal just now.

        #$prec_sym = $this->prec_sym;
        #if (!$prec_sym)
        $prec_sym = $this->rightmost_terminal();
        # No terminal on the right-hand side: keep the default precedence.
        if (!$prec_sym) return;
        $this->prec = $prec_sym->left_prec;
    }

    # Return the last symbol of the rhs whose ->term flag is set, or NULL
    # when the rhs is empty or contains no such symbol.
    private function rightmost_terminal() {
        $rhs = $this->rhs;
        while ($rhs) {
            $symbol = array_pop($rhs);
            if ($symbol->term) return $symbol;
        }
        # Falling out of the loop means nothing qualified. Do not hand back
        # the last symbol examined: it is not a terminal.
        return NULL;
    }

    # Human-readable form: "(id) lhs := sym sym ...".
    function text() {
        $t = "($this->id) ".$this->lhs_glyph().' :=';
        foreach($this->rhs as $s) $t .= ' '.$s->name;
        return $t;
    }

    # Description of this rule as an array for the generated parser; the
    # action code is passed through $lang->fixup() first.
    function table(lime_language $lang) {
        return array(
            'symbol' => $this->lhs_glyph(),
            'len' => $this->look,
            'replace' => $this->replace,
            'code' => $lang->fixup($this->code),
            'text' => $this->text(),
        );
    }

    # True when every rhs symbol is flagged lambda (vacuously true for an
    # empty rhs).
    function lambda() {
        foreach ($this->rhs as $sym) if (!$sym->lambda) return false;
        return true;
    }

    # Fill $this->first[$dot] for every dot position, working right to left,
    # and pull $this->epsilon left across trailing lambda symbols.
    function find_first() {
        $dot = count($this->rhs);
        $last = $this->first[$dot] = new set();
        while ($dot) {
            $dot--;
            $symbol_after_the_dot = $this->rhs[$dot];
            $first = $symbol_after_the_dot->first->all();
            bug_if(empty($first) and !$symbol_after_the_dot->lambda);
            $set = new set($first);
            if ($symbol_after_the_dot->lambda) {
                # A lambda symbol lets whatever follows it show through.
                $set->union($last);
                if ($this->epsilon == $dot+1) $this->epsilon = $dot;
            }
            $last = $this->first[$dot] = $set;
        }
    }

    # Merge the FIRST sets of the leading rhs symbols into the lhs symbol's
    # FIRST set, stopping after the first non-lambda symbol. Returns true if
    # any union() call reported a change.
    function teach_symbol_of_first_set() {
        $go = false;
        foreach ($this->rhs as $sym) {
            if ($this->sym->first->union($sym->first)) $go = true;
            if (!$sym->lambda) break;
        }
        return $go;
    }

    # True when $dot is at or beyond the epsilon position.
    function lambda_from($dot) {
        return $this->epsilon <= $dot;
    }

    # Configuration for this rule with the dot at position 0.
    function leftmost($follow) {
        return new config($this, 0, $follow);
    }

    # Like text() without the id, with " ." inserted at position $dot.
    function dotted_text($dot) {
        $out = $this->lhs_glyph().' :=';
        # Stays -1 for an empty rhs so the trailing dot is still printed.
        $idx = -1;
        foreach($this->rhs as $idx => $s) {
            if ($idx == $dot) $out .= ' .';
            $out .= ' '.$s->name;
        }
        if ($dot > $idx) $out .= ' .';
        return $out;
    }
}
|
||||||
|
class config {
    /*
    A rule together with a dot position and a follow set. Configurations
    are linked to one another: _blink holds back-links recorded while
    states are being built, and fixlinks() turns them into forward links
    (_flink) on the configurations they point at.
    */
    public $rule;
    public $dot;
    public $key;
    public $rightmost;
    public $symbol_after_the_dot;
    public $follow;
    public $_blink = array();
    public $_flink = array();

    function __construct($rule, $dot, $follow) {
        $this->rule=$rule;
        $this->dot = $dot;
        $this->key = "$rule->id.$dot";
        # The dot is at (or past) the end of the right-hand side.
        $this->rightmost = count($rule->rhs) <= $dot;
        $this->symbol_after_the_dot = $this->rightmost ? null : $rule->rhs[$dot];
        $this->_blink = array();
        $this->follow = new set($follow);
        $this->_flink= array();
        # Count the rule's right-hand side, not the rule object itself:
        # count() on a non-Countable object is a TypeError on PHP 8.
        bug_unless($this->rightmost or count($rule->rhs));
    }

    # Dotted rule text followed by the follow set in square brackets.
    function text() {
        $out = $this->rule->dotted_text($this->dot);
        $out .= ' [ '.implode(' ', $this->follow->all()).' ]';
        return $out;
    }

    # Record a back-link to $config.
    function blink($config) {
        $this->_blink[] = $config;
    }

    # New configuration for the same rule with the dot advanced by one.
    function next() {
        bug_if($this->rightmost);
        $c = new config($this->rule, $this->dot+1, array());
        # Anything in the follow set for this config will also be in the next.
        # However, we link it backwards because we might wind up selecting a
        # pre-existing state, and the housekeeping is easier in the first half
        # of the program. We'll fix it before doing the propagation.
        $c->blink($this);
        return $c;
    }

    # Append every back-link of $that to this configuration's back-links.
    function copy_links_from($that) {
        foreach($that->_blink as $c) $this->blink($c);
    }

    function lambda() {
        return $this->rule->lambda_from($this->dot);
    }

    # Members of the rule's FIRST set for the position just after the symbol
    # following the dot.
    function simple_follow() {
        return $this->rule->first[$this->dot+1]->all();
    }

    function epsilon_follows() {
        return $this->rule->lambda_from($this->dot+1);
    }

    # Convert back-links into forward links on their targets, then drop the
    # back-links.
    function fixlinks() {
        foreach ($this->_blink as $that) $that->_flink[] = $this;
        # Clear the real back-link list. The old code assigned to a
        # differently named property and left _blink populated.
        # NOTE(review): no visible code reads _blink after fixlinks();
        # confirm against the rest of lime.php.
        $this->_blink = array();
    }

    # Debug print: key, dotted rule, follow set, then the keys of the
    # forward-linked configurations.
    function dump() {
        echo " * ";
        echo $this->key.' : ';
        echo $this->rule->dotted_text($this->dot);
        echo $this->follow->text();
        foreach ($this->_flink as $c) echo $c->key.' / ';
        echo "\n";
    }
}
|
||||||
|
# A grammar under construction plus the machinery that turns it into parser
# source code. Rules and pragmas are fed in (normally by the metaparser);
# build_parser() then computes the states and action table and returns the
# generated code.
class lime {
    # Name of the generated parser class; changed by the "class" pragma.
    public $parser_class = 'parser';
    # Counter from which set_assoc() derives the next precedence level.
    public $p_next;
    # Symbol table: glyph name => sym object.
    public $sym;
    # All rules, indexed by rule id.
    public $rule;
    # Glyph names given to the "start" pragma.
    public $start_symbol_set;
    # Parser states, keyed by their sorted basis-config keys.
    public $state;
    # The '#' symbol.
    public $stop;
    # Target-language helper used to render the generated code.
    public $lang;

    function __construct() {
        $this->p_next = 1;
        $this->sym = array();
        $this->rule = array();
        $this->start_symbol_set = array();
        $this->state = array();
        $this->stop = $this->sym('#');
        # sym() always returns an object, so no conditional is needed here.
        $this->sym('error')->term = false;
        $this->lang = new lime_language_php();
    }

    function language() { return $this->lang; }

    # Run the whole generation pipeline and return the parser source code.
    function build_parser() {
        $this->add_start_rule();
        foreach ($this->rule as $r) $r->determine_precedence();
        $this->find_sym_lamdba();
        $this->find_sym_first();
        foreach ($this->rule as $rule) $rule->find_first();
        $initial = $this->find_states();
        $this->fixlinks();
        # $this->dump_configurations();
        $this->find_follow_sets();
        foreach ($this->state as $s) $s->find_reductions($this);
        $i = $this->resolve_conflicts();
        $a = $this->rule_table();
        $qi = $initial->id;
        return $this->lang->ptab_to_class(
            $this->parser_class,
            array('a' => $a, 'qi' => $qi, 'i' => $i)
        );
    }

    # Collect each rule's table entry, keyed by rule id.
    function rule_table() {
        $s = array();
        foreach ($this->rule as $i => $r) {
            $s[$i] = $r->table($this->lang);
        }
        return $s;
    }

    function add_rule($symbol, $rhs, $code) {
        $this->add_raw_rule($symbol, $rhs, $code, count($rhs), true);
    }

    # Invent a left-hand-side name for an internal sub-rule of $real. The
    # current rule count is appended to the quoted name.
    function trump_up_bogus_lhs($real) {
        return "'$real'".count($this->rule);
    }

    # Register one production. $lhs becomes a non-terminal, and it is marked
    # as able to derive the empty string when $rhs is empty.
    function add_raw_rule($lhs, $rhs, $code, $look, $replace) {
        $sym = $this->sym($lhs);
        $sym->term = false;
        if (empty($rhs)) $sym->lambda = true;
        $rs = array();
        foreach ($rhs as $str) $rs[] = $this->sym($str);
        $rid = count($this->rule);
        $r = new rule($rid, $sym, $rs, $code, $look, $replace);
        $this->rule[$rid] = $r;
        $sym->rule[] = $r;
    }

    # Look up the symbol named $str, creating it on first use.
    function sym($str) {
        if (!isset($this->sym[$str])) {
            $this->sym[$str] = new sym($str, count($this->sym));
        }
        return $this->sym[$str];
    }

    # Concatenate the summaries of all non-terminal symbols.
    function summary() {
        $out = '';
        foreach ($this->sym as $sym) {
            if (!$sym->term) $out .= $sym->summary();
        }
        return $out;
    }

    # Fixed-point pass: flag a symbol as lambda as soon as one of its rules
    # reports lambda(), and repeat until a full sweep changes nothing.
    private function find_sym_lamdba() {
        do {
            $go = false;
            foreach ($this->sym as $sym) if (!$sym->lambda) {
                foreach ($sym->rule as $rule) if ($rule->lambda()) {
                    $go = true;
                    $sym->lambda = true;
                }
            }
        } while ($go);
    }

    # Every terminal's first set contains the terminal itself.
    private function teach_terminals_first_set() {
        foreach ($this->sym as $sym) {
            if ($sym->term) $sym->first->add($sym->name);
        }
    }

    # Fixed-point pass over the rules until no first set grows any more.
    private function find_sym_first() {
        $this->teach_terminals_first_set();
        do {
            $go = false;
            foreach ($this->rule as $r) {
                if ($r->teach_symbol_of_first_set()) $go = true;
            }
        } while ($go);
    }

    # Add the rule 'start' -> <start symbol>.
    function add_start_rule() {
        $rewrite = new lime_rewrite("'start'");
        $rhs = new lime_rhs();
        $rhs->add(new lime_glyph($this->deduce_start_symbol()->name, NULL));
        #$rhs->add(new lime_glyph($this->stop->name, NULL));
        $rewrite->add_rhs($rhs);
        $rewrite->update($this);
    }

    # Choose the start symbol: the first glyph named by the "start" pragma
    # when it is a known non-terminal, otherwise the left side of the first
    # rule. Returns a sym object.
    private function deduce_start_symbol() {
        # start_symbol_set holds glyph names (strings), not sym objects.
        $candidate = current($this->start_symbol_set);
        # Did the person try to set a start symbol at all?
        if (!$candidate) return $this->first_rule_lhs();
        # Do we actually have such a symbol on the left of a rule? It has to
        # be looked up in the symbol table; the name itself carries no flag.
        # NOTE(review): relies on sym objects starting out with a truthy
        # ->term until add_raw_rule() clears it - confirm in class sym.
        if (!isset($this->sym[$candidate]) || $this->sym[$candidate]->term) {
            return $this->first_rule_lhs();
        }
        # Ok, it's a decent choice. We need to return the symbol entry.
        return $this->sym[$candidate];
    }

    private function first_rule_lhs() {
        reset($this->rule);
        $r = current($this->rule);
        return $r->sym;
    }

    # Build the initial state from the rules of 'start'. Building a state
    # also builds, recursively, every state reachable from it, which digs
    # out the LR(0) state graph. Returns the initial state.
    function find_states() {
        $start_glyph = "'start'";
        $sym = $this->sym($start_glyph);
        $basis = array();
        foreach ($sym->rule as $rule) {
            $c = $rule->leftmost(array('#'));
            $basis[$c->key] = $c;
        }
        $initial = $this->get_state($basis);
        $initial->add_accept($sym);
        return $initial;
    }

    # Return the state for a basis (config key => config). States are keyed
    # by the sorted, space-joined config keys. An existing state only
    # absorbs the links of the new basis configs; a new one is closed,
    # stored, and has its shifts built.
    function get_state($basis) {
        $key = array_keys($basis);
        sort($key);
        $key = implode(' ', $key);
        if (isset($this->state[$key])) {
            # Copy all the links around...
            $state = $this->state[$key];
            foreach ($basis as $config) {
                $state->close[$config->key]->copy_links_from($config);
            }
            return $state;
        } else {
            $close = $this->state_closure($basis);
            $this->state[$key] = $state = new state(count($this->state), $key, $close);
            $this->build_shifts($state);
            return $state;
        }
    }

    # Compute the closure of a list of configs, returned as key => config.
    private function state_closure($q) {
        # $q is a list of config.
        $close = array();
        while ($config = array_pop($q)) {
            if (isset($close[$config->key])) {
                # Already present: merge links and follow set into it.
                $close[$config->key]->copy_links_from($config);
                $close[$config->key]->follow->union($config->follow);
                continue;
            }
            $close[$config->key] = $config;

            $symbol_after_the_dot = $config->symbol_after_the_dot;
            if (!$symbol_after_the_dot) continue;

            if (! $symbol_after_the_dot->term) {
                foreach ($symbol_after_the_dot->rule as $r) {
                    $station = $r->leftmost($config->simple_follow());
                    if ($config->epsilon_follows()) $station->blink($config);
                    $q[] = $station;
                }
                # The following turned out to be wrong. Don't do it.
                #if ($symbol_after_the_dot->lambda) {
                #    $q[] = $config->next();
                #}
            }
        }
        return $close;
    }

    # For each glyph segment of the state's configs, advance those configs
    # past the glyph, find or create the destination state, and add a shift.
    function build_shifts($state) {
        foreach ($state->segment_config() as $glyph => $segment) {
            $basis = array();
            foreach ($segment as $preshift) {
                $postshift = $preshift->next();
                $basis[$postshift->key] = $postshift;
            }
            $dest = $this->get_state($basis);
            $state->add_shift($this->sym($glyph), $dest);
        }
    }

    # Call fixlinks() on every config in the closure of every state.
    function fixlinks() {
        foreach ($this->state as $s) {
            foreach ($s->close as $c) $c->fixlinks();
        }
    }

    # Work-list propagation of follow sets along forward links. A config is
    # queued again whenever its follow set grew.
    function find_follow_sets() {
        $q = array();
        foreach ($this->state as $s) {
            foreach ($s->close as $c) $q[] = $c;
        }
        while ($q) {
            $c = array_shift($q);
            foreach ($c->_flink as $d) {
                if ($d->follow->union($c->follow)) $q[] = $d;
            }
        }
    }

    # Assign one new precedence level to all glyphs in $ss. $l and $r are
    # added to the level to form the left and right precedence.
    private function set_assoc($ss, $l, $r) {
        $p = ($this->p_next++)*2;
        foreach ($ss as $glyph) {
            $s = $this->sym($glyph);
            $s->left_prec = $p+$l;
            $s->right_prec = $p+$r;
        }
    }

    function left_assoc($ss) { $this->set_assoc($ss, 1, 0); }

    function right_assoc($ss) { $this->set_assoc($ss, 0, 1); }

    function non_assoc($ss) { $this->set_assoc($ss, 0, 0); }

    private function resolve_conflicts() {
        # For each state, try to find one and only one
        # thing to do for any given lookahead.
        $i = array();
        foreach ($this->state as $s) $i[$s->id] = $s->resolve_conflicts();
        return $i;
    }

    function dump_configurations() {
        foreach ($this->state as $q) $q->dump();
    }

    function dump_first_sets() {
        foreach ($this->sym as $s) {
            echo " * ";
            echo $s->name.' : ';
            echo $s->first->text();
            echo "\n";
        }
    }

    # Install a rule whose right side mixes glyph names (strings) with
    # action objects. Every action except the last is split off into a
    # sub-rule with an invented left-hand side.
    function add_rule_with_actions($lhs, $rhs) {
        # First, make sure this thing is well-formed.
        if (!is_object(end($rhs))) $rhs[] = new cf_action('');
        # Now, split it into chunks based on the actions.
        $look = -1;
        $subrule = array();
        $subsymbol = '';
        while (count($rhs)) {
            $it = array_shift($rhs);
            $look ++;
            if (is_string($it)) {
                $subrule[] = $it;
            } else {
                # It's an action.
                $code = $it->code;
                # Is it the last one?
                if (count($rhs)) {
                    # no.
                    $subsymbol = $this->trump_up_bogus_lhs($lhs);
                    $this->add_raw_rule($subsymbol, $subrule, $code, $look, false);
                    $subrule = array($subsymbol);
                } else {
                    # yes.
                    $this->add_raw_rule($lhs, $subrule, $code, $look, true);
                }
            }
        }
    }

    # Apply one pragma. Unknown pragma types are reported and end the run.
    function pragma($type, $args) {
        switch ($type) {
            case 'left':
                $this->left_assoc($args);
                break;

            case 'right':
                $this->right_assoc($args);
                break;

            case 'nonassoc':
                $this->non_assoc($args);
                break;

            case 'start':
                $this->start_symbol_set = $args;
                break;

            case 'class':
                $this->parser_class = $args[0];
                break;

            default:
                emit(sprintf("Bad Parser Pragma: (%s)", $type));
                exit(1);
        }
    }
}
|
||||||
|
# Base class for target-language back ends.
class lime_language {}

# PHP back end: renders action code fragments and the final parser class.
class lime_language_php extends lime_language {
    # Statement that assigns $expr to the action's result variable.
    private function result_code($expr) { return '$result=' . $expr . ";\n"; }

    # Default result: the first element of the matched tokens.
    function default_result() { return $this->result_code('reset($tokens)'); }

    # Result taken from the token at position $pos.
    function result_pos($pos) { return $this->result_code(lime_token_reference($pos)); }

    # Statement binding variable $name by reference to token $pos.
    function bind($name, $pos) { return '$' . $name . '=&$tokens[' . $pos . "];\n"; }

    # Rewrite $<digits> through lime_token_reference_callback, then $$ to
    # the result variable.
    function fixup($code) {
        $code = preg_replace_callback('/\\$(\d+)/', 'lime_token_reference_callback', $code);
        $code = preg_replace('/\\$\\$/', '$result', $code);
        return $code;
    }

    function to_php($code) {
        return $code;
    }

    # Render the parser class source.
    #   $parser_class  name of the class to generate
    #   $ptab          array with keys 'qi', 'i' and 'a'; each entry of 'a'
    #                  carries at least 'symbol', 'text' and 'code'
    # Returns the class source code as a string.
    function ptab_to_class($parser_class, $ptab) {
        $code = "class $parser_class extends lime_parser{\n";
        $code .= 'var $qi = '.var_export($ptab['qi'], true).";\n";
        $code .= 'var $i = '.var_export($ptab['i'], true).";\n";

        $rc = array();      # per-symbol count of rules seen so far
        $method = array();  # rule id => reduce method name
        $rules = array();   # rule id => table entry without code and text
        foreach ($ptab['a'] as $k => $a) {
            $symbol = preg_replace('/[^\w]/', '', $a['symbol']);
            # Start the counter explicitly; incrementing a missing key
            # raises an undefined-index diagnostic.
            $rc[$symbol] = isset($rc[$symbol]) ? $rc[$symbol] + 1 : 1;
            $rn = $rc[$symbol];
            $mn = 'reduce_'.$k.'_'.$symbol.'_'.$rn;
            $method[$k] = $mn;
            $comment = "#\n# $a[text]\n#\n";
            $php = $this->to_php($a['code']);
            $code .= "function $mn(".LIME_CALL_PROTOCOL.") {\n$comment$php\n}\n\n";

            unset($a['code']);
            unset($a['text']);
            $rules[$k] = $a;
        }

        $code .= 'var $method = '.var_export($method, true).";\n";
        $code .= 'var $a = '.var_export($rules, true).";\n";

        $code .= "}\n";
        #echo $code;
        return $code;
    }
}
|
||||||
|
# One right-hand side of a rewrite rule: an ordered list of slots (glyphs
# and actions).
class lime_rhs {
    # Ordered list of lime_slot objects.
    public $rhs;

    function __construct() {
        /**
        Construct and add glyphs and actions in whatever order.
        Then, add this to a lime_rewrite.

        Don't call install_rule.
        The rewrite will do that for you when you "update" with it.
        */
        $this->rhs = array();
    }

    # Append a slot; anything that is not a lime_slot is reported through
    # bug_unless() (helper defined elsewhere).
    function add($slot) {
        bug_unless($slot instanceof lime_slot);
        $this->rhs[] = $slot;
    }

    # Install this right-hand side into $lime as a rule for $lhs.
    # This is the part that has to break the rule into subrules if
    # necessary: every action that is not the last slot becomes its own
    # sub-rule with an invented left-hand side.
    function install_rule(lime $lime, $lhs) {
        $slots = $this->rhs;
        # First, make sure this thing is well-formed: it must end in an action.
        if (!(end($slots) instanceof lime_action)) {
            $slots[] = new lime_action('', NULL);
        }
        # Now, split it into chunks based on the actions.
        $lang = $lime->language();
        $result_code = $lang->default_result();
        $subrule = array();
        $preamble = '';
        $last = count($slots) - 1;
        foreach ($slots as $look => $it) {
            if ($it instanceof lime_glyph) {
                $subrule[] = $it->data;
            } elseif ($it instanceof lime_action) {
                $code = $it->data;
                if ($look < $last) {
                    # An action in the middle of the rule.
                    $subsymbol = $lime->trump_up_bogus_lhs($lhs);
                    $action = $lang->default_result().$preamble.$code;
                    $lime->add_raw_rule($subsymbol, $subrule, $action, $look, false);
                    $subrule = array($subsymbol);
                } else {
                    # The final action completes the real rule.
                    $action = $result_code.$preamble.$code;
                    $lime->add_raw_rule($lhs, $subrule, $action, $look, true);
                }
            } else {
                impossible();
            }
            # A slot named '$' supplies the result of the rule; any other
            # named slot is bound to a variable for later actions.
            if ($it->name == '$') {
                $result_code = $lang->result_pos($look);
            } elseif ($it->name) {
                $preamble .= $lang->bind($it->name, $look);
            }
        }
    }
}
|
||||||
|
# All alternative right-hand sides for one left-hand-side glyph.
class lime_rewrite {
    # Name of the left-hand side.
    public $glyph;
    # List of lime_rhs alternatives.
    public $rhs;

    function __construct($glyph) {
        /**
        Construct one of these with the name of the lhs.
        Add some rhs-es to it.
        Finally, "update" the lime you're building.
        */
        $this->glyph = $glyph;
        $this->rhs = array();
    }

    # Append one alternative; anything that is not a lime_rhs is reported
    # through bug_unless() (helper defined elsewhere).
    function add_rhs($rhs) {
        bug_unless($rhs instanceof lime_rhs);
        $this->rhs[] = $rhs;
    }

    # Install every alternative into $lime as a rule for this glyph.
    function update(lime $lime) {
        foreach ($this->rhs as $rhs) {
            $rhs->install_rule($lime, $this->glyph);
        }
    }
}
|
||||||
|
class lime_slot {
    /**
    This keeps track of one position in an rhs.
    We specialize to handle actions and glyphs.
    If there is a name for the slot, we store it here.
    Later on, this structure will be consulted in the formation of
    actual production rules.
    */

    # Glyph name or action code, depending on the subclass.
    public $data;
    # Optional slot name; NULL when the slot is unnamed.
    public $name;

    function __construct($data, $name) {
        $this->data = $data;
        $this->name = $name;
    }

    # Code binding this slot's name to token $pos by reference, or NULL when
    # the slot has no name.
    function preamble($pos) {
        # Check for NULL first: strlen(NULL) is deprecated since PHP 8.1.
        if ($this->name !== NULL && strlen($this->name) > 0) {
            return "\$$this->name =& \$tokens[$pos];\n";
        }
        return NULL;
    }
}

# A slot holding a grammar symbol.
class lime_glyph extends lime_slot {}

# A slot holding action code.
class lime_action extends lime_slot {}
|
||||||
|
function lime_bootstrap() {

    /*

    This function isn't too terribly interesting to the casual observer.
    You're probably better off looking at parse_lime_grammar() instead.

    Ok, if you insist, I'll explain.

    The input to Lime is a CFG parser definition. That definition is
    written in some language. (The Lime language, to be exact.)
    Anyway, I have to parse the Lime language and compile it into a
    very complex data structure from which a parser is eventually
    built. What better way than to use Lime itself to parse its own
    language? Well, it's almost that simple, but not quite.

    The Lime language is fairly potent, but a restricted subset of
    its features was used to write a metagrammar. Then, I hand-translated
    that metagrammar into another form which is easy to snarf up.
    In the process of reading that simplified form, this function
    builds the same sort of data structure that later gets turned into
    a parser. The last step is to run the parser generation algorithm,
    eval() the resulting PHP code, and voila! With no hard work, I can
    suddenly read and comprehend the full range of the Lime language
    without ever having written an algorithm to do so. It feels like magic.

    File format, as read below: a line containing ':' is
    "glyph glyph ... : action code" and is collected as a pending
    alternative; a line of the form "to NAME" installs all pending
    alternatives as the rules for NAME; every other line is ignored.

    */

    $bootstrap_path = LIME_DIR."/lime.bootstrap";
    $metagrammar = new lime();
    $metagrammar->parser_class = 'lime_metaparser';
    $pending = array();
    bug_unless(is_readable($bootstrap_path));
    foreach (file($bootstrap_path) as $line) {
        $parts = explode(":", $line, 2);
        if (count($parts) != 2) {
            # Not an alternative, so only a "to NAME" line matters.
            if (!preg_match('/^to (\w+)$/', $line, $found)) {
                continue;
            }
            $rewrite = new lime_rewrite($found[1]);
            foreach ($pending as $alternative) {
                $rewrite->add_rhs($alternative);
            }
            $rewrite->update($metagrammar);
            $pending = array();
            continue;
        }
        $alternative = new lime_rhs();
        $pattern = trim($parts[0]);
        if ($pattern !== '') {
            foreach (explode(' ', $pattern) as $glyph) {
                $alternative->add(new lime_glyph($glyph, NULL));
            }
        }
        $alternative->add(new lime_action($parts[1], NULL));
        $pending[] = $alternative;
    }
    # The generated source declares the lime_metaparser class.
    eval($metagrammar->build_parser());
}
|
||||||
|
|
||||||
|
class voodoo_scanner extends flex_scanner {
    /*

    The voodoo is in the way I do lexical processing on grammar definition
    files. They contain embedded bits of PHP, and it's important to keep
    track of things like strings, comments, and matched braces. It seemed
    like an ideal problem to solve with GNU flex, so I wrote a little
    scanner in flex and C to dig out the tokens for me. Of course, I need
    the tokens in PHP, so I designed a simple binary wrapper for them which
    also contains line-number information, guaranteed to help out if you
    write a grammar which surprises the parser in any manner.

    */

    # Path of the compiled flex scanner, which lives in LIME_DIR.
    function executable() {
        $binary = LIME_DIR.'/lime_scan_tokens';
        return $binary;
    }
}
|
||||||
|
|
||||||
|
function parse_lime_grammar($path) {
    /*

    This is a good function to read because it teaches you how to interface
    with a Lime parser. I've tried to isolate out the bits that aren't
    instructive in that regard.

    Returns the generated PHP code for the grammar file at $path. A parse
    error ends the script with a message naming the file and line.

    */
    if (!class_exists('lime_metaparser')) {
        lime_bootstrap();
    }

    $metaparser = new lime_metaparser();
    $parse_engine = new parse_engine($metaparser);
    $scanner = new voodoo_scanner($path);
    try {
        # Feeding the scanner's tokens to the engine parses the grammar;
        # the result of parsing a Lime grammar is a Lime object.
        $grammar = $scanner->feed($parse_engine);
        # Its build_parser() method produces the output PHP code.
        $generated = $grammar->build_parser();
        return $generated;
    } catch (parse_error $e) {
        die ($e->getMessage()." in $path line $scanner->lineno.\n");
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
# Command-line driver: every argument is a grammar file. The generated
# parsers are concatenated and printed as one PHP file behind a warning
# banner.
# !empty() avoids an undefined-index diagnostic when argv is not set.
if (!empty($_SERVER['argv'])) {
    $code = '';
    array_shift($_SERVER['argv']); # Strip out the program name.
    foreach ($_SERVER['argv'] as $path) {
        $code .= parse_lime_grammar($path);
    }

    echo "<?php\n\n";
    # The banner below is emitted verbatim. PHP is re-entered with the full
    # open tag because the short tag depends on the short_open_tag setting.
?>

/*

DON'T EDIT THIS FILE!

This file was automatically generated by the Lime parser generator.
The real source code you should be looking at is in one or more
grammar files in the Lime format.

THE ONLY REASON TO LOOK AT THIS FILE is to see where in the grammar
file that your error happened, because there are enough comments to
help you debug your grammar.

If you ignore this warning, you're shooting yourself in the brain,
not the foot.

*/

<?php
    echo $code;
}
|
Binary file not shown.
|
@ -0,0 +1,122 @@
|
||||||
|
/*
|
||||||
|
* This program is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation; either version 2 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU Library General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
*/
|
||||||
|
%{
/* Token emitters, defined in the user-code section of this file. */
void out(char*t, char*v);
void lit();
void tok(char*t);
void php();
%}

%option stack
%option yylineno
%option main

%x code
%x dquote
%x squote

CHAR	\n|.

ALPHA	[a-zA-Z]
DIGIT	[0-9]
ALNUM	{ALPHA}|{DIGIT}
WORD	{ALNUM}|_
STOP	"."

SYM	{ALPHA}{WORD}*'*
LIT	'.'

/*
 * ESC is a backslash followed by any one character. SCHAR and DCHAR are the
 * characters allowed inside single- and double-quoted PHP strings: either
 * an escape sequence, or anything other than the closing quote and the
 * backslash. ESC has to be referenced as {ESC} (a bare ESC matches those
 * three letters), and the backslash has to be excluded from the character
 * class, otherwise an escaped quote ends the string early.
 */
ESC	\\{CHAR}
SCHAR	[^\'\\]|{ESC}
DCHAR	[^\"\\]|{ESC}
COM	"//"|"#"

CC	[^*\n]
CX	"*"+{CC}+
CT	"*"+"/"
BLOCKCMT	"/*"({CC}|{CX})*{CT}

%x pragma


%%

[[:space:]]+	{}
#.*	{}

{STOP}	out("stop", ".");
{SYM}	tok("sym");
{LIT}	tok("lit");
{BLOCKCMT}	{}
"/"{WORD}+	|
"/$"	out("lambda", yytext+1);
"%"{WORD}+	{
	/* A pragma runs to the end of its line; see the pragma state. */
	out("pragma", yytext+1);
	yy_push_state(pragma);
}

<*>"{"	{
	/* Braces nest: every open brace pushes another level of code. */
	lit();
	yy_push_state(code);
}

.	lit();


<pragma>{
\n	{
	/* The newline ends the pragma and is reported as a stop token. */
	out("stop", ".");
	yy_pop_state();
}
[[:space:]]	{}
{SYM}	tok("sym");
{LIT}	tok("lit");
.	lit();
}

<code>{
"}"	{
	lit();
	yy_pop_state();
}
'{SCHAR}*'	php();
\"{DCHAR}*\"	php();
{COM}.*	php();
{BLOCKCMT}	php();
[^{}'"#/]+	php();
.	php();
}
|
||||||
|
|
||||||
|
%%
|
||||||
|
|
||||||
|
void lit() {
|
||||||
|
char lit[] = "'.'";
|
||||||
|
lit[1] = *yytext;
|
||||||
|
out(lit, yytext);
|
||||||
|
}
|
||||||
|
|
||||||
|
void tok(char*t) {
|
||||||
|
out(t, yytext);
|
||||||
|
}
|
||||||
|
|
||||||
|
void php() {
|
||||||
|
out("php", yytext);
|
||||||
|
}
|
||||||
|
|
||||||
|
void out(char*type, char*value) {
|
||||||
|
printf("%d\001%s\001%s", yylineno, type, value);
|
||||||
|
fputc(0, stdout);
|
||||||
|
}
|
|
@ -0,0 +1,58 @@
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the Free Software
|
||||||
|
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
|
||||||
|
# This is the grammar for all other grammar files that will work with the
|
||||||
|
# Lime LALR(1) Context-Free Grammar Parser Generator.
|
||||||
|
# You can read this to get an idea how things work, but this file is not
|
||||||
|
# actually used in the system. Rather, it's an implementation guide for the
|
||||||
|
# file "lime.bootstrap".
|
||||||
|
|
||||||
|
%class lime_metaparser
%start grammar

# A grammar starts out as an empty lime object and grows by one pragma or
# one rewrite at a time. Each of them is closed by a stop.
grammar
	= {$$ = new lime();}
	| grammar/$ pragma/p toklist/t stop {$$->pragma($p, $t);}
	| grammar/$ rewrite/r stop {$r->update($$);}
	.

# A rewrite names its left side once and collects alternatives.
rewrite
	= sym/s '=' rhs/r {$$ = new lime_rewrite($s); $$->add_rhs($r);}
	| rewrite/$ '|' rhs/r {$$->add_rhs($r);}
	.

# A slot is an action or a symbol, optionally followed by a name, or a
# literal.
slot
	= action/a {$$ = new lime_action($a, NULL);}
	| action/a lambda/l {$$ = new lime_action($a, $l);}
	| sym/s {$$ = new lime_glyph($s, NULL);}
	| sym/s lambda/l {$$ = new lime_glyph($s, $l);}
	| lit/l {$$ = new lime_glyph($l, NULL);}
	.

# A right-hand side is a possibly empty run of slots.
rhs
	= {$$ = new lime_rhs();}
	| rhs/$ slot/s {$$->add($s);}
	.

# An action is the code between one pair of braces.
action = '{' code/$ '}' .

# The arguments of a pragma.
toklist
	= {$$=array();}
	| toklist/$ sym/s {$$[] = $s;}
	| toklist/$ lit/l {$$[] = $l;}
	.

# Code is a run of php fragments and nested brace groups, glued back
# together as text.
code
	= {}
	| code/$ php/p {$$.=$p;}
	| code/$ '{' code/c '}' {$$.='{'.$c.'}';}
	.
|
|
@ -0,0 +1,252 @@
|
||||||
|
<?php
|
||||||
|
/*
|
||||||
|
* This program is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation; either version 2 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU Library General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
# Parameter list shared by every generated reduce_* method: the matched
# tokens, and the result passed by reference.
define(
    'LIME_CALL_PROTOCOL',
    '$tokens, &$result'
);

# Common base class of all generated parsers.
abstract class lime_parser
{
}
|
||||||
|
|
||||||
|
class parse_error extends Exception {} # If this happens, the input doesn't match the grammar.

class parse_bug extends Exception {} # If this happens, I made a mistake.

# Raised for a token of a type that was not expected.
class parse_unexpected_token extends parse_error {
    # Type of the offending token.
    public $type;
    # State value passed in by the code that raised the error.
    public $state;

    function __construct($type, $state) {
        parent::__construct("Unexpected token of type ($type)");
        $this->type = $type;
        $this->state = $state;
    }
}

# Raised when the input ends early and no error recovery is possible.
class parse_premature_eof extends parse_error {
    function __construct() {
        parent::__construct("Premature EOF");
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
# The parse stack: the current state plus two parallel stacks, one of
# earlier states and one of semantic values.
class parse_stack {
    # Current state.
    public $q;
    # States that were current before each shift (oldest first).
    public $qs;
    # Semantic values, parallel to $qs.
    public $ss;

    function __construct($qi) {
        $this->q = $qi;
        $this->qs = array();
        $this->ss = array();
    }

    # Push $semantic, remember the current state, and make $q current.
    function shift($q, $semantic) {
        $this->ss[] = $semantic;
        $this->qs[] = $this->q;
        $this->q = $q;
        # echo "Shift $q -- $semantic<br/>\n";
    }

    # The top $n semantic values, oldest first, without removing them.
    function top_n($n) {
        if (!$n) return array();
        return array_slice($this->ss, -$n);
    }

    # Remove and return the top $n semantic values. The state that was
    # current before the oldest of them was shifted becomes current again.
    function pop_n($n) {
        if (!$n) return array();
        $qq = array_splice($this->qs, -$n);
        $this->q = $qq[0];
        return array_splice($this->ss, -$n);
    }

    # True while at least one semantic value is on the stack.
    function occupied() { return !empty($this->ss); }

    # Make the state stored $n entries below the top current, without
    # popping anything. Does nothing when $n is zero.
    function index($n) {
        if ($n) $this->q = $this->qs[count($this->qs)-$n];
    }

    # Debug rendering: the current state, then the stored states with the
    # newest first.
    function text() {
        return $this->q." : ".implode(' . ', array_reverse($this->qs));
    }
}
|
||||||
|
class parse_engine {
|
||||||
|
# Wrap a generated parser object. Its tables are copied into the engine:
# the initial state (qi), the rule table (a) and the step table (i). The
# engine is then reset so it is ready for input.
function __construct($parser) {
    $this->parser = $parser;
    $this->qi = $parser->qi;
    $this->rule = $parser->a;
    $this->step = $parser->i;
    $this->reset();
}
|
||||||
|
# Return to the initial condition: nothing accepted yet and a fresh stack
# positioned at the initial state.
function reset() {
    $fresh_stack = new parse_stack($this->qi);
    $this->accept = false;
    $this->stack = $fresh_stack;
}
|
||||||
|
# Drop stack entries until the current state has a step for 'error'.
# Returns true when such a state is reached while the stack is still
# occupied, and false once the stack has been emptied.
private function enter_error_tolerant_state() {
    for (;;) {
        if (!$this->stack->occupied()) {
            return false;
        }
        if ($this->has_step_for('error')) {
            return true;
        }
        $this->drop();
    }
}
|
||||||
|
# Discard the topmost stack entry.
private function drop() {
    $this->stack->pop_n(1);
}
|
||||||
|
function eat_eof() {
    /*

    So that I don't get any brilliant misguided ideas:

    The "accept" step happens when we try to eat a start symbol.
    That happens because the reductions up the stack at the end
    finally (and symmetrically) tell the parser to eat a symbol
    representing what they've just shifted off the end of the stack
    and reduced. However, that doesn't put the parser into any
    special different state. Therefore, it's back at the start
    state.

    That being said, the parser is ready to reduce an EOF to the
    empty program, if given a grammar that allows them.

    So anyway, if you literally tell the parser to eat an EOF
    symbol, then after it's done reducing and accepting the prior
    program, it's going to think it has another symbol to deal with.
    That is the EOF symbol, which means to reduce the empty program,
    accept it, and then continue trying to eat the terminal EOF.

    This infinite loop quickly runs out of memory.

    That's why the real EOF algorithm doesn't try to pretend that
    EOF is a terminal. Like the invented start symbol, it's special.

    Instead, we pretend to want to eat EOF, but never actually
    try to get it into the parse stack. (It won't fit.) In short,
    we look up what reduction is indicated at each step in the
    process of rolling up the parse stack.

    The repetition is because one reduction is not guaranteed to
    cascade into another and clean up the entire parse stack.
    Rather, it will instead shift each partial production as it
    is forced to completion by the EOF lookahead.

    */

    # We must reduce as if having read the EOF symbol, and we have to try
    # at least once, because if nothing has ever been shifted, then the
    # stack will be empty at the start.
    do {
        list($opcode, $operand) = $this->step_for('#');
        if ($opcode == 'r') {
            $this->reduce($operand);
        } elseif ($opcode == 'e') {
            $this->premature_eof();
        } else {
            throw new parse_bug();
        }
    } while ($this->stack->occupied());

    /*

    If the sentence is well-formed according to the grammar, then
    this will eventually result in eating a start symbol, which
    causes the "accept" instruction to fire. Otherwise, the
    step('#') method will indicate an error in the syntax, which
    here means a premature EOF.

    Incidentally, some tremendous amount of voodoo with the parse
    stack might help find the beginning of some unfinished
    production that the sentence was cut off during, but as a
    general rule that would require deeper knowledge.

    */
    if (!$this->accept) {
        throw new parse_bug();
    }
    return $this->semantic;
}
|
||||||
|
/**
 * Error recovery for input that ends before the parse can complete.
 *
 * For as long as enter_error_tolerant_state() reports success, feed the
 * 'error' pseudo-token to the engine and check whether the resulting state
 * has a table entry for the end-of-input marker '#'. Returns as soon as
 * such an entry exists, so the caller can carry on reducing.
 *
 * Throws parse_premature_eof once enter_error_tolerant_state() stops
 * reporting success.
 */
private function premature_eof() {
    # States in which an 'error' token has already been tried.
    $visited = array();

    while ($this->enter_error_tolerant_state()) {
        $here = $this->state();

        if (isset($visited[$here])) {
            # Retrying in a state we already tried is pointless; discard
            # via drop() and look again. The stack is known to be
            # occupied at this point.
            $this->drop();
            continue;
        }
        $visited[$here] = true;

        $this->eat('error', NULL);

        # If EOF is acceptable now, recovery worked. Otherwise there is
        # no need to work out how much to slice off the stack here: the
        # next pass through the loop takes care of it.
        if ($this->has_step_for('#')) return;
    }

    throw new parse_premature_eof();
}
|
||||||
|
/**
 * Returns the entry of $this->step indexed by the current state, i.e. the
 * parse-table row that step_for() and has_step_for() look symbols up in.
 */
private function current_row() {
    $state = $this->state();
    return $this->step[$state];
}
|
||||||
|
/**
 * Looks up the parse-table instruction for a symbol in the current state.
 *
 * Returns the table entry split on single spaces (callers unpack it as
 * opcode followed by operand). When the current row has no entry for
 * $type, returns the error instruction array('e', <current state>).
 */
private function step_for($type) {
    $actions = $this->current_row();
    return isset($actions[$type])
        ? explode(' ', $actions[$type])
        : array('e', $this->stack->q);
}
|
||||||
|
/**
 * Tells whether the current state's table row has an entry for $type.
 * Unlike step_for(), this only tests for presence and never produces an
 * error instruction.
 */
private function has_step_for($type) {
    $actions = $this->current_row();
    return isset($actions[$type]);
}
|
||||||
|
/**
 * Returns the current state, as held in the `q` property of the parse stack.
 */
private function state() {
    return $this->stack->q;
}
|
||||||
|
/**
 * Feeds one symbol to the parse engine.
 *
 * Looks up the instruction for $type in the current state and carries it
 * out:
 *   's' - shift $semantic onto the stack, moving to the state in the operand;
 *   'r' - reduce by the rule in the operand, then offer the same symbol again;
 *   'a' - accept: record $semantic as the result of the parse;
 *   'e' - no entry for $type: attempt error recovery, or give up.
 *
 * $type      the symbol name used to index the parse table.
 * $semantic  the value that travels with the symbol.
 *
 * Throws parse_error when $type is unexpected and
 * enter_error_tolerant_state() reports failure; throws parse_bug on an
 * unknown opcode or on an accept while the stack is still occupied.
 */
function eat($type, $semantic) {
    list($opcode, $operand) = $this->step_for($type);

    switch ($opcode) {
    case 's':
        $this->stack->shift($operand, $semantic);
        break;

    case 'r':
        $this->reduce($operand);
        # Yes, this is tail-recursive. It's also the simplest way.
        $this->eat($type, $semantic);
        break;

    case 'a':
        if ($this->stack->occupied()) throw new parse_bug('Accept should happen with empty stack.');
        $this->accept = true;
        $this->semantic = $semantic;
        break;

    case 'e':
        # This is thought to be the uncommon, exceptional path, so
        # it's OK that this algorithm will cause the stack to
        # flutter while the parse engine waits for an edible token.
        if ($this->enter_error_tolerant_state()) {
            $this->eat('error', NULL);
            if ($this->has_step_for($type)) $this->eat($type, $semantic);
        } else {
            # If that didn't work, give up. Render the semantic value
            # defensively: interpolating an array raises a conversion
            # warning, and interpolating an object without __toString
            # throws an Error that would replace the parse_error the
            # caller is expecting. Scalars, NULL and stringable objects
            # produce exactly the text they always did.
            if (is_array($semantic)) {
                $shown = 'Array';
            } elseif (is_object($semantic) && !method_exists($semantic, '__toString')) {
                $shown = get_class($semantic);
            } else {
                $shown = (string) $semantic;
            }
            throw new parse_error("Parse Error: ($type)($shown) not expected");
        }
        break;

    default:
        throw new parse_bug("Bad parse table instruction ".htmlspecialchars($opcode));
    }
}
|
||||||
|
/**
 * Applies grammar rule $rule_id to the top of the parse stack.
 *
 * Runs the rule's action on the top `len` stack entries, then either
 * calls pop_n() (when the rule's 'replace' flag is set) or index() on the
 * stack with that same length, and finally feeds the rule's left-hand
 * symbol, carrying the action's result, back through eat().
 */
private function reduce($rule_id) {
    $rule = $this->rule[$rule_id];
    $width = $rule['len'];

    # The action must see the stack slice before the stack is adjusted.
    $value = $this->perform_action($rule_id, $this->stack->top_n($width));

    if (!$rule['replace']) {
        $this->stack->index($width);
    } else {
        $this->stack->pop_n($width);
    }

    $this->eat($rule['symbol'], $value);
}
|
||||||
|
/**
 * Invokes the parser method registered for rule $rule_id.
 *
 * The method name comes from $this->parser->method[$rule_id]. It is called
 * with the stack slice and a result variable, and whatever that variable
 * holds afterwards is returned.
 * NOTE(review): this presumes the action methods declare their second
 * parameter by reference; otherwise the result stays null - confirm
 * against the generated parser class.
 */
private function perform_action($rule_id, $slice) {
    # we have this weird calling convention....
    $handler = $this->parser->method[$rule_id];
    $outcome = null;
    $this->parser->$handler($slice, $outcome);
    return $outcome;
}
|
||||||
|
}
|
|
@ -0,0 +1,29 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
/*
|
||||||
|
File: set.so.php
|
||||||
|
License: GPL
|
||||||
|
Purpose: We should really have a "set" data type. It's too useful.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
A minimal set of scalar items, stored as the keys of a PHP array.
Because members are array keys, they follow PHP's array-key rules
(for example, a numeric string such as "5" comes back from all() as
the integer 5).
*/
class set {
    # Members are the keys; the values are only placeholders (counts from
    # the constructor, true from add()). Declared explicitly so that the
    # constructor does not create a dynamic property, which PHP 8.2
    # deprecates.
    public $data = array();

    # Builds the set from a list of items; duplicates collapse.
    function __construct($list=array()) { $this->data = array_count_values($list); }

    # True when $item is a member.
    function has($item) { return isset($this->data[$item]); }

    # Inserts $item (a no-op if it is already present).
    function add($item) { $this->data[$item] = true; }

    # Removes $item if present, and returns $item either way.
    function del($item) { unset($this->data[$item]); return $item; }

    # Returns every member as a list.
    function all() { return array_keys($this->data); }

    # Returns some member without removing it, or NULL if the set is empty.
    function one() {
        # Rewind first, so the answer never depends on where the array's
        # internal pointer was left.
        reset($this->data);
        return key($this->data);
    }

    # Number of members.
    function count() { return count($this->data); }

    # Removes and returns some member, or NULL if the set is empty.
    function pop() {
        $item = $this->one();
        # Nothing to remove; avoid using NULL as an array offset.
        if ($item === null) return null;
        return $this->del($item);
    }

    # Adds every member of $that to this set.
    # Returns true if at least one new member was added, false otherwise.
    function union($that) {
        $progress = false;
        foreach ($that->all() as $item) {
            if (!$this->has($item)) {
                $this->add($item);
                $progress = true;
            }
        }
        return $progress;
    }

    # Renders the members, space-separated, between braces.
    function text() {
        return ' { '.implode(' ', $this->all()).' } ';
    }
}
|
Loading…
Reference in New Issue