# $Header: /cvs/src/jbofihe/action.perl,v 1.6 2001/05/09 22:06:55 richard Exp $
#
# Perl script to read through the bison grammar for lojban and insert
# simple actions, typically to build a nonterminal node.
#
#
# Copyright (C) Richard P. Curnow  1998-2001
# 
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
# 
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
# 
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
# 
#
#

$do_nonterm = 1;
while ($_ = shift @ARGV) {
    if (/-nononterm/) {
        $do_nonterm = 0;
    }
}

while (<>) {
    print;
    if (/^\%\%/) { last; }
}

%nonterms=();

$max_nchildren = 0;

$first = 1;
@flags=();
@eli=();

%eli_codes = (
    BEhO => 137,
    BOI => 157,
    CU => 266,
    DOhU => 362,
    FEhU => 432,
    GEhU => 530,
    KEI => 717,
    KEhE => 723,
    KU => 756,
    KUhE => 765,
    KUhO => 767,
    LIhU => 838,
    LOhO => 851,
    LUhU => 866,
    MEhU => 922,
    NUhU => 1062,
    SEhU => 1314,
    TEhU => 1412,
    TOI => 1431,
    TUhU => 1454,
    VAU => 1489,
    VEhO => 1509 );

while (<>) {

    s=/\* ET ([A-Zh]+) \*/=?$1=g;

    if (/^[ \t]*([a-z0-9A-Z_]+)[ \t]*:(.*)$/) {
        if ($first) {
            # Special processing assumes the outermost rule has 1 child and appears
            # first in the file.  This rule is to match the complete input.
            print;
            print sprintf("{ top = \$1; }\n");
            $first = 0;

        } else {
            $nonterm=$1;
            $nonterm =~ y/a-z/A-Z/;
            $nonterms{$nonterm} = 1;
            $childs = $2;
            @children = split(/[ \t]+/,$childs);
            $nchildren = $#children;
            if ($nchildren < 0) { $nchildren = 0; }
            $n_live_children = 0;
            for ($i=1; $i<=$nchildren; $i++) {
                if ($children[$i] =~ /START_EK|START_GIHEK|START_GUHEK|START_JEK|START_JOIK|START_GEK|START_BAI|EK_KE|EK_BO|JEK_KE|JEK_BO|JOIK_KE|JOIK_BO|I_JEKJOIK|I_BO|GIHEK_KE|GIHEK_BO|NAhE_BO|NAhE_time|NAhE_space|NAhE_CAhA|NA_KU|NUMBER_MAI|NUMBER_MOI|NUMBER_ROI|EOF_MARK/) {
                    $flags[$i] = 0;
                } else {
                    $flags[$i] = 1;
                    $n_live_children++;
                }
            }

            if ($n_live_children > $max_nchildren) { $max_nchildren = $n_live_children; }

            print;
            print sprintf("{\$\$ = new_node_%d(%s", $n_live_children, $nonterm);
            for ($i = 1; $i<=$nchildren; $i++) {
                if ($flags[$i]) {
                    print sprintf(",\$%d", $i);
                }
            }
            print ");}\n";
        }

        # IF there are rules with 'error' in them, 2 things are assumed :
         # 1. they are not the first rule for a particular non-terminal
        # 2. they have their own action pre-supplied in the source file.
        # so we ignore them.

        # If /* ET XXX */ occurs anywhere, XXX is an elidable terminator

    } elsif  (/^[ \t]*\|(.*)$/ && !/ error / && !/ error$/) {
        $childs = $1;
        @children = split(/[ \t]+/,$childs);
        $nchildren = $#children;
        if ($nchildren < 0) { $nchildren = 0; }
        $n_live_children = 0;
        for ($i=1; $i<=$nchildren; $i++) {
            if ($children[$i] =~ /START_EK|START_GIHEK|START_GUHEK|START_JEK|START_JOIK|START_GEK|START_BAI|EK_KE|EK_BO|JEK_KE|JEK_BO|JOIK_KE|JOIK_BO|I_JEKJOIK|I_BO|GIHEK_KE|GIHEK_BO|NAhE_BO|NAhE_time|NAhE_space|NAhE_CAhA|NA_KU|NUMBER_MAI|NUMBER_MOI|NUMBER_ROI|EOF_MARK/) {
                $flags[$i] = 0;
            } elsif ($children[$i] =~ m{\?([A-Zh]+)}) {
                $flags[$i] = 2;
                $eli[$i] = $1;
                $n_live_children++;
            } else {
                $flags[$i] = 1;
                $n_live_children++;
            }
        }
        
        if ($n_live_children > $max_nchildren) { $max_nchildren = $n_live_children; }
        s/\?[A-Zh]+//g;
        print;
        print sprintf("{\$\$ = new_node_%d(%s", $n_live_children, $nonterm);
        for ($i = 1, $j = 1; $i<=$nchildren; $i++) {
            if ($flags[$i] == 1) {
                print sprintf(",\$%d", $j++);
            } elsif ($flags[$i] == 2) {
                print sprintf(",new_elidable(%d,%s)", $eli_codes{$eli[$i]}, $eli[$i]);
            } elsif ($flags[$i] == 0) {
                $j++; # skip markers
            }
        }
        print ");}\n";
    } else {
        print;
    }
}

$nonterms{"AUGMENTED"} = 1;

@nt = sort keys(%nonterms);

unless ($do_nonterm) { exit 0; }

open(NT, ">nonterm.h");
print NT "#ifndef NONTERM_H\n#define NONTERM_H\n\n/* max childen = $max_nchildren */\n\nextern char *nonterm_names[];\n\ntypedef enum {\n";
$first=1;
for $nt (@nt) {
    if (!$first) {
        print NT ",\n  ";
    } else {
        print NT "  ";
    }
    $first=0;
    print NT $nt;
}
print NT "\n} NonTerm;\n\n#endif\n";
close (NT);

open(NTC, ">nonterm.c");
print NTC "char *nonterm_names[] = {\n";
$first = 1;
for $nt (@nt) {
    if (!$first) {
        print NTC ",\n  ";
    } else {
        print NTC "  ";
    }
    $first=0;
    print NTC "\"".$nt."\"";
}
print NTC "};\n";
close (NTC);


