[squeak-dev] Question about a parser for Squeak using the Boost Spirit framework

Hernán Morales Durand hernan.morales at gmail.com
Fri Jul 10 07:55:35 UTC 2009


 Dear all,
  I wrote a little VC++ parser using the Boost/Spirit Parser Framework [1],
based on the Squeak EBNF grammar, which I took from the swiki [2]. I wonder
if anyone here could tell me the best way to run an automated test suite
against a parser for Smalltalk from outside an image.
  I want to isolate some expressions that result in recursion loops in my
parser; I include the source below in case anybody wants to take a
look. Sorry for the long e-mail, or if this is the wrong mailing list.

[1] http://spirit.sourceforge.net/
[2] http://wiki.squeak.org/squeak/409

//   Requires -  Spirit version 1.6.0 or later
#define BOOST_SPIRIT_DEBUG  ///$$$ DEFINE THIS WHEN DEBUGGING $$$///

#include <boost/spirit/core.hpp>
#include <boost/spirit/symbols/symbols.hpp>
#include <boost/spirit/utility/lists.hpp>
#include <boost/spirit/utility/confix.hpp>
#include <iostream>
#include <fstream>
#include <vector>
#include <string>

////////////////////////////////
using namespace boost::spirit;
using namespace std;
//--------------------------------------------
//  Start grammar definition
//--------------------------------------------
/// Recognizer for a subset of Squeak Smalltalk syntax, written against the
/// classic Boost.Spirit API (`grammar<>`, `rule<>`, `confix_p`, ...).
///
/// The grammar only recognizes input; no semantic actions are attached.
/// Parsing starts at `program`, which accepts a method definition, a block
/// or a single expression. Alternatives inside each rule are tried in the
/// order written, so their order is significant.
///
/// NOTE(review): Smalltalk comments ("...") are not handled by this grammar;
/// if they are needed they would have to be consumed by the skip parser
/// supplied by the caller.
struct st_grammar :
    public grammar<st_grammar>
{
    template <typename ScannerT>
    struct definition
    {
        /// Defines every rule of the grammar. The grammar object itself
        /// carries no state, so the argument is not used.
        definition(st_grammar const& /*self*/)
        {
            // Single-character tokens used as binary selectors / return caret.
            chlit<> SUM('+');
            chlit<> SUB('-');
            chlit<> MOD('\\');
            chlit<> DIV('/');
            chlit<> MUL('*');
            chlit<> NOT('~');
            chlit<> LT('<');
            chlit<> GT('>');
            chlit<> EQ('=');
            chlit<> AT('@');
            chlit<> PC('%');
            chlit<> COMMA(',');
            chlit<> CARET('^');

            // ---- top level ------------------------------------------------
            program = method | block | expression;

            method = message_pattern
                >> !temporaries
                >> !primitive_declaration
                >> !statements;

            message_pattern = unary_selector
                | binary_selector >> argument_name
                | +(keyword >> argument_name);

            temporaries = confix_p('|', *variable_name, '|');

            primitive_declaration =
                confix_p('<', str_p("primitive:") >> +digit_p, '>');

            // ---- lexical elements -----------------------------------------
            // lexeme_d switches the skipper off so an identifier cannot
            // contain whitespace. alnum_p already includes the digits.
            identifier = lexeme_d[ alpha_p >> *alnum_p ];

            binary_selector = SUM | SUB | MOD | DIV | MUL | NOT
                | LT | GT | EQ | AT | PC | COMMA;

            keyword = identifier >> ch_p(':');
            argument_name = identifier;
            variable_name = identifier;

            assignment_op = str_p(":=");

            // ---- blocks and statements ------------------------------------
            block =
                confix_p('[',
                    !(+(ch_p(':') >> argument_name) >> '|')
                        >> *temporaries
                        >> *statements,
                    ']');

            statements = *(expression >> ch_p('.'))
                >> !CARET
                >> expression
                >> !ch_p('.');

            // Zero or more "name :=" prefixes followed by the value. The
            // cascaded form must be tried before the plain message form
            // because it starts with a complete message expression.
            expression = *(variable_name >> assignment_op)
                >> ( cascaded_message_expression
                   | message_expression
                   | primary );

            primary = confix_p('(', expression, ')')
                | block
                | literal
                | argument_name
                | variable_name;

            literal = array_constant
                | strng
                | character_constant
                | symbol_constant
                | number;

            // ---- message sends --------------------------------------------
            message_expression = keyword_expression
                | binary_expression
                | unary_expression;

            // An identifier is a unary selector only when no ':' follows
            // (otherwise it is the start of a keyword).
            unary_selector = identifier >> ~eps_p(':');
            unary_expression = primary >> +unary_selector;
            unary_object_description = unary_expression | primary;

            binary_object_description = binary_expression
                | unary_object_description;

            binary_expression = unary_object_description
                >> binary_selector
                >> unary_object_description;

            // keyword_expression = binary_object_description % keyword;
            keyword_expression = binary_object_description
                >> +(keyword >> binary_object_description);

            cascaded_message_expression = message_expression
                >> +(ch_p(';')
                    >> ( unary_selector
                       | binary_selector >> unary_object_description
                       | +(keyword >> binary_object_description)));

            // ---- literals -------------------------------------------------
            character_constant = ch_p('$')
                >> ( ch_p('\'')
                   | ch_p('\"')
                   | anychar_p );

            array_constant = ch_p('#') >> array;
            array = confix_p('(',
                *( number | symbol | strng | character_constant | array ),
                ')');

            symbol_constant = ch_p('#') >> symbol;
            symbol = identifier
                | binary_selector
                | +(keyword)
                | strng;

            // Retained so the declared rules stay defined and traceable;
            // `number` below does not depend on them.
            any_inumber = int_p | uint_p;
            any_rnumber = real_p | ureal_p;

            // Numeric literal, matched as one contiguous token:
            //   [-] digits [ 'r' radix-digits | '.' digits ] [ 'e' [-] digits ]
            // Spelled out by hand because real_p also accepts plain integers
            // (so "int_p ^ real_p" rejects every integer) and because a
            // greedy optional integer prefix would swallow the only digits
            // of an ordinary literal. Requiring digits after '.' leaves a
            // trailing statement terminator ("3.") for `statements`.
            number = lexeme_d[
                    !ch_p('-')
                >>  +digit_p
                >>  !(  ch_p('r') >> +alnum_p
                     |  ch_p('.') >> +digit_p
                     )
                >>  !(ch_p('e') >> !ch_p('-') >> +digit_p)
            ];

            // Smalltalk string literals are delimited by single quotes;
            // double quotes delimit comments.
            // NOTE(review): an embedded quote written as '' is not handled.
            strng = confix_p('\'', *(anychar_p - ch_p('\'')), '\'');

            // ---- debug tracing (active when BOOST_SPIRIT_DEBUG is defined) -
            BOOST_SPIRIT_DEBUG_NODE(program);
            BOOST_SPIRIT_DEBUG_NODE(method);
            BOOST_SPIRIT_DEBUG_NODE(message_pattern);
            BOOST_SPIRIT_DEBUG_NODE(temporaries);
            BOOST_SPIRIT_DEBUG_NODE(primitive_declaration);
            BOOST_SPIRIT_DEBUG_NODE(identifier);
            BOOST_SPIRIT_DEBUG_NODE(variable_name);
            BOOST_SPIRIT_DEBUG_NODE(unary_selector);
            BOOST_SPIRIT_DEBUG_NODE(binary_selector);
            BOOST_SPIRIT_DEBUG_NODE(keyword);
            BOOST_SPIRIT_DEBUG_NODE(argument_name);
            BOOST_SPIRIT_DEBUG_NODE(statements);
            BOOST_SPIRIT_DEBUG_NODE(expression);
            BOOST_SPIRIT_DEBUG_NODE(message_expression);
            BOOST_SPIRIT_DEBUG_NODE(assignment_op);
            BOOST_SPIRIT_DEBUG_NODE(primary);
            BOOST_SPIRIT_DEBUG_NODE(cascaded_message_expression);
            BOOST_SPIRIT_DEBUG_NODE(literal);
            BOOST_SPIRIT_DEBUG_NODE(block);
            BOOST_SPIRIT_DEBUG_NODE(unary_expression);
            BOOST_SPIRIT_DEBUG_NODE(binary_expression);
            BOOST_SPIRIT_DEBUG_NODE(keyword_expression);
            BOOST_SPIRIT_DEBUG_NODE(binary_object_description);
            BOOST_SPIRIT_DEBUG_NODE(unary_object_description);
            BOOST_SPIRIT_DEBUG_NODE(strng);
            BOOST_SPIRIT_DEBUG_NODE(number);
            BOOST_SPIRIT_DEBUG_NODE(symbol);
            BOOST_SPIRIT_DEBUG_NODE(symbol_constant);
            BOOST_SPIRIT_DEBUG_NODE(array);
            BOOST_SPIRIT_DEBUG_NODE(array_constant);
            BOOST_SPIRIT_DEBUG_NODE(character_constant);
            BOOST_SPIRIT_DEBUG_NODE(any_rnumber);
            BOOST_SPIRIT_DEBUG_NODE(any_inumber);
        }

        /// Entry rule used by the Spirit framework when the grammar is parsed.
        rule<ScannerT> const& start() const { return program; }

        rule<ScannerT>
            program, method, message_pattern, temporaries,
            primitive_declaration, identifier,
            variable_name, unary_selector, binary_selector, keyword,
            argument_name,
            statements, expression, message_expression, primary,
            cascaded_message_expression,
            literal, block, assignment_op, unary_expression,
            binary_expression,
            keyword_expression, binary_object_description,
            unary_object_description,
            character_constant, array_constant, symbol_constant, array,
            symbol, number, strng,
            any_inumber, any_rnumber;
    };
};

/// Interactive driver: reads one line at a time from standard input, parses
/// it with `st_grammar` (whitespace skipped via `space_p`) and reports
/// whether the whole line was consumed. A line starting with 'z' or 'Z', or
/// end of input, ends the session.
///
/// The command-line arguments are not used, so the standard zero-argument
/// form of `main` is declared; `int main(int, char*)` is not a portable
/// signature.
int main() {
    cout << "Write code in just a line\n\n" ;

    st_grammar g;
    BOOST_SPIRIT_DEBUG_NODE(g);

    string str;
    while (getline(cin, str))
    {
        // Check for emptiness first so the first character is only read
        // when the line actually has one.
        if (!str.empty() && (str[0] == 'z' || str[0] == 'Z'))
            break;

        // `.full` is true only when the parser consumed the entire line.
        if (parse(str.c_str(), g, space_p).full)
        {
            cout << "parsing succeeded\n";
        }
        else
        {
            cout << "parsing failed\n";
        }
    }
    cout << "Bye... :-) \n\n";
    return 0;
}

Cheers,

Hernán
-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://lists.squeakfoundation.org/pipermail/squeak-dev/attachments/20090710/65dfaaf4/attachment.htm


More information about the Squeak-dev mailing list