//
// An experimental "tiny C" grammar by
// Ian Kaplan
//


// File-level options: the generated parser and lexer are emitted as C++.
options {
    language = "Cpp";
}

// Parser for the "tiny C" language.
class MyTinyCParser extends Parser;

options {
    k = 2;                  // two tokens of lookahead
    buildAST = true;        // generate AST construction code for every rule
    exportVocab = MyTinyC;  // name of the token vocabulary this grammar exports
}

// Imaginary token types.  They never come from the lexer; the rule actions
// below create nodes of these types to label parts of the AST.
tokens {
    NULL_NODE;    // placeholder: missing function type, empty statement,
                  // or empty for-loop clause
    FUNC_LIST;    // root above all function definitions
    FUNC;         // root of one function definition
    FUNC_HEAD;    // root of a function header
    FORMAL_LIST;  // root of a formal argument list
    DECL;         // root of a variable declaration
    BLOCK;        // root of a bracketed block
}

// Function list: zero or more function definitions followed by end of input.
// EOF is matched but kept out of the tree; everything matched is then hung
// beneath a new FUNC_LIST node.
funclist
  : ( function_def )*
    EOF!
    { #funclist = #( [FUNC_LIST, "func_list"], #funclist ); }
  ;

// Function definition: a header followed by the body block, both placed
// beneath a new FUNC node.
function_def
  : func_header
    block
    { #function_def = #( [FUNC, "func_decl"], #function_def ); }
  ;

// Function header: optional type and name, then a parenthesised and
// possibly empty formal argument list.  The parentheses are dropped from
// the tree and the rest is placed beneath a new FUNC_HEAD node.
func_header
  : func_name_decl
    LPAREN! ( formal_list )? RPAREN!
    { #func_header = #( [FUNC_HEAD, "func_head"], #func_header ); }
  ;

// Function name with an optional return type.  In both alternatives the
// name ends up as the subtree root with a single child: the type when one
// was written, otherwise a NULL_NODE stand-in.
func_name_decl
  : IDENT
    { #func_name_decl = #( #func_name_decl, [NULL_NODE, "null_node"] ); }
  | base_type IDENT^
  ;

// Formal argument list: one or more "type name" pairs separated by commas.
// The commas are dropped and the pairs are placed beneath a new
// FORMAL_LIST node.
formal_list
  : base_type IDENT
    ( COMMA! base_type IDENT )*
    { #formal_list = #( [FORMAL_LIST, "formal_list"], #formal_list ); }
  ;

// Built-in types.  The keyword itself becomes the root of the subtree.
base_type
  : "char"^
  | "int"^
  ;

// Any statement.  A lone semicolon is the empty statement: the semicolon is
// dropped and a NULL_NODE takes its place in the tree.
statement
  : block
  | assignment_statement
  | if_stmt
  | for_loop
  | return_statement
  | SEMICOLON!
    { #statement = #( [NULL_NODE, "null_stmt"] ); }
  ;


// Bracketed block: all declarations come first, then the statements.
// The curly brackets are dropped and the contents are placed beneath a new
// BLOCK node.
block
  : LCURL!
    ( decl )*
    ( statement )*
    RCURL!
    { #block = #( [BLOCK, "block"], #block ); }
  ;

// Variable declaration: a type followed by one or more comma-separated
// names and a closing semicolon.  Commas and semicolon are dropped; the
// rest is placed beneath a new DECL node.
decl
  : base_type
    IDENT ( COMMA! IDENT )*
    SEMICOLON!
    { #decl = #( [DECL, "decl"], #decl ); }
  ;


// An assignment (or bare expression, see the assignment rule) used as a
// statement.  The terminating semicolon is not kept in the tree.
assignment_statement
  : assignment
    SEMICOLON!
  ;

// if statement with an optional else part.  "if" becomes the subtree root
// and the parentheses around the condition are dropped.  The syntactic
// predicate takes the else branch whenever the next token is "else".
if_stmt
  : "if"^ LPAREN! expr RPAREN! statement
    ( ( "else" ) => else_part
    | ()    // no else branch
    )
  ;

// else branch of an if statement; "else" is the root of its subtree.
else_part
  : "else"^
    statement
  ;

// for loop: "for" is the subtree root, followed by the loop control
// clauses and the loop body.
for_loop
  : "for"^
    loop_cntrl
    statement
  ;

// Loop control: initialisation, condition and increment clauses inside
// parentheses.  The parentheses are not kept in the tree.
loop_cntrl
  : LPAREN!
    loop_init loop_cond loop_incr
    RPAREN!
  ;

// for-loop initialisation clause, ended by a semicolon that is dropped.
// An empty clause is represented by a NULL_NODE.
loop_init
  : SEMICOLON!
    { #loop_init = #( [NULL_NODE, "null_init"] ); }
  | assignment SEMICOLON!
  ;

// for-loop condition clause, ended by a semicolon that is dropped.
// An empty clause is represented by a NULL_NODE.
loop_cond
  : SEMICOLON!
    { #loop_cond = #( [NULL_NODE, "null_cond"] ); }
  | expr SEMICOLON!
  ;

// for-loop increment clause.  It may be left out entirely, in which case
// a NULL_NODE is put in the tree instead.
loop_incr
  : ()    // empty
    { #loop_incr = #( [NULL_NODE, "null_incr"] ); }
  | assignment
  ;

// return statement.  "return" is the subtree root; an expression is
// required and the closing semicolon is dropped.
return_statement
  : "return"^
    expr
    SEMICOLON!
  ;

// Primary expression: an identifier, a constant, or a parenthesised
// expression (the parentheses themselves are not kept in the tree).
primary_expr
  : IDENT
  | constant
  | LPAREN! expr RPAREN!
  ;

// Assignment: an optional "IDENT =" prefix in front of an expression.
// When the prefix is present ASSIGN becomes the subtree root; without it
// the rule is just an expression.  Because an expression can itself start
// with IDENT, the second lookahead token decides whether the prefix is
// taken.
assignment
  : ( IDENT ASSIGN^ )?
    expr
  ;

// Postfix expression: a primary expression optionally followed by a
// parenthesised argument list.  The syntactic predicate checks for a left
// parenthesis before arg_list is attempted.
postfix_expr
  : primary_expr
    ( ( LPAREN ) => arg_list )?
  ;

// Boolean negation: any number of "not" keywords in front of a postfix
// expression.  Each "not" becomes a tree root.
boolneg_expr
  : ( "not"^ )*
    postfix_expr
  ;

// Optional unary minus in front of a boolean-negation expression.  When
// present, MINUS becomes the subtree root.
sign_expr
  : ( MINUS^ )? boolneg_expr
  ;

// Multiplicative level: operands joined by *, / or %.  Each operator
// becomes the root of what has been matched so far.
mul_expr
  : sign_expr
    ( ( TIMES^ | DIVIDE^ | MOD^ ) sign_expr )*
  ;

// Additive level: multiplicative expressions joined by + or -.  Each
// operator becomes the root of what has been matched so far.
add_expr
  : mul_expr
    ( ( PLUS^ | MINUS^ ) mul_expr )*
  ;

// Shift level: additive expressions joined by << or >>.  Each operator
// becomes the root of what has been matched so far.
shift_expr
  : add_expr
    ( ( SHIFT_LEFT^ | SHIFT_RIGHT^ ) add_expr )*
  ;

// Relational level: shift expressions joined by <, >, >= or <=.  Each
// operator becomes the root of what has been matched so far.
rel_expr
  : shift_expr
    ( ( LTHAN^ | GTHAN^ | GEQ^ | LEQ^ ) shift_expr )*
  ;

// Equality level: relational expressions joined by == or !=.  Each
// operator becomes the root of what has been matched so far.
eq_expr
  : rel_expr
    ( ( EQ^ | NEQ^ ) rel_expr )*
  ;

// Logical "and" level: equality expressions joined by the keyword "and",
// which becomes the root of what has been matched so far.
lmul_expr
  : eq_expr
    ( "and"^ eq_expr )*
  ;

// Full expression (logical "or" level): "and"-level expressions joined by
// the keyword "or", which becomes the root of what has been matched so far.
expr
  : lmul_expr
    ( "or"^ lmul_expr )*
  ;


// Actual argument list: one or more comma-separated expressions inside
// parentheses.  At least one argument is required; parentheses and commas
// are not kept in the tree.
arg_list
  : LPAREN!
    expr ( COMMA! expr )*
    RPAREN!
  ;

// Literal constant: an integer constant or a character constant.
constant
  : ICON
  | CHCON
  ;


// Lexer for the "tiny C" language.
class MyTinyCLexer extends Lexer;

options {
    exportVocab = MyTinyC;  // same vocabulary name that MyTinyCParser exports
    k = 2;                  // two characters of lookahead, enough to tell
                            // '=' from "==" and '<' from "<=" or "<<"
}


// White space: a single blank, tab, carriage return or newline.  A newline
// also advances the lexer's line counter.  The token type is set to SKIP
// so the token is not handed to the parser.
WS_
  : ( ' '
    | '\t'
    | '\r'
    | '\n' { newline(); }
    )
    { _ttype = Token::SKIP; }
  ;

// Identifier: a letter or underscore followed by any number of letters,
// underscores or digits.
IDENT
options {
    paraphrase = "identifier";
}
  : ( 'a'..'z' | 'A'..'Z' | '_' )
    ( 'a'..'z' | 'A'..'Z' | '_' | '0'..'9' )*
  ;

// Integer constant: one or more decimal digits.
ICON
options {
    paraphrase = "integer constant";
}
  : ( '0'..'9' )+
  ;

// Character constant: exactly one character between single quotes, such
// as 'a'.  Multi-character escape sequences are not matched by this rule.
// ANTLR reads '\ddd' escapes as octal, so the upper bound is written as
// '\377' (character 255) to cover the whole 8-bit range; '\255' would be
// octal 255, i.e. character 173.
CHCON
options {
	paraphrase = "character constant";
}
  : "'" '\0'..'\377' "'"
  ;

// Punctuation and bracket tokens.  Each paraphrase is the text used for
// the token in place of its rule name when it is reported.
COMMA     options { paraphrase = ","; } : ',' ;

SEMICOLON options { paraphrase = ";"; } : ';' ;

LPAREN    options { paraphrase = "("; } : '(' ;

RPAREN    options { paraphrase = ")"; } : ')' ;

LCURL     options { paraphrase = "{"; } : '{' ;

RCURL     options { paraphrase = "}"; } : '}' ;

// Arithmetic operator tokens.
PLUS   options { paraphrase = "+"; } : '+' ;

MINUS  options { paraphrase = "-"; } : '-' ;

TIMES  options { paraphrase = "*"; } : '*' ;

DIVIDE options { paraphrase = "/"; } : '/' ;

MOD    options { paraphrase = "%"; } : '%' ;

// Assignment, equality and relational operator tokens.  Several of these
// share a first character ('=' and "==", '<' and "<="), so the lexer's
// second character of lookahead is what separates them.
ASSIGN options { paraphrase = "=";  } : '='  ;

EQ     options { paraphrase = "=="; } : "==" ;

NEQ    options { paraphrase = "!="; } : "!=" ;

LTHAN  options { paraphrase = "<";  } : '<'  ;

GTHAN  options { paraphrase = ">";  } : '>'  ;

LEQ    options { paraphrase = "<="; } : "<=" ;

GEQ    options { paraphrase = ">="; } : ">=" ;

// Shift operator tokens.
SHIFT_LEFT  options { paraphrase = "<<"; } : "<<" ;

SHIFT_RIGHT options { paraphrase = ">>"; } : ">>" ;

// back to ANTLR examples page