ack/util/ceg/EM_parser/common/mylex.c

331 lines
5.9 KiB
C

#include "Lpars.h"
#include <ctype.h>
#include <stdio.h>
#include <string.h>
/* This file contains the function mylex() which recognizes the following
* tokens :
* EOFILE
* C_INSTR - 'C_loc', 'C_lol', etc.
* DEF_C_INSTR - 'C_loe..', 'C_ste..', '..icon', '..fcon', etc
* CONDITION - C-expression, for example: '$1 == 481'
* ARROW - '==>'
* EQUIV - '::='
* CALL - C-style functioncall, for example: 'error( 17)'
* ASSEM_INSTR - C-style string, for example: '"mov r0, (r1)"'
* DEFAULT - 'default'
* ERROR - An error occurred in one of the tokens.
*
* If the input matches none of these tokens, the next character is returned.
*
* Besides mylex() the following variables are exported :
*
* char yytext[]; - Contains the string representation of the current
* token.
* char *next; - Points to the first free position in yytext[].
*/
#define YYTEXT	256		/* size of the token buffer yytext[] */

char yytext[YYTEXT],	/* string-buffer for the token */
     *next;		/* points to the first free position in yytext[] */

extern char scanc();

/* Forward declarations of the file-local helpers that mylex() uses before
 * their definitions. Without them the first (block-scope or implicit)
 * declaration gives these names external linkage, which conflicts with the
 * later 'static' definitions.
 */
static char skip_space();
static int read_string();

#define FALSE	0
#define TRUE	1

int CD_pos = FALSE;	/* 'CD_pos' is used as a flag to signal if it is
			 * possible to match a CONDITION or DEFAULT-token at
			 * this moment. Thus mylex() knows about the grammar
			 * of the "EM_table"!!
			 * This flag is needed because CALL is a subset of
			 * CONDITION.
			 */

int CALL_pos = FALSE;	/* Needed to distinguish between
			 * C_INSTR CONDITION and CALL
			 */
/* mylex() - deliver the next token of the input.
 *
 * The token text is collected in yytext[]; 'next' is reset to the start of
 * yytext[] on entry and afterwards points just past the collected text.
 * Returns 0 at end of input, one of the token codes ASSEM_INSTR, ARROW,
 * EQUIV, DEFAULT, CONDITION, CALL, DEF_C_INSTR, C_INSTR or ERROR, or
 * otherwise the character that was read.
 *
 * The flags CD_pos and CALL_pos are updated as a side effect, so the result
 * depends on the tokens delivered before. Nothing here stores a terminating
 * '\0' in yytext[].
 */
int mylex()
{
	char c, skip_space();
	static int special = FALSE;	/* rule with conditions + default ? */

	next = yytext;
	c = *next++ = skip_space();
	switch ( c) {
	  case EOF : next = yytext;
		     return( 0);

	  case '"' : read_string();
		     return( ASSEM_INSTR);

	  case '.' : c = scanc();	/* peek; 'c' now holds the NEXT char */
		     backc( c);
		     if ( c != '.') {	/* Just a plain '.', not something like
					 * '..icon'.
					 */
			if ( special)
				CD_pos = TRUE;
			CALL_pos = FALSE;
			return( '.');
		     }
		     /* Two dots in a row: 'c' is '.' again, handled below. */
		     break;

	  case ';' : return( ';');

	  case '=' : if ( arrow()) {
			CD_pos = FALSE;
			CALL_pos = TRUE;
			return( ARROW);
		     }
		     break;

	  case ':' : if ( equiv()) {
			CD_pos = FALSE;
			CALL_pos = TRUE;
			return( EQUIV);
		     }
		     break;

	  case 'd' : if ( CD_pos && _default()) {
			CD_pos = FALSE;
			special = FALSE;
			return( DEFAULT);
		     }
		     break;
	}

	/* Possible tokens at this place : CONDITION, CALL, C_INSTR,
	 * DEF_C_INSTR
	 */

	if ( CD_pos) {
		/* Whatever was collected so far is the start of the condition. */
		read_condition();
		CD_pos = FALSE;
		special = TRUE;
		return( CONDITION);
	}

	/* <ctype.h> functions need an unsigned char value (or EOF); a plain
	 * 'char' may be negative.
	 */
	if ( isalpha( (unsigned char) c)) {
		read_ident();
		if ( CALL_pos) {
			/* After '==>' or '::=' an identifier must start a call. */
			c = skip_space();
			if ( c == '(') {
				*next++ = c;
				read_call();
				return( CALL);
			}
			else {
				backc( c);
				return( ERROR);
			}
		}
		else {
			/* is_DEF_C_INSTR() must be tried first: it reads ahead
			 * for a trailing '..' and stores what it reads in
			 * yytext[].
			 */
			if ( is_DEF_C_INSTR( yytext)) {
				CD_pos = TRUE;
				return( DEF_C_INSTR);
			}
			if ( is_C_INSTR( yytext)) {
				CD_pos = TRUE;
				return( C_INSTR);
			}
			return( ERROR);
		}
	}

	if ( c == '.') {
		/* Only reached from case '.' above, after a second '.' was
		 * peeked and pushed back.
		 */
		c = scanc();
		if ( c == '.') {
			*next++ = '.';
			read_ident();
			if ( is_DEF_C_INSTR( yytext)) {
				CD_pos = TRUE;
				return( DEF_C_INSTR);
			}
			return( ERROR);
		}
		else {
			/* NOTE(review): presumably unreachable as long as
			 * scanc() hands back the character just given to
			 * backc() - confirm against their definitions.
			 */
			backc( c);
			return( '.');
		}
	}

	return( c);
}
/* isletter() - return 1 if 'c' may occur in an identifier (a letter, a digit
 * or '_'), 0 otherwise.
 */
static int isletter( c)
	char c;
{
	/* <ctype.h> functions are only defined for unsigned char values and
	 * EOF; a plain 'char' may be negative, so convert it first.
	 */
	unsigned char uc = (unsigned char) c;

	return( isalpha( uc) || isdigit( uc) || c == '_');
}
/* skip_space() - read characters with scanc() until one is found that is not
 * white space according to isspace(), and return that character.
 */
static char skip_space()
{
	char c;

	/* isspace() needs an unsigned char value; a plain 'char' may be
	 * negative.
	 */
	do {
		c = scanc();
	} while ( isspace( (unsigned char) c));
	return( c);
}
/* first character has been read */
static read_string()
/* match something like "mov r0, (r1)".
* strip the double quotes off! Inside a string, the character '"' must
* be preceded by a '\'.
*/
{
next--;
while( ( *next = scanc()) != '"' || *(next-1) == '\\')
next++;
}
/* arrow() - try to complete '==>' after its first '=' has been read.
 *
 * Every character read is appended to yytext[]. Only the character that
 * breaks the match is pushed back and removed from yytext[]; a matched
 * second '=' stays consumed. Returns TRUE on a full match, FALSE otherwise.
 */
int arrow()			/* '==>' */
{
	char c;

	c = scanc();
	*next++ = c;
	if ( c != '=') {
		next--;
		backc( c);
		return( FALSE);
	}

	c = scanc();
	*next++ = c;
	if ( c != '>') {
		next--;
		backc( c);
		return( FALSE);
	}

	return( TRUE);
}
/* equiv() - try to complete '::=' after its first ':' has been read.
 *
 * Every character read is appended to yytext[]. Only the character that
 * breaks the match is pushed back and removed from yytext[]; a matched
 * second ':' stays consumed. Returns TRUE on a full match, FALSE otherwise.
 */
int equiv()			/* '::=' */
{
	char c;

	c = scanc();
	*next++ = c;
	if ( c != ':') {
		next--;
		backc( c);
		return( FALSE);
	}

	c = scanc();
	*next++ = c;
	if ( c != '=') {
		next--;
		backc( c);
		return( FALSE);
	}

	return( TRUE);
}
/* _default() - try to complete the keyword 'default' after its 'd' has been
 * read.
 *
 * The letters "efault" are read one by one and appended to yytext[]. The
 * first letter that does not fit is pushed back and removed from yytext[];
 * the letters matched before it stay consumed. After a full match the next
 * non-blank character is inspected and always pushed back: the keyword only
 * counts if that character cannot continue an identifier.
 * Returns TRUE if 'default' was recognized, FALSE otherwise.
 */
int _default()			/* 'default' */
{
	static char tail[] = "efault";	/* what has to follow the 'd' */
	char c, skip_space();
	char *want;
	int is_keyword;

	for ( want = tail; *want != '\0'; want++) {
		c = scanc();
		*next++ = c;
		if ( c != *want) {
			next--;
			backc( c);
			return( FALSE);
		}
	}

	c = skip_space();
	is_keyword = !isletter( c);
	backc( c);
	return( is_keyword ? TRUE : FALSE);
}
/* read_ident() - append the rest of an identifier to yytext[].
 *
 * Characters are taken for as long as isletter() accepts them; the first
 * one that is rejected is pushed back.
 * NOTE(review): writes are not checked against the size of yytext[].
 */
read_ident()
{
	char c;

	for ( c = scanc(); isletter( c); c = scanc())
		*next++ = c;
	backc( c);
}
/* read_call() - append the argument list of a CALL to yytext[].
 *
 * The opening '(' has already been stored by the caller. Characters are
 * copied, keeping track of nested parentheses, up to and including the ')'
 * that closes the first '('. Reading also stops at EOF; the EOF character
 * itself is stored as well.
 * NOTE(review): writes are not checked against the size of yytext[].
 */
read_call()
{
	int depth = 1;		/* number of '(' still open */
	char c;

	do {
		c = scanc();
		*next++ = c;
		if ( c == '(')
			depth++;
		else if ( c == ')')
			depth--;
	} while ( c != EOF && depth != 0);
}
/* read_condition() - append a CONDITION to yytext[].
 *
 * A CONDITION is followed by either '==>' or '::='. Characters are copied
 * until one of these is found. The three characters of that terminator are
 * then pushed back in reverse order and removed from yytext[], so that the
 * next call of mylex() sees them again. Reading also stops at EOF; the EOF
 * character itself is stored as well.
 * NOTE(review): writes are not checked against the size of yytext[].
 */
read_condition()
{
	char c;

	for ( ;;) {
		c = scanc();
		*next++ = c;

		if ( c == EOF)
			break;

		if ( c == '=' && arrow()) {
			backc( '>');
			backc( '=');
			backc( '=');
			next -= 3;
			break;
		}

		if ( c == ':' && equiv()) {
			backc( '=');
			backc( ':');
			backc( ':');
			next -= 3;
			break;
		}
	}
}
/* is_C_INSTR() - return TRUE if 'str' starts like a C_INSTR, that is with
 * "C_", "locals", "jump" or "prolog"; FALSE otherwise.
 * Only the leading characters are compared, so anything may follow them.
 */
is_C_INSTR( str)
	char *str;
{
	if ( str[0] == 'C' && str[1] == '_')		/* C_xxx */
		return( TRUE);

	if ( strncmp( "locals", str, 6) == 0 ||
	     strncmp( "jump", str, 4) == 0 ||
	     strncmp( "prolog", str, 6) == 0)
		return( TRUE);

	return( FALSE);
}
/* is_DEF_C_INSTR() - try to match something like 'C_loe..' or '..icon'.
 *
 * yytext[] contains either '..[letter]*' ( 2 dots possibly followed by an
 * identifier) or '[letter]+' ( just an identifier).
 * If 'str' already starts with two dots nothing is read. Otherwise up to two
 * more characters are read and appended to yytext[], looking for a trailing
 * '..'. Only the character that breaks the match is pushed back and removed
 * from yytext[]; a single '.' that was matched first stays consumed.
 * Returns non-zero on a match, FALSE otherwise.
 */
is_DEF_C_INSTR( str)
	char *str;
{
	char c;

	if ( str[0] == '.' && str[1] == '.')
		return( next > yytext+1);

	c = scanc();
	*next++ = c;
	if ( c != '.') {
		next--;
		backc( c);
		return( FALSE);
	}

	c = scanc();
	*next++ = c;
	if ( c != '.') {
		next--;
		backc( c);
		return( FALSE);
	}

	return( next > yytext+1);
}