#include "Lpars.h"
#include <stdio.h>
#include <ctype.h>
#include <string.h>

/* This file contains the function mylex() which recognizes the following
 * tokens :
 *	EOFILE
 *	C_INSTR		- 'C_loc', 'C_lol', etc.
 *	DEF_C_INSTR	- 'C_loe..', 'C_ste..', '..icon, '..fcon', etc
 *	CONDITION	- C-expression, for example: '$1 == 481'
 *	ARROW		- '==>'
 *	EQUIV		- '::='
 *	CALL		- C-style functioncall, for example: 'error( 17)'
 *	ASSEM_INSTR	- C-style string, for example: '"mov r0, (r1)"'
 *	DEFAULT		- 'default'
 *	ERROR		- An error occurred in one of the tokens (this includes
 *			  a token that does not fit in yytext[] and a string
 *			  that is not closed before end of input).
 *
 * If the input matches none of these tokens the next character will be
 * returned.
 *
 * Besides mylex() the following variables are exported :
 *
 *	char yytext[];	- Contains the string representation of the current
 *			  token.
 *	char *next;	- Points to the first free position in yytext[].
 */

#define YYTEXT		256

/* Number of slots at the end of yytext[] that the bounded store routines
 * never use.  They absorb the unconditional look-ahead stores done by
 * arrow(), equiv() and is_DEF_C_INSTR() (at most two characters past the
 * limit) and leave room for a terminator at 'next'.
 */
#define YY_RESERVE	8
#define YY_LIMIT	( yytext + YYTEXT - YY_RESERVE)

char yytext[YYTEXT],	/* string-buffer for the token */
     *next;		/* points to the first free position in yytext[] */

/* Character source, defined elsewhere.
 * NOTE(review): scanc() returns char, so every comparison with EOF in this
 * file only works where plain char is signed - confirm for the target.
 */
extern char scanc();
/* Push-back routine, defined elsewhere.  The original code used it without
 * a declaration; this declaration spells out what the compiler assumed.
 */
extern int backc();

#define FALSE	0
#define TRUE	1

int CD_pos = FALSE;	/* 'CD_pos' is used as a flag to signal if it is
			 * possible to match a CONDITION or DEFAULT-token at
			 * this moment. Thus mylex() knows about the grammar
			 * of the "EM_table"!!
			 * This flag is needed because CALL is a subset of
			 * CONDITION.
			 */

static int bad_token;	/* set while scanning a token that is too long for
			 * yytext[] or is an unterminated string
			 */

int arrow( void);
int equiv( void);
int _default( void);
void read_ident( void);
void read_call( void);
void read_condition( void);
int is_C_INSTR( char *str);
int is_DEF_C_INSTR( char *str);

static int scan_token( void);
static int isletter( int c);
static char skip_space( void);
static void read_string( void);
static void put( int ch);


/* Return the next token (see the list at the top of this file); its text is
 * left in yytext[0 .. next-1].  Returns 0 at end of input.  ERROR is returned
 * instead of the recognized token when the token text did not fit in
 * yytext[] or when a string was not closed before end of input.
 */
int mylex( void)
{
	int token;

	bad_token = FALSE;
	token = scan_token();
	return( bad_token ? ERROR : token);
}


/* The actual scanner behind mylex(). */
static int scan_token( void)
{
	static int special = FALSE;	/* rule with conditions + default ? */
	char c;

	next = yytext;
	c = *next++ = skip_space();

	switch ( c) {
	  case EOF :
		next = yytext;
		return( 0);

	  case '"' :
		read_string();
		return( ASSEM_INSTR);

	  case '.' :
		/* Peek at the following character without consuming it. */
		c = scanc();
		backc( c);
		if ( c != '.') {
			/* Just a plain '.', not something like
			 * '..icon'.
			 */
			if ( special)
				CD_pos = TRUE;
			return( '.');
		}
		break;

	  case ';' :
		return( ';');

	  case '=' :
		if ( arrow()) {
			CD_pos = FALSE;
			return( ARROW);
		}
		break;

	  case ':' :
		if ( equiv()) {
			CD_pos = FALSE;
			return( EQUIV);
		}
		break;

	  case 'd' :
		if ( CD_pos && _default()) {
			CD_pos = FALSE;
			special = FALSE;
			return( DEFAULT);
		}
		break;

	  default :
		break;
	}

	/* Possible tokens at this place : CONDITION, CALL, C_INSTR,
	 * DEF_C_INSTR.  Characters consumed by a failed arrow(), equiv() or
	 * _default() attempt above are still in yytext[] and become part of
	 * the token scanned below.
	 */

	if ( CD_pos) {
		read_condition();
		CD_pos = FALSE;
		special = TRUE;
		return( CONDITION);
	}

	if ( isalpha( (unsigned char) c)) {
		read_ident();
		c = skip_space();
		if ( c == '(') {
			put( c);
			read_call();
			return( CALL);
		}
		backc( c);
		if ( is_DEF_C_INSTR( yytext)) {
			CD_pos = TRUE;
			return( DEF_C_INSTR);
		}
		/* Terminate the text so that is_C_INSTR() cannot match
		 * against bytes left behind by an earlier, longer token.
		 */
		*next = '\0';
		if ( is_C_INSTR( yytext)) {
			CD_pos = TRUE;
			return( C_INSTR);
		}
		return( ERROR);
	}

	if ( c == '.') {
		c = scanc();
		if ( c == '.') {
			*next++ = '.';
			read_ident();
			if ( is_DEF_C_INSTR( yytext)) {
				CD_pos = TRUE;
				return( DEF_C_INSTR);
			}
			return( ERROR);
		}
		backc( c);
		return( '.');
	}

	return( c);
}


/* Append 'ch' to yytext[].  When the buffer is full the character is dropped
 * and the token is marked bad, so that mylex() returns ERROR.
 */
static void put( int ch)
{
	if ( next < YY_LIMIT)
		*next++ = (char) ch;
	else
		bad_token = TRUE;
}


/* True if 'c' may appear in an identifier: a letter, a digit or '_'. */
static int isletter( int c)
{
	/* The <ctype.h> functions need a value in unsigned char range. */
	unsigned char uc = (unsigned char) c;

	return( isalpha( uc) || isdigit( uc) || c == '_');
}


/* Consume white space and return the first character that is not white
 * space.
 */
static char skip_space( void)
{
	char c;

	do {
		c = scanc();
	} while ( isspace( (unsigned char) c));
	return( c);
}


/* first character has been read */

/* Match something like "mov r0, (r1)".
 * Strip the double quotes off! Inside a string, the character '"' must
 * be preceded by a '\'.  The '\' itself is kept in yytext[].
 * End of input inside the string ends the token and marks it bad.
 */
static void read_string( void)
{
	char c;
	char prev = '\0';	/* previous character of the string, if any */

	next--;			/* drop the opening quote */
	for ( ;;) {
		c = scanc();
		if ( c == EOF) {
			bad_token = TRUE;
			return;
		}
		if ( c == '"' && prev != '\\')
			return;
		put( c);
		prev = c;
	}
}


/* Try to complete '==>'; the first '=' has already been read and stored.
 * Returns TRUE when '=' '>' follow; both are then stored in yytext[].
 * Otherwise the first character that does not fit is pushed back and FALSE
 * is returned; a second '=' that was read before the mismatch stays in
 * yytext[].
 */
int arrow( void)
{
	if ( ( *next++ = scanc()) != '=') {
		backc( *--next);
		return( FALSE);
	}
	if ( ( *next++ = scanc()) != '>') {
		backc( *--next);
		return( FALSE);
	}
	return( TRUE);
}


/* Try to complete '::='; the first ':' has already been read and stored.
 * Same conventions as arrow().
 */
int equiv( void)
{
	if ( ( *next++ = scanc()) != ':') {
		backc( *--next);
		return( FALSE);
	}
	if ( ( *next++ = scanc()) != '=') {
		backc( *--next);
		return( FALSE);
	}
	return( TRUE);
}


/* Try to complete 'default'; the 'd' has already been read and stored.
 * Returns TRUE when "efault" follows and the first character after any
 * white space is not an identifier character; that character is pushed
 * back.  On failure the offending character is pushed back and the
 * characters matched so far stay in yytext[].
 */
int _default( void)
{
	static const char rest[] = "efault";
	const char *p;
	char c;

	for ( p = rest; *p != '\0'; p++) {
		if ( ( *next++ = scanc()) != *p) {
			backc( *--next);
			return( FALSE);
		}
	}

	c = skip_space();
	backc( c);
	return( !isletter( c));
}


/* Append the identifier characters that follow to yytext[]; the first
 * character that is not part of the identifier is pushed back.
 */
void read_ident( void)
{
	char c;

	while ( isletter( c = scanc()))
		put( c);
	backc( c);
}


/* Read the rest of a CALL, up to and including the ')' that matches the '('
 * that has already been stored.  Stops at end of input as well.
 */
void read_call( void)
{
	int depth = 1;		/* number of '(' still to be closed */
	char c;

	for ( ;;) {
		c = scanc();
		put( c);
		switch ( c) {
		  case EOF :
			return;
		  case '(' :
			depth++;
			break;
		  case ')' :
			if ( --depth == 0)
				return;
			break;
		  default :
			break;
		}
	}
}


/* A CONDITION is followed by either '==>' or '::='.
 * Everything up to that terminator is stored in yytext[]; the terminator
 * itself is pushed back so that the next call of mylex() sees it.  Stops at
 * end of input as well.
 */
void read_condition( void)
{
	for ( ;;) {
		if ( next >= YY_LIMIT) {
			/* Buffer full: keep scanning for the terminator but
			 * reuse the last slot, so that the pointer arithmetic
			 * below and in arrow()/equiv() stays inside yytext[].
			 */
			next = YY_LIMIT - 1;
			bad_token = TRUE;
		}
		switch ( *next++ = scanc()) {
		  case EOF :
			return;

		  case '=' :
			if ( arrow()) {
				backc( '>');
				backc( '=');
				backc( '=');
				next -= 3;
				return;
			}
			break;

		  case ':' :
			if ( equiv()) {
				backc( '=');
				backc( ':');
				backc( ':');
				next -= 3;
				return;
			}
			break;

		  default :
			break;
		}
	}
}


/* True if 'str' starts with 'C_' (C_xxx) or with one of the words "locals",
 * "jump" or "prolog".
 */
int is_C_INSTR( char *str)
{
	return( ( str[0] == 'C' && str[1] == '_')
		|| strncmp( "locals", str, 6) == 0
		|| strncmp( "jump", str, 4) == 0
		|| strncmp( "prolog", str, 6) == 0);
}


/* yytext[] contains either '..[letter]*' ( 2 dots possibly followed by an
 *				identifier)
 *		     or '[letter]+' ( just an identifier)
 * Try to match something like 'C_loe..' or '..icon'
 *
 * In the second case the two dots are read from the input and appended to
 * yytext[].
 * NOTE(review): when a single '.' follows the identifier, only the character
 * after it is pushed back; the '.' itself stays consumed in yytext[].
 */
int is_DEF_C_INSTR( char *str)
{
	if ( str[0] == '.' && str[1] == '.')
		return( next > yytext+1);

	if ( ( *next++ = scanc()) != '.') {
		backc( *--next);
		return( FALSE);
	}
	if ( ( *next++ = scanc()) != '.') {
		backc( *--next);
		return( FALSE);
	}
	return( next > yytext+1);
}