initial version
This commit is contained in:
parent
e7b7aa3944
commit
29b2e6e3d4
24
util/flex/.distr
Normal file
24
util/flex/.distr
Normal file
|
@ -0,0 +1,24 @@
|
|||
COPYING
|
||||
Changes
|
||||
Headers
|
||||
Makefile
|
||||
README
|
||||
ccl.c
|
||||
dfa.c
|
||||
ecs.c
|
||||
flex.1
|
||||
flex.skel
|
||||
flexdef.h
|
||||
flexdoc.1
|
||||
gen.c
|
||||
initscan.c
|
||||
libmain.c
|
||||
main.c
|
||||
makefile
|
||||
misc.c
|
||||
nfa.c
|
||||
parse.y
|
||||
scan.l
|
||||
sym.c
|
||||
tblcmp.c
|
||||
yylex.c
|
38
util/flex/COPYING
Normal file
38
util/flex/COPYING
Normal file
|
@ -0,0 +1,38 @@
|
|||
Flex carries the copyright used for BSD software, slightly modified
|
||||
because it originated at the Lawrence Berkeley (not Livermore!) Laboratory,
|
||||
which operates under a contract with the Department of Energy:
|
||||
|
||||
Copyright (c) 1990 The Regents of the University of California.
|
||||
All rights reserved.
|
||||
|
||||
This code is derived from software contributed to Berkeley by
|
||||
Vern Paxson.
|
||||
|
||||
The United States Government has rights in this work pursuant
|
||||
to contract no. DE-AC03-76SF00098 between the United States
|
||||
Department of Energy and the University of California.
|
||||
|
||||
Redistribution and use in source and binary forms are permitted
|
||||
provided that: (1) source distributions retain this entire
|
||||
copyright notice and comment, and (2) distributions including
|
||||
binaries display the following acknowledgement: ``This product
|
||||
includes software developed by the University of California,
|
||||
Berkeley and its contributors'' in the documentation or other
|
||||
materials provided with the distribution and in all advertising
|
||||
materials mentioning features or use of this software. Neither the
|
||||
name of the University nor the names of its contributors may be
|
||||
used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
|
||||
IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
PURPOSE.
|
||||
|
||||
This basically says "do whatever you please with this software except
|
||||
remove this notice or take advantage of the University's (or the flex
|
||||
authors') name".
|
||||
|
||||
Note that the "flex.skel" scanner skeleton carries no copyright notice.
|
||||
You are free to do whatever you please with scanners generated using flex;
|
||||
for them, you are not even bound by the above copyright.
|
337
util/flex/Changes
Normal file
337
util/flex/Changes
Normal file
|
@ -0,0 +1,337 @@
|
|||
Changes between 2.3 Patch #6 (29Aug90) and 2.3 Patch #5:
|
||||
|
||||
- Fixed a serious bug in yymore() which basically made it
|
||||
completely broken. Thanks goes to Jean Christophe of
|
||||
the Nethack development team for finding the problem
|
||||
and passing along the fix.
|
||||
|
||||
|
||||
Changes between 2.3 Patch #5 (16Aug90) and 2.3 Patch #4:
|
||||
|
||||
- An up-to-date version of initscan.c so "make test" will
|
||||
work after applying the previous patches
|
||||
|
||||
|
||||
Changes between 2.3 Patch #4 (14Aug90) and 2.3 Patch #3:
|
||||
|
||||
- Fixed bug in hexadecimal escapes which allowed only digits,
|
||||
not letters, in escapes
|
||||
- Fixed bug in previous "Changes" file!
|
||||
|
||||
|
||||
Changes between 2.3 Patch #3 (03Aug90) and 2.3 Patch #2:
|
||||
|
||||
- Correction to patch #2 for gcc compilation; thanks goes to
|
||||
Paul Eggert for catching this.
|
||||
|
||||
|
||||
Changes between 2.3 Patch #2 (02Aug90) and original 2.3 release:
|
||||
|
||||
- Fixed (hopefully) headaches involving declaring malloc()
|
||||
and free() for gcc, which defines __STDC__ but (often) doesn't
|
||||
come with the standard include files such as <stdlib.h>.
|
||||
Reordered #ifdef maze in the scanner skeleton in the hope of
|
||||
getting the declarations right for cfront and g++, too.
|
||||
|
||||
- Note that this patch supersedes patch #1 for release 2.3,
|
||||
which was never announced but was available briefly for
|
||||
anonymous ftp.
|
||||
|
||||
|
||||
Changes between 2.3 (full) release of 28Jun90 and 2.2 (alpha) release:
|
||||
|
||||
User-visible:
|
||||
|
||||
- A lone <<EOF>> rule (that is, one which is not qualified with
|
||||
a list of start conditions) now specifies the EOF action for
|
||||
*all* start conditions which haven't already had <<EOF>> actions
|
||||
given. To specify an end-of-file action for just the initial
|
||||
state, use <INITIAL><<EOF>>.
|
||||
|
||||
- -d debug output is now contingent on the global yy_flex_debug
|
||||
being set to a non-zero value, which it is by default.
|
||||
|
||||
- A new macro, YY_USER_INIT, is provided for the user to specify
|
||||
initialization action to be taken on the first call to the
|
||||
scanner. This action is done before the scanner does its
|
||||
own initialization.
|
||||
|
||||
- yy_new_buffer() has been added as an alias for yy_create_buffer()
|
||||
|
||||
- Comments beginning with '#' and extending to the end of the line
|
||||
now work, but have been deprecated (in anticipation of making
|
||||
flex recognize #line directives).
|
||||
|
||||
- The funky restrictions on when semi-colons could follow the
|
||||
YY_NEW_FILE and yyless macros have been removed. They now
|
||||
behave identically to functions.
|
||||
|
||||
- A bug in the sample redefinition of YY_INPUT in the documentation
|
||||
has been corrected.
|
||||
|
||||
- A bug in the sample simple tokener in the documentation has
|
||||
been corrected.
|
||||
|
||||
- The documentation on the incompatibilities between flex and
|
||||
lex has been reordered so that the discussion of yylineno
|
||||
and input() come first, as it's anticipated that these will
|
||||
be the most common source of headaches.
|
||||
|
||||
|
||||
Things which didn't used to be documented but now are:
|
||||
|
||||
- flex interprets "^foo|bar" differently from lex. flex interprets
|
||||
it as "match either a 'foo' or a 'bar', providing it comes at the
|
||||
beginning of a line", whereas lex interprets it as "match either
|
||||
a 'foo' at the beginning of a line, or a 'bar' anywhere".
|
||||
|
||||
- flex initializes the global "yyin" on the first call to the
|
||||
scanner, while lex initializes it at compile-time.
|
||||
|
||||
- yy_switch_to_buffer() can be used in the yywrap() macro/routine.
|
||||
|
||||
- flex scanners do not use stdio for their input, and hence when
|
||||
writing an interactive scanner one must explicitly call fflush()
|
||||
after writing out a prompt.
|
||||
|
||||
- flex scanners can be made reentrant (after a fashion) by using
|
||||
"yyrestart( yyin );". This is useful for interactive scanners
|
||||
which have interrupt handlers that long-jump out of the scanner.
|
||||
|
||||
- a defense of why yylineno is not supported is included, along
|
||||
with a suggestion on how to convert scanners which rely on it.
|
||||
|
||||
|
||||
Other changes:
|
||||
|
||||
- Prototypes and proper declarations of void routines have
|
||||
been added to the flex source code, courtesy of Kevin B. Kenny.
|
||||
|
||||
- Routines dealing with memory allocation now use void* pointers
|
||||
instead of char* - see Makefile for porting implications.
|
||||
|
||||
- Error-checking is now done when flex closes a file.
|
||||
|
||||
- Various lint tweaks were added to reduce the number of gripes.
|
||||
|
||||
- Makefile has been further parameterized to aid in porting.
|
||||
|
||||
- Support for SCO Unix added.
|
||||
|
||||
- Flex now sports the latest & greatest UC copyright notice
|
||||
(which is only slightly different from the previous one).
|
||||
|
||||
- A note has been added to flexdoc.1 mentioning work in progress
|
||||
on modifying flex to generate straight C code rather than a
|
||||
table-driven automaton, with an email address of whom to contact
|
||||
if you are working along similar lines.
|
||||
|
||||
|
||||
Changes between 2.2 Patch #3 (30Mar90) and 2.2 Patch #2:
|
||||
|
||||
- fixed bug which caused -I scanners to bomb
|
||||
|
||||
|
||||
Changes between 2.2 Patch #2 (27Mar90) and 2.2 Patch #1:
|
||||
|
||||
- fixed bug writing past end of input buffer in yyunput()
|
||||
- fixed bug detecting NUL's at the end of a buffer
|
||||
|
||||
|
||||
Changes between 2.2 Patch #1 (23Mar90) and 2.2 (alpha) release:
|
||||
|
||||
- Makefile fixes: definition of MAKE variable for systems
|
||||
which don't have it; installation of flexdoc.1 along with
|
||||
flex.1; fixed two bugs which could cause "bigtest" to fail.
|
||||
|
||||
- flex.skel fix for compiling with g++.
|
||||
|
||||
- README and flexdoc.1 no longer list an out-of-date BITNET address
|
||||
for contacting me.
|
||||
|
||||
- minor typos and formatting changes to flex.1 and flexdoc.1.
|
||||
|
||||
|
||||
Changes between 2.2 (alpha) release of March '90 and previous release:
|
||||
|
||||
User-visible:
|
||||
|
||||
- Full user documentation now available.
|
||||
|
||||
- Support for 8-bit scanners.
|
||||
|
||||
- Scanners now accept NUL's.
|
||||
|
||||
- A facility has been added for dealing with multiple
|
||||
input buffers.
|
||||
|
||||
- Two manual entries now. One which fully describes flex
|
||||
(rather than just its differences from lex), and the
|
||||
other for quick(er) reference.
|
||||
|
||||
- A number of changes to bring flex closer into compliance
|
||||
with the latest POSIX lex draft:
|
||||
|
||||
%t support
|
||||
flex now accepts multiple input files and concatenates
|
||||
them together to form its input
|
||||
previous -c (compress) flag renamed -C
|
||||
do-nothing -c and -n flags added
|
||||
Any indented code or code within %{}'s in section 2 is
|
||||
now copied to the output
|
||||
|
||||
- yyleng is now a bona fide global integer.
|
||||
|
||||
- -d debug information now gives the line number of the
|
||||
matched rule instead of which number rule it was from
|
||||
the beginning of the file.
|
||||
|
||||
- -v output now includes a summary of the flags used to generate
|
||||
the scanner.
|
||||
|
||||
- unput() and yyrestart() are now globally callable.
|
||||
|
||||
- yyrestart() no longer closes the previous value of yyin.
|
||||
|
||||
- C++ support; generated scanners can be compiled with C++ compiler.
|
||||
|
||||
- Primitive -lfl library added, containing default main()
|
||||
which calls yylex(). A number of routines currently living
|
||||
in the scanner skeleton will probably migrate to here
|
||||
in the future (in particular, yywrap() will probably cease
|
||||
to be a macro and instead be a function in the -lfl library).
|
||||
|
||||
- Hexadecimal (\x) escape sequences added.
|
||||
|
||||
- Support for MS-DOS, VMS, and Turbo-C integrated.
|
||||
|
||||
- The %used/%unused operators have been deprecated. They
|
||||
may go away soon.
|
||||
|
||||
|
||||
Other changes:
|
||||
|
||||
- Makefile enhanced for easier testing and installation.
|
||||
- The parser has been tweaked to detect some erroneous
|
||||
constructions which previously were missed.
|
||||
- Scanner input buffer overflow is now detected.
|
||||
- Bugs with missing "const" declarations fixed.
|
||||
- Out-of-date Minix/Atari patches provided.
|
||||
- Scanners no longer require printf() unless FLEX_DEBUG is being used.
|
||||
- A subtle input() bug has been fixed.
|
||||
- Line numbers for "continued action" rules (those following
|
||||
the special '|' action) are now correct.
|
||||
- unput() bug fixed; had been causing problems porting flex to VMS.
|
||||
- yymore() handling rewritten to fix bug with interaction
|
||||
between yymore() and trailing context.
|
||||
- EOF in actions now generates an error message.
|
||||
- Bug involving -CFe and generating equivalence classes fixed.
|
||||
- Bug which made -CF be treated as -Cf fixed.
|
||||
- Support for SysV tmpnam() added.
|
||||
- Unused #define's for scanner no longer generated.
|
||||
- Error messages which are associated with a particular input
|
||||
line are now all identified with their input line in standard
|
||||
format.
|
||||
- % directives which are valid to lex but not to flex are
|
||||
now ignored instead of generating warnings.
|
||||
- -DSYS_V flag can now also be specified -DUSG for System V
|
||||
compilation.
|
||||
|
||||
|
||||
Changes between 2.1 beta-test release of June '89 and previous release:
|
||||
|
||||
User-visible:
|
||||
|
||||
- -p flag generates a performance report to stderr. The report
|
||||
consists of comments regarding features of the scanner rules
|
||||
which result in slower scanners.
|
||||
|
||||
- -b flag generates backtracking information to lex.backtrack.
|
||||
This is a list of scanner states which require backtracking
|
||||
and the characters on which they do so. By adding rules
|
||||
one can remove backtracking states. If all backtracking states
|
||||
are eliminated, the generated scanner will run faster.
|
||||
Backtracking is not yet documented in the manual entry.
|
||||
|
||||
- Variable trailing context now works, i.e., one can have
|
||||
rules like "(foo)*/[ \t]*bletch". Some trailing context
|
||||
patterns still cannot be properly matched and generate
|
||||
error messages. These are patterns where the ending of the
|
||||
first part of the rule matches the beginning of the second
|
||||
part, such as "zx*/xy*", where the 'x*' matches the 'x' at
|
||||
the beginning of the trailing context. Lex won't get these
|
||||
patterns right either.
|
||||
|
||||
- Faster scanners.
|
||||
|
||||
- End-of-file rules. The special rule "<<EOF>>" indicates
|
||||
actions which are to be taken when an end-of-file is
|
||||
encountered and yywrap() returns non-zero (i.e., indicates
|
||||
no further files to process). See manual entry for example.
|
||||
|
||||
- The -r (reject used) flag is gone. flex now scans the input
|
||||
for occurrences of the string "REJECT" to determine if the
|
||||
action is needed. It tries to be intelligent about this but
|
||||
can be fooled. One can force the presence or absence of
|
||||
REJECT by adding a line in the first section of the form
|
||||
"%used REJECT" or "%unused REJECT".
|
||||
|
||||
- yymore() has been implemented. Similarly to REJECT, flex
|
||||
detects the use of yymore(), which can be overridden using
|
||||
"%used" or "%unused".
|
||||
|
||||
- Patterns like "x{0,3}" now work (i.e., with lower-limit == 0).
|
||||
|
||||
- Removed '\^x' for ctrl-x misfeature.
|
||||
|
||||
- Added '\a' and '\v' escape sequences.
|
||||
|
||||
- \<digits> now works for octal escape sequences; previously
|
||||
\0<digits> was required.
|
||||
|
||||
- Better error reporting; line numbers are associated with rules.
|
||||
|
||||
- yyleng is a macro; it cannot be accessed outside of the
|
||||
scanner source file.
|
||||
|
||||
- yytext and yyleng should not be modified within a flex action.
|
||||
|
||||
- Generated scanners #define the name FLEX_SCANNER.
|
||||
|
||||
- Rules are internally separated by YY_BREAK in lex.yy.c rather
|
||||
than break, to allow redefinition.
|
||||
|
||||
- The macro YY_USER_ACTION can be redefined to provide an action
|
||||
which is always executed prior to the matched rule's action.
|
||||
|
||||
- yyrestart() is a new action which can be used to restart
|
||||
the scanner after it has seen an end-of-file (a "real" one,
|
||||
that is, one for which yywrap() returned non-zero). It takes
|
||||
a FILE* argument indicating a new file to scan and sets
|
||||
things up so that a subsequent call to yylex() will start
|
||||
scanning that file.
|
||||
|
||||
- Internal scanner names all preceded by "yy_"
|
||||
|
||||
- lex.yy.c is deleted if errors are encountered during processing.
|
||||
|
||||
- Comments may be put in the first section of the input by preceding
|
||||
them with '#'.
|
||||
|
||||
|
||||
|
||||
Other changes:
|
||||
|
||||
- Some portability-related bugs fixed, in particular for machines
|
||||
with unsigned characters or sizeof( int* ) != sizeof( int ).
|
||||
Also, tweaks for VMS and Microsoft C (MS-DOS), and identifiers all
|
||||
trimmed to be 31 or fewer characters. Shortened file names
|
||||
for dinosaur OS's. Checks for allocating > 64K memory
|
||||
on 16 bit'ers. Amiga tweaks. Compiles using gcc on a Sun-3.
|
||||
- Compressed and fast scanner skeletons merged.
|
||||
- Skeleton header files done away with.
|
||||
- Generated scanner uses prototypes and "const" for __STDC__.
|
||||
- -DSV flag is now -DSYS_V for System V compilation.
|
||||
- Removed all references to FTL language.
|
||||
- Software now covered by BSD Copyright.
|
||||
- flex will replace lex in subsequent BSD releases.
|
25
util/flex/Headers
Normal file
25
util/flex/Headers
Normal file
|
@ -0,0 +1,25 @@
|
|||
This file contains the original RCS Headers. Unfortunately, RCS will destroy
|
||||
them as soon as we bring our version under RCS. This file lives under RCS as
|
||||
well, so all occurrences of a $ followed by Header are changed into $header.
|
||||
|
||||
Makefile:# @(#) $header: /usr/fsys/odin/a/vern/flex/RCS/Makefile,v 2.9 90/05/26 17:28:44 vern Exp $ (LBL)
|
||||
README:// $header: /usr/fsys/odin/a/vern/flex/RCS/README,v 2.8 90/05/26 17:31:27 vern Exp $
|
||||
ccl.c: "@(#) $header: /usr/fsys/odin/a/vern/flex/RCS/ccl.c,v 2.5 90/06/27 23:48:13 vern Exp $ (LBL)";
|
||||
dfa.c: "@(#) $header: /usr/fsys/odin/a/vern/flex/RCS/dfa.c,v 2.7 90/06/27 23:48:15 vern Exp $ (LBL)";
|
||||
ecs.c: "@(#) $header: /usr/fsys/odin/a/vern/flex/RCS/ecs.c,v 2.5 90/06/27 23:48:17 vern Exp $ (LBL)";
|
||||
flex.skel: * $header: /usr/fsys/odin/a/vern/flex/RCS/flex.skel,v 2.16 90/08/03 14:09:36 vern Exp $
|
||||
flexdef.h:/* @(#) $header: /usr/fsys/odin/a/vern/flex/RCS/flexdef.h,v 2.10 90/08/03 14:09:52 vern Exp $ (LBL) */
|
||||
gen.c: "@(#) $header: /usr/helios/u0/vern/flex/RCS/gen.c,v 2.10 90/08/29 12:11:13 vern Exp $ (LBL)";
|
||||
initscan.c: * $header: /usr/fsys/odin/a/vern/flex/RCS/flex.skel,v 2.16 90/08/03 14:09:36 vern Exp $
|
||||
initscan.c: "@(#) $header: /usr/fsys/odin/a/vern/flex/RCS/scan.l,v 2.9 90/06/27 23:48:34 vern Exp $ (LBL)";
|
||||
libmain.c:/* $header: /usr/fsys/odin/a/vern/flex/RCS/libmain.c,v 1.2 90/05/26 16:50:08 vern Exp $ */
|
||||
main.c: "@(#) $header: /usr/fsys/odin/a/vern/flex/RCS/main.c,v 2.9 90/06/27 23:48:24 vern Exp $ (LBL)";
|
||||
misc.c: "@(#) $header: /usr/fsys/odin/a/vern/flex/RCS/misc.c,v 2.9 90/08/14 00:10:24 vern Exp $ (LBL)";
|
||||
nfa.c: "@(#) $header: /usr/fsys/odin/a/vern/flex/RCS/nfa.c,v 2.6 90/06/27 23:48:29 vern Exp $ (LBL)";
|
||||
parse.y: "@(#) $header: /usr/fsys/odin/a/vern/flex/RCS/parse.y,v 2.7 90/06/27 23:48:31 vern Exp $ (LBL)";
|
||||
scan.c: * $header: /usr/fsys/odin/a/vern/flex/RCS/flex.skel,v 2.16 90/08/03 14:09:36 vern Exp $
|
||||
scan.c: "@(#) $header: /usr/fsys/odin/a/vern/flex/RCS/scan.l,v 2.9 90/06/27 23:48:34 vern Exp $ (LBL)";
|
||||
scan.l: "@(#) $header: /usr/fsys/odin/a/vern/flex/RCS/scan.l,v 2.9 90/06/27 23:48:34 vern Exp $ (LBL)";
|
||||
sym.c: "@(#) $header: /usr/fsys/odin/a/vern/flex/RCS/sym.c,v 2.4 90/06/27 23:48:36 vern Exp $ (LBL)";
|
||||
tblcmp.c: "@(#) $header: /usr/fsys/odin/a/vern/flex/RCS/tblcmp.c,v 2.5 90/06/27 23:48:38 vern Exp $ (LBL)";
|
||||
yylex.c: "@(#) $header: /usr/fsys/odin/a/vern/flex/RCS/yylex.c,v 2.5 90/06/27 23:48:40 vern Exp $ (LBL)";
|
190
util/flex/Makefile
Normal file
190
util/flex/Makefile
Normal file
|
@ -0,0 +1,190 @@
|
|||
# make file for "flex" tool
|
||||
|
||||
# @(#) $Header$ (LBL)
|
||||
|
||||
# Porting considerations:
|
||||
#
|
||||
# For System V Unix machines, add -DUSG to CFLAGS (if it's not
|
||||
# automatically defined)
|
||||
# For Vax/VMS, add "-DVMS -DUSG" to CFLAGS.
|
||||
# For MS-DOS, add "-DMS_DOS -DUSG" to CFLAGS. Create \tmp if not present.
|
||||
# You will also want to rename flex.skel to something with a three
|
||||
# character extension, and change SKELETON_FILE below appropriately.
|
||||
# See MSDOS.notes for more info.
|
||||
# For Amiga, add "-DAMIGA -DUSG" to CFLAGS.
|
||||
# For SCO Unix, add "-DSCO_UNIX" to CFLAGS.
|
||||
#
|
||||
# For C compilers which don't know about "void", add -Dvoid=int to CFLAGS.
|
||||
#
|
||||
# If your C compiler is ANSI standard but does not include the <stdlib.h>
|
||||
# header file (some installations of gcc have this problem), then add
|
||||
# -DDONT_HAVE_STDLIB_H to CFLAGS.
|
||||
#
|
||||
# By default, flex will be configured to generate 8-bit scanners only
|
||||
# if the -8 flag is given. If you want it to always generate 8-bit
|
||||
# scanners, add "-DDEFAULT_CSIZE=256" to CFLAGS. Note that doing
|
||||
# so will double the size of all uncompressed scanners.
|
||||
#
|
||||
# If on your system you have trouble building flex due to 8-bit
|
||||
# character problems, remove the -8 from FLEX_FLAGS and the
|
||||
# "#define FLEX_8_BIT_CHARS" from the beginning of flexdef.h.
|
||||
|
||||
|
||||
# the first time around use "make first_flex"
|
||||
|
||||
|
||||
# Installation targeting. Files will be installed under the tree rooted
|
||||
# at DESTDIR. User commands will be installed in BINDIR, library files
|
||||
# in LIBDIR (which will be created if necessary), auxiliary files in
|
||||
# AUXDIR, manual pages will be installed in MANDIR with extension MANEXT.
|
||||
# Raw, unformatted troff source will be installed if INSTALLMAN=man, nroff
|
||||
# preformatted versions will be installed if INSTALLMAN=cat.
|
||||
DESTDIR =
|
||||
BINDIR = /usr/local
|
||||
LIBDIR = /usr/local/lib
|
||||
AUXDIR = /usr/local/lib
|
||||
MANDIR = /usr/man/manl
|
||||
MANEXT = l
|
||||
INSTALLMAN = man
|
||||
|
||||
# MAKE = make
|
||||
|
||||
|
||||
SKELETON_FILE = $(DESTDIR)$(AUXDIR)/flex.skel
|
||||
SKELFLAGS = -DDEFAULT_SKELETON_FILE=\"$(SKELETON_FILE)\"
|
||||
CFLAGS = -O
|
||||
LDFLAGS = -s
|
||||
|
||||
COMPRESSION =
|
||||
FLEX_FLAGS = -ist8 -Sflex.skel
|
||||
# which "flex" to use to generate scan.c from scan.l
|
||||
FLEX = ./flex
|
||||
# CC = cc
|
||||
|
||||
AR = ar
|
||||
RANLIB = ranlib
|
||||
|
||||
FLEXOBJS = \
|
||||
ccl.o \
|
||||
dfa.o \
|
||||
ecs.o \
|
||||
gen.o \
|
||||
main.o \
|
||||
misc.o \
|
||||
nfa.o \
|
||||
parse.o \
|
||||
scan.o \
|
||||
sym.o \
|
||||
tblcmp.o \
|
||||
yylex.o
|
||||
|
||||
FLEX_C_SOURCES = \
|
||||
ccl.c \
|
||||
dfa.c \
|
||||
ecs.c \
|
||||
gen.c \
|
||||
main.c \
|
||||
misc.c \
|
||||
nfa.c \
|
||||
parse.c \
|
||||
scan.c \
|
||||
sym.c \
|
||||
tblcmp.c \
|
||||
yylex.c
|
||||
|
||||
FLEX_LIB_OBJS = \
|
||||
libmain.o
|
||||
|
||||
FLEXLIB = flexlib.a
|
||||
|
||||
|
||||
all : flex $(FLEXLIB)
|
||||
|
||||
flex : $(FLEXOBJS)
|
||||
$(CC) $(CFLAGS) -o flex $(LDFLAGS) $(FLEXOBJS)
|
||||
|
||||
first_flex:
|
||||
cp initscan.c scan.c
|
||||
$(MAKE) $(MFLAGS) flex
|
||||
|
||||
parse.h parse.c : parse.y
|
||||
$(YACC) -d parse.y
|
||||
@mv y.tab.c parse.c
|
||||
@mv y.tab.h parse.h
|
||||
|
||||
scan.c : scan.l
|
||||
$(FLEX) $(FLEX_FLAGS) $(COMPRESSION) scan.l >scan.c
|
||||
|
||||
scan.o : scan.c parse.h flexdef.h
|
||||
|
||||
main.o : main.c flexdef.h
|
||||
$(CC) $(CFLAGS) -c $(SKELFLAGS) main.c
|
||||
|
||||
ccl.o : ccl.c flexdef.h
|
||||
dfa.o : dfa.c flexdef.h
|
||||
ecs.o : ecs.c flexdef.h
|
||||
gen.o : gen.c flexdef.h
|
||||
misc.o : misc.c flexdef.h
|
||||
nfa.o : nfa.c flexdef.h
|
||||
parse.o : parse.c flexdef.h
|
||||
sym.o : sym.c flexdef.h
|
||||
tblcmp.o : tblcmp.c flexdef.h
|
||||
yylex.o : yylex.c flexdef.h
|
||||
|
||||
flex.man : flex.1
|
||||
nroff -man flex.1 >flex.man
|
||||
|
||||
$(FLEXLIB) : $(FLEX_LIB_OBJS)
|
||||
$(AR) cru $(FLEXLIB) $(FLEX_LIB_OBJS)
|
||||
|
||||
lint : $(FLEX_C_SOURCES)
|
||||
lint $(FLEX_C_SOURCES) > flex.lint
|
||||
|
||||
distrib :
|
||||
mv scan.c initscan.c
|
||||
chmod 444 initscan.c
|
||||
$(MAKE) $(MFLAGS) clean
|
||||
|
||||
install: flex $(DESTDIR)$(LIBDIR) flex.skel install.$(INSTALLMAN) install-lib
|
||||
install -s -m 755 flex $(DESTDIR)$(BINDIR)/flex
|
||||
install -c -m 644 flex.skel $(SKELETON_FILE)
|
||||
|
||||
install-lib: $(DESTDIR)$(LIBDIR) $(FLEXLIB)
|
||||
install -c -m 644 $(FLEXLIB) $(DESTDIR)$(LIBDIR)/libfl.a
|
||||
$(RANLIB) $(DESTDIR)$(LIBDIR)/libfl.a
|
||||
|
||||
$(DESTDIR)$(LIBDIR):
|
||||
mkdir $@
|
||||
|
||||
install.man: flex.1 flexdoc.1
|
||||
install -c -m 644 flex.1 $(DESTDIR)$(MANDIR)/flex.$(MANEXT)
|
||||
install -c -m 644 flexdoc.1 $(DESTDIR)$(MANDIR)/flexdoc.$(MANEXT)
|
||||
|
||||
install.cat: flex.1 flexdoc.1
|
||||
nroff -h -man flex.1 > $(DESTDIR)$(MANDIR)/flex.$(MANEXT)
|
||||
nroff -h -man flexdoc.1 > $(DESTDIR)$(MANDIR)/flexdoc.$(MANEXT)
|
||||
chmod 644 $(DESTDIR)$(MANDIR)/flex.$(MANEXT)
|
||||
chmod 644 $(DESTDIR)$(MANDIR)/flexdoc.$(MANEXT)
|
||||
|
||||
clean :
|
||||
rm -f core errs flex *.o parse.c *.lint parse.h flex.man tags \
|
||||
$(FLEXLIB)
|
||||
|
||||
tags :
|
||||
ctags $(FLEX_C_SOURCES)
|
||||
|
||||
vms : flex.man
|
||||
$(MAKE) $(MFLAGS) distrib
|
||||
|
||||
test : flex
|
||||
./flex $(FLEX_FLAGS) $(COMPRESSION) scan.l | diff scan.c -
|
||||
|
||||
bigtest :
|
||||
rm -f scan.c ; $(MAKE) COMPRESSION="-C" test
|
||||
rm -f scan.c ; $(MAKE) COMPRESSION="-Ce" test
|
||||
rm -f scan.c ; $(MAKE) COMPRESSION="-Cm" test
|
||||
rm -f scan.c ; $(MAKE) COMPRESSION="-Cfe" test
|
||||
rm -f scan.c ; $(MAKE) COMPRESSION="-CFe" test
|
||||
rm -f scan.c ; $(MAKE) COMPRESSION="-Cf" test
|
||||
rm -f scan.c ; $(MAKE) COMPRESSION="-CF" test
|
||||
rm -f scan.c ; $(MAKE)
|
78
util/flex/README
Normal file
78
util/flex/README
Normal file
|
@ -0,0 +1,78 @@
|
|||
// $Header$
|
||||
|
||||
This is release 2.3 of flex - a full release.
|
||||
|
||||
The flex distribution consists of the following files:
|
||||
|
||||
README This message
|
||||
|
||||
Makefile
|
||||
flexdef.h
|
||||
parse.y
|
||||
scan.l
|
||||
ccl.c
|
||||
dfa.c
|
||||
ecs.c flex sources
|
||||
gen.c
|
||||
main.c
|
||||
misc.c
|
||||
nfa.c
|
||||
sym.c
|
||||
tblcmp.c
|
||||
yylex.c
|
||||
|
||||
libmain.c flex library (-lfl) source
|
||||
|
||||
initscan.c pre-flex'd version of scan.l
|
||||
|
||||
flex.skel skeleton for generated scanners
|
||||
|
||||
flexdoc.1 full user documentation
|
||||
flex.1 reference documentation
|
||||
|
||||
Changes Differences between this release and the previous one
|
||||
|
||||
COPYING flex's copyright
|
||||
|
||||
MISC/ a directory containing miscellaneous porting-related
|
||||
notes (for Atari, MS-DOS, Turbo-C, and VMS)
|
||||
|
||||
|
||||
Decide where you want to keep flex.skel (suggestion: /usr/local/lib),
|
||||
but don't move it there yet. Edit "Makefile" and change the definition
|
||||
of SKELETON_FILE to reflect the full pathname of flex.skel.
|
||||
|
||||
Read the "Porting considerations" note in the Makefile and make
|
||||
the necessary changes.
|
||||
|
||||
To make flex for the first time, use:
|
||||
|
||||
make first_flex
|
||||
|
||||
which uses the pre-generated copy of the flex scanner (the scanner
|
||||
itself is written using flex).
|
||||
|
||||
Assuming it builds successfully, you can test it using
|
||||
|
||||
make test
|
||||
|
||||
The "diff" should not show any differences.
|
||||
|
||||
If you're feeling adventurous, issue "make bigtest" and be prepared
|
||||
to wait a while.
|
||||
|
||||
Install flex using:
|
||||
|
||||
make install
|
||||
|
||||
|
||||
Please send problems and feedback to:
|
||||
|
||||
vern@cs.cornell.edu
|
||||
decvax!cornell!vern
|
||||
|
||||
Vern Paxson
|
||||
CS Department
|
||||
4126 Upson Hall
|
||||
Cornell University
|
||||
Ithaca, NY 14853-7501
|
175
util/flex/ccl.c
Normal file
175
util/flex/ccl.c
Normal file
|
@ -0,0 +1,175 @@
|
|||
/* ccl - routines for character classes */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 1990 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* Vern Paxson.
|
||||
*
|
||||
* The United States Government has rights in this work pursuant
|
||||
* to contract no. DE-AC03-76SF00098 between the United States
|
||||
* Department of Energy and the University of California.
|
||||
*
|
||||
* Redistribution and use in source and binary forms are permitted provided
|
||||
* that: (1) source distributions retain this entire copyright notice and
|
||||
* comment, and (2) distributions including binaries display the following
|
||||
* acknowledgement: ``This product includes software developed by the
|
||||
* University of California, Berkeley and its contributors'' in the
|
||||
* documentation or other materials provided with the distribution and in
|
||||
* all advertising materials mentioning features or use of this software.
|
||||
* Neither the name of the University nor the names of its contributors may
|
||||
* be used to endorse or promote products derived from this software without
|
||||
* specific prior written permission.
|
||||
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
|
||||
* WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*/
|
||||
|
||||
#ifndef lint
|
||||
static char rcsid[] =
|
||||
"@(#) $Header$ (LBL)";
|
||||
#endif
|
||||
|
||||
#include "flexdef.h"
|
||||
|
||||
/* ccladd - add a single character to a ccl
|
||||
*
|
||||
* synopsis
|
||||
* int cclp;
|
||||
* int ch;
|
||||
* ccladd( cclp, ch );
|
||||
*/
|
||||
|
||||
void ccladd( cclp, ch )
|
||||
int cclp;
|
||||
int ch;
|
||||
|
||||
{
|
||||
int ind, len, newpos, i;
|
||||
|
||||
len = ccllen[cclp];
|
||||
ind = cclmap[cclp];
|
||||
|
||||
/* check to see if the character is already in the ccl */
|
||||
|
||||
for ( i = 0; i < len; ++i )
|
||||
if ( ccltbl[ind + i] == ch )
|
||||
return;
|
||||
|
||||
newpos = ind + len;
|
||||
|
||||
if ( newpos >= current_max_ccl_tbl_size )
|
||||
{
|
||||
current_max_ccl_tbl_size += MAX_CCL_TBL_SIZE_INCREMENT;
|
||||
|
||||
++num_reallocs;
|
||||
|
||||
ccltbl = reallocate_character_array( ccltbl, current_max_ccl_tbl_size );
|
||||
}
|
||||
|
||||
ccllen[cclp] = len + 1;
|
||||
ccltbl[newpos] = ch;
|
||||
}
|
||||
|
||||
|
||||
/* cclinit - make an empty ccl
|
||||
*
|
||||
* synopsis
|
||||
* int cclinit();
|
||||
* new_ccl = cclinit();
|
||||
*/
|
||||
|
||||
int cclinit()
|
||||
|
||||
{
|
||||
if ( ++lastccl >= current_maxccls )
|
||||
{
|
||||
current_maxccls += MAX_CCLS_INCREMENT;
|
||||
|
||||
++num_reallocs;
|
||||
|
||||
cclmap = reallocate_integer_array( cclmap, current_maxccls );
|
||||
ccllen = reallocate_integer_array( ccllen, current_maxccls );
|
||||
cclng = reallocate_integer_array( cclng, current_maxccls );
|
||||
}
|
||||
|
||||
if ( lastccl == 1 )
|
||||
/* we're making the first ccl */
|
||||
cclmap[lastccl] = 0;
|
||||
|
||||
else
|
||||
/* the new pointer is just past the end of the last ccl. Since
|
||||
* the cclmap points to the \first/ character of a ccl, adding the
|
||||
* length of the ccl to the cclmap pointer will produce a cursor
|
||||
* to the first free space
|
||||
*/
|
||||
cclmap[lastccl] = cclmap[lastccl - 1] + ccllen[lastccl - 1];
|
||||
|
||||
ccllen[lastccl] = 0;
|
||||
cclng[lastccl] = 0; /* ccl's start out life un-negated */
|
||||
|
||||
return ( lastccl );
|
||||
}
|
||||
|
||||
|
||||
/* cclnegate - negate a ccl
|
||||
*
|
||||
* synopsis
|
||||
* int cclp;
|
||||
* cclnegate( ccl );
|
||||
*/
|
||||
|
||||
void cclnegate( cclp )
|
||||
int cclp;
|
||||
|
||||
{
|
||||
cclng[cclp] = 1;
|
||||
}
|
||||
|
||||
|
||||
/* list_character_set - list the members of a set of characters in CCL form
|
||||
*
|
||||
* synopsis
|
||||
* int cset[CSIZE];
|
||||
* FILE *file;
|
||||
* list_character_set( cset );
|
||||
*
|
||||
* writes to the given file a character-class representation of those
|
||||
* characters present in the given set. A character is present if it
|
||||
* has a non-zero value in the set array.
|
||||
*/
|
||||
|
||||
void list_character_set( file, cset )
|
||||
FILE *file;
|
||||
int cset[];
|
||||
|
||||
{
|
||||
register int i;
|
||||
char *readable_form();
|
||||
|
||||
putc( '[', file );
|
||||
|
||||
for ( i = 0; i < csize; ++i )
|
||||
{
|
||||
if ( cset[i] )
|
||||
{
|
||||
register int start_char = i;
|
||||
|
||||
putc( ' ', file );
|
||||
|
||||
fputs( readable_form( i ), file );
|
||||
|
||||
while ( ++i < csize && cset[i] )
|
||||
;
|
||||
|
||||
if ( i - 1 > start_char )
|
||||
/* this was a run */
|
||||
fprintf( file, "-%s", readable_form( i - 1 ) );
|
||||
|
||||
putc( ' ', file );
|
||||
}
|
||||
}
|
||||
|
||||
putc( ']', file );
|
||||
}
|
1075
util/flex/dfa.c
Normal file
1075
util/flex/dfa.c
Normal file
File diff suppressed because it is too large
Load diff
349
util/flex/ecs.c
Normal file
349
util/flex/ecs.c
Normal file
|
@ -0,0 +1,349 @@
|
|||
/* ecs - equivalence class routines */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 1990 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* Vern Paxson.
|
||||
*
|
||||
* The United States Government has rights in this work pursuant
|
||||
* to contract no. DE-AC03-76SF00098 between the United States
|
||||
* Department of Energy and the University of California.
|
||||
*
|
||||
* Redistribution and use in source and binary forms are permitted provided
|
||||
* that: (1) source distributions retain this entire copyright notice and
|
||||
* comment, and (2) distributions including binaries display the following
|
||||
* acknowledgement: ``This product includes software developed by the
|
||||
* University of California, Berkeley and its contributors'' in the
|
||||
* documentation or other materials provided with the distribution and in
|
||||
* all advertising materials mentioning features or use of this software.
|
||||
* Neither the name of the University nor the names of its contributors may
|
||||
* be used to endorse or promote products derived from this software without
|
||||
* specific prior written permission.
|
||||
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
|
||||
* WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*/
|
||||
|
||||
#ifndef lint
|
||||
static char rcsid[] =
|
||||
"@(#) $Header$ (LBL)";
|
||||
#endif
|
||||
|
||||
#include "flexdef.h"
|
||||
|
||||
/* ccl2ecl - convert character classes to set of equivalence classes
|
||||
*
|
||||
* synopsis
|
||||
* ccl2ecl();
|
||||
*/
|
||||
|
||||
void ccl2ecl()
|
||||
|
||||
{
|
||||
int i, ich, newlen, cclp, ccls, cclmec;
|
||||
|
||||
for ( i = 1; i <= lastccl; ++i )
|
||||
{
|
||||
/* we loop through each character class, and for each character
|
||||
* in the class, add the character's equivalence class to the
|
||||
* new "character" class we are creating. Thus when we are all
|
||||
* done, character classes will really consist of collections
|
||||
* of equivalence classes
|
||||
*/
|
||||
|
||||
newlen = 0;
|
||||
cclp = cclmap[i];
|
||||
|
||||
for ( ccls = 0; ccls < ccllen[i]; ++ccls )
|
||||
{
|
||||
ich = ccltbl[cclp + ccls];
|
||||
cclmec = ecgroup[ich];
|
||||
|
||||
if ( xlation && cclmec < 0 )
|
||||
{
|
||||
/* special hack--if we're doing %t tables then it's
|
||||
* possible that no representative of this character's
|
||||
* equivalence class is in the ccl. So waiting till
|
||||
* we see the representative would be disastrous. Instead,
|
||||
* we add this character's equivalence class anyway, if it's
|
||||
* not already present.
|
||||
*/
|
||||
int j;
|
||||
|
||||
/* this loop makes this whole process n^2; but we don't
|
||||
* really care about %t performance anyway
|
||||
*/
|
||||
for ( j = 0; j < newlen; ++j )
|
||||
if ( ccltbl[cclp + j] == -cclmec )
|
||||
break;
|
||||
|
||||
if ( j >= newlen )
|
||||
{ /* no representative yet, add this one in */
|
||||
ccltbl[cclp + newlen] = -cclmec;
|
||||
++newlen;
|
||||
}
|
||||
}
|
||||
|
||||
else if ( cclmec > 0 )
|
||||
{
|
||||
ccltbl[cclp + newlen] = cclmec;
|
||||
++newlen;
|
||||
}
|
||||
}
|
||||
|
||||
ccllen[i] = newlen;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* cre8ecs - associate equivalence class numbers with class members
|
||||
*
|
||||
* synopsis
|
||||
* int cre8ecs();
|
||||
* number of classes = cre8ecs( fwd, bck, num );
|
||||
*
|
||||
* fwd is the forward linked-list of equivalence class members. bck
|
||||
* is the backward linked-list, and num is the number of class members.
|
||||
*
|
||||
* Returned is the number of classes.
|
||||
*/
|
||||
|
||||
int cre8ecs( fwd, bck, num )
|
||||
int fwd[], bck[], num;
|
||||
|
||||
{
|
||||
int i, j, numcl;
|
||||
|
||||
numcl = 0;
|
||||
|
||||
/* create equivalence class numbers. From now on, abs( bck(x) )
|
||||
* is the equivalence class number for object x. If bck(x)
|
||||
* is positive, then x is the representative of its equivalence
|
||||
* class.
|
||||
*/
|
||||
for ( i = 1; i <= num; ++i )
|
||||
if ( bck[i] == NIL )
|
||||
{
|
||||
bck[i] = ++numcl;
|
||||
for ( j = fwd[i]; j != NIL; j = fwd[j] )
|
||||
bck[j] = -numcl;
|
||||
}
|
||||
|
||||
return ( numcl );
|
||||
}
|
||||
|
||||
|
||||
/* ecs_from_xlation - associate equivalence class numbers using %t table
|
||||
*
|
||||
* synopsis
|
||||
* numecs = ecs_from_xlation( ecmap );
|
||||
*
|
||||
* Upon return, ecmap will map each character code to its equivalence
|
||||
* class. The mapping will be positive if the character is the representative
|
||||
* of its class, negative otherwise.
|
||||
*
|
||||
* Returns the number of equivalence classes used.
|
||||
*/
|
||||
|
||||
int ecs_from_xlation( ecmap )
|
||||
int ecmap[];
|
||||
|
||||
{
|
||||
int i;
|
||||
int nul_is_alone = false;
|
||||
int did_default_xlation_class = false;
|
||||
|
||||
if ( xlation[0] != 0 )
|
||||
{
|
||||
/* if NUL shares its translation with other characters, choose one
|
||||
* of the other characters as the representative for the equivalence
|
||||
* class. This allows a cheap test later to see whether we can
|
||||
* do away with NUL's equivalence class.
|
||||
*/
|
||||
for ( i = 1; i < csize; ++i )
|
||||
if ( xlation[i] == -xlation[0] )
|
||||
{
|
||||
xlation[i] = xlation[0];
|
||||
ecmap[0] = -xlation[0];
|
||||
break;
|
||||
}
|
||||
|
||||
if ( i >= csize )
|
||||
/* didn't find a companion character--remember this fact */
|
||||
nul_is_alone = true;
|
||||
}
|
||||
|
||||
for ( i = 1; i < csize; ++i )
|
||||
if ( xlation[i] == 0 )
|
||||
{
|
||||
if ( did_default_xlation_class )
|
||||
ecmap[i] = -num_xlations;
|
||||
|
||||
else
|
||||
{
|
||||
/* make an equivalence class for those characters not
|
||||
* specified in the %t table
|
||||
*/
|
||||
++num_xlations;
|
||||
ecmap[i] = num_xlations;
|
||||
did_default_xlation_class = true;
|
||||
}
|
||||
}
|
||||
|
||||
else
|
||||
ecmap[i] = xlation[i];
|
||||
|
||||
if ( nul_is_alone )
|
||||
/* force NUL's equivalence class to be the last one */
|
||||
{
|
||||
++num_xlations;
|
||||
ecmap[0] = num_xlations;
|
||||
|
||||
/* there's actually a bug here: if someone is fanatic enough to
|
||||
* put every character in its own translation class, then right
|
||||
* now we just promoted NUL's equivalence class to be csize + 1;
|
||||
* we can handle NUL's class number being == csize (by instead
|
||||
* putting it in its own table), but we can't handle some *other*
|
||||
* character having to be put in its own table, too. So in
|
||||
* this case we bail out.
|
||||
*/
|
||||
if ( num_xlations > csize )
|
||||
flexfatal( "too many %t classes!" );
|
||||
}
|
||||
|
||||
return num_xlations;
|
||||
}
|
||||
|
||||
|
||||
/* mkeccl - update equivalence classes based on character class xtions
|
||||
*
|
||||
* synopsis
|
||||
* Char ccls[];
|
||||
* int lenccl, fwd[llsiz], bck[llsiz], llsiz, NUL_mapping;
|
||||
* mkeccl( ccls, lenccl, fwd, bck, llsiz, NUL_mapping );
|
||||
*
|
||||
* where ccls contains the elements of the character class, lenccl is the
|
||||
* number of elements in the ccl, fwd is the forward link-list of equivalent
|
||||
* characters, bck is the backward link-list, and llsiz size of the link-list
|
||||
*
|
||||
* NUL_mapping is the value which NUL (0) should be mapped to.
|
||||
*/
|
||||
|
||||
void mkeccl( ccls, lenccl, fwd, bck, llsiz, NUL_mapping )
|
||||
Char ccls[];
|
||||
int lenccl, fwd[], bck[], llsiz, NUL_mapping;
|
||||
|
||||
{
|
||||
int cclp, oldec, newec;
|
||||
int cclm, i, j;
|
||||
static unsigned char cclflags[CSIZE]; /* initialized to all '\0' */
|
||||
|
||||
/* note that it doesn't matter whether or not the character class is
|
||||
* negated. The same results will be obtained in either case.
|
||||
*/
|
||||
|
||||
cclp = 0;
|
||||
|
||||
while ( cclp < lenccl )
|
||||
{
|
||||
cclm = ccls[cclp];
|
||||
|
||||
if ( NUL_mapping && cclm == 0 )
|
||||
cclm = NUL_mapping;
|
||||
|
||||
oldec = bck[cclm];
|
||||
newec = cclm;
|
||||
|
||||
j = cclp + 1;
|
||||
|
||||
for ( i = fwd[cclm]; i != NIL && i <= llsiz; i = fwd[i] )
|
||||
{ /* look for the symbol in the character class */
|
||||
for ( ; j < lenccl; ++j )
|
||||
{
|
||||
register int ccl_char;
|
||||
|
||||
if ( NUL_mapping && ccls[j] == 0 )
|
||||
ccl_char = NUL_mapping;
|
||||
else
|
||||
ccl_char = ccls[j];
|
||||
|
||||
if ( ccl_char > i )
|
||||
break;
|
||||
|
||||
if ( ccl_char == i && ! cclflags[j] )
|
||||
{
|
||||
/* we found an old companion of cclm in the ccl.
|
||||
* link it into the new equivalence class and flag it as
|
||||
* having been processed
|
||||
*/
|
||||
|
||||
bck[i] = newec;
|
||||
fwd[newec] = i;
|
||||
newec = i;
|
||||
cclflags[j] = 1; /* set flag so we don't reprocess */
|
||||
|
||||
/* get next equivalence class member */
|
||||
/* continue 2 */
|
||||
goto next_pt;
|
||||
}
|
||||
}
|
||||
|
||||
/* symbol isn't in character class. Put it in the old equivalence
|
||||
* class
|
||||
*/
|
||||
|
||||
bck[i] = oldec;
|
||||
|
||||
if ( oldec != NIL )
|
||||
fwd[oldec] = i;
|
||||
|
||||
oldec = i;
|
||||
next_pt:
|
||||
;
|
||||
}
|
||||
|
||||
if ( bck[cclm] != NIL || oldec != bck[cclm] )
|
||||
{
|
||||
bck[cclm] = NIL;
|
||||
fwd[oldec] = NIL;
|
||||
}
|
||||
|
||||
fwd[newec] = NIL;
|
||||
|
||||
/* find next ccl member to process */
|
||||
|
||||
for ( ++cclp; cclflags[cclp] && cclp < lenccl; ++cclp )
|
||||
{
|
||||
/* reset "doesn't need processing" flag */
|
||||
cclflags[cclp] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* mkechar - create equivalence class for single character
|
||||
*
|
||||
* synopsis
|
||||
* int tch, fwd[], bck[];
|
||||
* mkechar( tch, fwd, bck );
|
||||
*/
|
||||
|
||||
void mkechar( tch, fwd, bck )
|
||||
int tch, fwd[], bck[];
|
||||
|
||||
{
|
||||
/* if until now the character has been a proper subset of
|
||||
* an equivalence class, break it away to create a new ec
|
||||
*/
|
||||
|
||||
if ( fwd[tch] != NIL )
|
||||
bck[fwd[tch]] = bck[tch];
|
||||
|
||||
if ( bck[tch] != NIL )
|
||||
fwd[bck[tch]] = fwd[tch];
|
||||
|
||||
fwd[tch] = NIL;
|
||||
bck[tch] = NIL;
|
||||
}
|
781
util/flex/flex.1
Normal file
781
util/flex/flex.1
Normal file
|
@ -0,0 +1,781 @@
|
|||
.TH FLEX 1 "26 May 1990" "Version 2.3"
|
||||
.SH NAME
|
||||
flex - fast lexical analyzer generator
|
||||
.SH SYNOPSIS
|
||||
.B flex
|
||||
.B [-bcdfinpstvFILT8 -C[efmF] -Sskeleton]
|
||||
.I [filename ...]
|
||||
.SH DESCRIPTION
|
||||
.I flex
|
||||
is a tool for generating
|
||||
.I scanners:
|
||||
programs which recognize lexical patterns in text.
|
||||
.I flex
|
||||
reads
|
||||
the given input files, or its standard input if no file names are given,
|
||||
for a description of a scanner to generate. The description is in
|
||||
the form of pairs
|
||||
of regular expressions and C code, called
|
||||
.I rules. flex
|
||||
generates as output a C source file,
|
||||
.B lex.yy.c,
|
||||
which defines a routine
|
||||
.B yylex().
|
||||
This file is compiled and linked with the
|
||||
.B -lfl
|
||||
library to produce an executable. When the executable is run,
|
||||
it analyzes its input for occurrences
|
||||
of the regular expressions. Whenever it finds one, it executes
|
||||
the corresponding C code.
|
||||
.LP
|
||||
For full documentation, see
|
||||
.B flexdoc(1).
|
||||
This manual entry is intended for use as a quick reference.
|
||||
.SH OPTIONS
|
||||
.I flex
|
||||
has the following options:
|
||||
.TP
|
||||
.B -b
|
||||
Generate backtracking information to
|
||||
.I lex.backtrack.
|
||||
This is a list of scanner states which require backtracking
|
||||
and the input characters on which they do so. By adding rules one
|
||||
can remove backtracking states. If all backtracking states
|
||||
are eliminated and
|
||||
.B -f
|
||||
or
|
||||
.B -F
|
||||
is used, the generated scanner will run faster.
|
||||
.TP
|
||||
.B -c
|
||||
is a do-nothing, deprecated option included for POSIX compliance.
|
||||
.IP
|
||||
.B NOTE:
|
||||
in previous releases of
|
||||
.I flex
|
||||
.B -c
|
||||
specified table-compression options. This functionality is
|
||||
now given by the
|
||||
.B -C
|
||||
flag.  To ease the impact of this change, when
|
||||
.I flex
|
||||
encounters
|
||||
.B -c,
|
||||
it currently issues a warning message and assumes that
|
||||
.B -C
|
||||
was desired instead. In the future this "promotion" of
|
||||
.B -c
|
||||
to
|
||||
.B -C
|
||||
will go away in the name of full POSIX compliance (unless
|
||||
the POSIX meaning is removed first).
|
||||
.TP
|
||||
.B -d
|
||||
makes the generated scanner run in
|
||||
.I debug
|
||||
mode. Whenever a pattern is recognized and the global
|
||||
.B yy_flex_debug
|
||||
is non-zero (which is the default), the scanner will
|
||||
write to
|
||||
.I stderr
|
||||
a line of the form:
|
||||
.nf
|
||||
|
||||
--accepting rule at line 53 ("the matched text")
|
||||
|
||||
.fi
|
||||
The line number refers to the location of the rule in the file
|
||||
defining the scanner (i.e., the file that was fed to flex). Messages
|
||||
are also generated when the scanner backtracks, accepts the
|
||||
default rule, reaches the end of its input buffer (or encounters
|
||||
a NUL; the two look the same as far as the scanner's concerned),
|
||||
or reaches an end-of-file.
|
||||
.TP
|
||||
.B -f
|
||||
specifies (take your pick)
|
||||
.I full table
|
||||
or
|
||||
.I fast scanner.
|
||||
No table compression is done. The result is large but fast.
|
||||
This option is equivalent to
|
||||
.B -Cf
|
||||
(see below).
|
||||
.TP
|
||||
.B -i
|
||||
instructs
|
||||
.I flex
|
||||
to generate a
|
||||
.I case-insensitive
|
||||
scanner. The case of letters given in the
|
||||
.I flex
|
||||
input patterns will
|
||||
be ignored, and tokens in the input will be matched regardless of case. The
|
||||
matched text given in
|
||||
.I yytext
|
||||
will have the preserved case (i.e., it will not be folded).
|
||||
.TP
|
||||
.B -n
|
||||
is another do-nothing, deprecated option included only for
|
||||
POSIX compliance.
|
||||
.TP
|
||||
.B -p
|
||||
generates a performance report to stderr. The report
|
||||
consists of comments regarding features of the
|
||||
.I flex
|
||||
input file which will cause a loss of performance in the resulting scanner.
|
||||
.TP
|
||||
.B -s
|
||||
causes the
|
||||
.I default rule
|
||||
(that unmatched scanner input is echoed to
|
||||
.I stdout)
|
||||
to be suppressed. If the scanner encounters input that does not
|
||||
match any of its rules, it aborts with an error.
|
||||
.TP
|
||||
.B -t
|
||||
instructs
|
||||
.I flex
|
||||
to write the scanner it generates to standard output instead
|
||||
of
|
||||
.B lex.yy.c.
|
||||
.TP
|
||||
.B -v
|
||||
specifies that
|
||||
.I flex
|
||||
should write to
|
||||
.I stderr
|
||||
a summary of statistics regarding the scanner it generates.
|
||||
.TP
|
||||
.B -F
|
||||
specifies that the
|
||||
.ul
|
||||
fast
|
||||
scanner table representation should be used. This representation is
|
||||
about as fast as the full table representation
|
||||
.ul
|
||||
(-f),
|
||||
and for some sets of patterns will be considerably smaller (and for
|
||||
others, larger). See
|
||||
.B flexdoc(1)
|
||||
for details.
|
||||
.IP
|
||||
This option is equivalent to
|
||||
.B -CF
|
||||
(see below).
|
||||
.TP
|
||||
.B -I
|
||||
instructs
|
||||
.I flex
|
||||
to generate an
|
||||
.I interactive
|
||||
scanner, that is, a scanner which stops immediately rather than
|
||||
looking ahead if it knows
|
||||
that the currently scanned text cannot be part of a longer rule's match.
|
||||
Again, see
|
||||
.B flexdoc(1)
|
||||
for details.
|
||||
.IP
|
||||
Note,
|
||||
.B -I
|
||||
cannot be used in conjunction with
|
||||
.I full
|
||||
or
|
||||
.I fast tables,
|
||||
i.e., the
|
||||
.B -f, -F, -Cf,
|
||||
or
|
||||
.B -CF
|
||||
flags.
|
||||
.TP
|
||||
.B -L
|
||||
instructs
|
||||
.I flex
|
||||
not to generate
|
||||
.B #line
|
||||
directives in
|
||||
.B lex.yy.c.
|
||||
The default is to generate such directives so error
|
||||
messages in the actions will be correctly
|
||||
located with respect to the original
|
||||
.I flex
|
||||
input file, and not to
|
||||
the fairly meaningless line numbers of
|
||||
.B lex.yy.c.
|
||||
.TP
|
||||
.B -T
|
||||
makes
|
||||
.I flex
|
||||
run in
|
||||
.I trace
|
||||
mode. It will generate a lot of messages to
|
||||
.I stdout
|
||||
concerning
|
||||
the form of the input and the resultant non-deterministic and deterministic
|
||||
finite automata. This option is mostly for use in maintaining
|
||||
.I flex.
|
||||
.TP
|
||||
.B -8
|
||||
instructs
|
||||
.I flex
|
||||
to generate an 8-bit scanner.
|
||||
On some sites, this is the default. On others, the default
|
||||
is 7-bit characters. To see which is the case, check the verbose
|
||||
.B (-v)
|
||||
output for "equivalence classes created". If the denominator of
|
||||
the number shown is 128, then by default
|
||||
.I flex
|
||||
is generating 7-bit characters. If it is 256, then the default is
|
||||
8-bit characters.
|
||||
.TP
|
||||
.B -C[efmF]
|
||||
controls the degree of table compression.
|
||||
.IP
|
||||
.B -Ce
|
||||
directs
|
||||
.I flex
|
||||
to construct
|
||||
.I equivalence classes,
|
||||
i.e., sets of characters
|
||||
which have identical lexical properties.
|
||||
Equivalence classes usually give
|
||||
dramatic reductions in the final table/object file sizes (typically
|
||||
a factor of 2-5) and are pretty cheap performance-wise (one array
|
||||
look-up per character scanned).
|
||||
.IP
|
||||
.B -Cf
|
||||
specifies that the
|
||||
.I full
|
||||
scanner tables should be generated -
|
||||
.I flex
|
||||
should not compress the
|
||||
tables by taking advantage of similar transition functions for
|
||||
different states.
|
||||
.IP
|
||||
.B -CF
|
||||
specifies that the alternate fast scanner representation (described in
|
||||
.B flexdoc(1))
|
||||
should be used.
|
||||
.IP
|
||||
.B -Cm
|
||||
directs
|
||||
.I flex
|
||||
to construct
|
||||
.I meta-equivalence classes,
|
||||
which are sets of equivalence classes (or characters, if equivalence
|
||||
classes are not being used) that are commonly used together. Meta-equivalence
|
||||
classes are often a big win when using compressed tables, but they
|
||||
have a moderate performance impact (one or two "if" tests and one
|
||||
array look-up per character scanned).
|
||||
.IP
|
||||
A lone
|
||||
.B -C
|
||||
specifies that the scanner tables should be compressed but neither
|
||||
equivalence classes nor meta-equivalence classes should be used.
|
||||
.IP
|
||||
The options
|
||||
.B -Cf
|
||||
or
|
||||
.B -CF
|
||||
and
|
||||
.B -Cm
|
||||
do not make sense together - there is no opportunity for meta-equivalence
|
||||
classes if the table is not being compressed. Otherwise the options
|
||||
may be freely mixed.
|
||||
.IP
|
||||
The default setting is
|
||||
.B -Cem,
|
||||
which specifies that
|
||||
.I flex
|
||||
should generate equivalence classes
|
||||
and meta-equivalence classes. This setting provides the highest
|
||||
degree of table compression. You can trade off
|
||||
faster-executing scanners at the cost of larger tables with
|
||||
the following generally being true:
|
||||
.nf
|
||||
|
||||
slowest & smallest
|
||||
-Cem
|
||||
-Cm
|
||||
-Ce
|
||||
-C
|
||||
-C{f,F}e
|
||||
-C{f,F}
|
||||
fastest & largest
|
||||
|
||||
.fi
|
||||
.IP
|
||||
.B -C
|
||||
options are not cumulative; whenever the flag is encountered, the
|
||||
previous -C settings are forgotten.
|
||||
.TP
|
||||
.B -Sskeleton_file
|
||||
overrides the default skeleton file from which
|
||||
.I flex
|
||||
constructs its scanners. You'll never need this option unless you are doing
|
||||
.I flex
|
||||
maintenance or development.
|
||||
.SH SUMMARY OF FLEX REGULAR EXPRESSIONS
|
||||
The patterns in the input are written using an extended set of regular
|
||||
expressions. These are:
|
||||
.nf
|
||||
|
||||
x match the character 'x'
|
||||
. any character except newline
|
||||
[xyz] a "character class"; in this case, the pattern
|
||||
matches either an 'x', a 'y', or a 'z'
|
||||
[abj-oZ] a "character class" with a range in it; matches
|
||||
an 'a', a 'b', any letter from 'j' through 'o',
|
||||
or a 'Z'
|
||||
[^A-Z] a "negated character class", i.e., any character
|
||||
but those in the class. In this case, any
|
||||
character EXCEPT an uppercase letter.
|
||||
[^A-Z\\n] any character EXCEPT an uppercase letter or
|
||||
a newline
|
||||
r* zero or more r's, where r is any regular expression
|
||||
r+ one or more r's
|
||||
r? zero or one r's (that is, "an optional r")
|
||||
r{2,5} anywhere from two to five r's
|
||||
r{2,} two or more r's
|
||||
r{4} exactly 4 r's
|
||||
{name} the expansion of the "name" definition
|
||||
(see above)
|
||||
"[xyz]\\"foo"
|
||||
the literal string: [xyz]"foo
|
||||
\\X if X is an 'a', 'b', 'f', 'n', 'r', 't', or 'v',
|
||||
then the ANSI-C interpretation of \\x.
|
||||
Otherwise, a literal 'X' (used to escape
|
||||
operators such as '*')
|
||||
\\123 the character with octal value 123
|
||||
\\x2a the character with hexadecimal value 2a
|
||||
(r) match an r; parentheses are used to override
|
||||
precedence (see below)
|
||||
|
||||
|
||||
rs the regular expression r followed by the
|
||||
regular expression s; called "concatenation"
|
||||
|
||||
|
||||
r|s either an r or an s
|
||||
|
||||
|
||||
r/s an r but only if it is followed by an s. The
|
||||
s is not part of the matched text. This type
|
||||
of pattern is called "trailing context".
|
||||
^r an r, but only at the beginning of a line
|
||||
r$ an r, but only at the end of a line. Equivalent
|
||||
to "r/\\n".
|
||||
|
||||
|
||||
<s>r an r, but only in start condition s (see
|
||||
below for discussion of start conditions)
|
||||
<s1,s2,s3>r
|
||||
same, but in any of start conditions s1,
|
||||
s2, or s3
|
||||
|
||||
|
||||
<<EOF>> an end-of-file
|
||||
<s1,s2><<EOF>>
|
||||
an end-of-file when in start condition s1 or s2
|
||||
|
||||
.fi
|
||||
The regular expressions listed above are grouped according to
|
||||
precedence, from highest precedence at the top to lowest at the bottom.
|
||||
Those grouped together have equal precedence.
|
||||
.LP
|
||||
Some notes on patterns:
|
||||
.IP -
|
||||
Negated character classes
|
||||
.I match newlines
|
||||
unless "\\n" (or an equivalent escape sequence) is one of the
|
||||
characters explicitly present in the negated character class
|
||||
(e.g., "[^A-Z\\n]").
|
||||
.IP -
|
||||
A rule can have at most one instance of trailing context (the '/' operator
|
||||
or the '$' operator). The start condition, '^', and "<<EOF>>" patterns
|
||||
can only occur at the beginning of a pattern, and, as well as with '/' and '$',
|
||||
cannot be grouped inside parentheses. The following are all illegal:
|
||||
.nf
|
||||
|
||||
foo/bar$
|
||||
foo|(bar$)
|
||||
foo|^bar
|
||||
<sc1>foo<sc2>bar
|
||||
|
||||
.fi
|
||||
.SH SUMMARY OF SPECIAL ACTIONS
|
||||
In addition to arbitrary C code, the following can appear in actions:
|
||||
.IP -
|
||||
.B ECHO
|
||||
copies yytext to the scanner's output.
|
||||
.IP -
|
||||
.B BEGIN
|
||||
followed by the name of a start condition places the scanner in the
|
||||
corresponding start condition.
|
||||
.IP -
|
||||
.B REJECT
|
||||
directs the scanner to proceed on to the "second best" rule which matched the
|
||||
input (or a prefix of the input).
|
||||
.B yytext
|
||||
and
|
||||
.B yyleng
|
||||
are set up appropriately. Note that
|
||||
.B REJECT
|
||||
is a particularly expensive feature in terms of scanner performance;
|
||||
if it is used in
|
||||
.I any
|
||||
of the scanner's actions it will slow down
|
||||
.I all
|
||||
of the scanner's matching. Furthermore,
|
||||
.B REJECT
|
||||
cannot be used with the
|
||||
.I -f
|
||||
or
|
||||
.I -F
|
||||
options.
|
||||
.IP
|
||||
Note also that unlike the other special actions,
|
||||
.B REJECT
|
||||
is a
|
||||
.I branch;
|
||||
code immediately following it in the action will
|
||||
.I not
|
||||
be executed.
|
||||
.IP -
|
||||
.B yymore()
|
||||
tells the scanner that the next time it matches a rule, the corresponding
|
||||
token should be
|
||||
.I appended
|
||||
onto the current value of
|
||||
.B yytext
|
||||
rather than replacing it.
|
||||
.IP -
|
||||
.B yyless(n)
|
||||
returns all but the first
|
||||
.I n
|
||||
characters of the current token back to the input stream, where they
|
||||
will be rescanned when the scanner looks for the next match.
|
||||
.B yytext
|
||||
and
|
||||
.B yyleng
|
||||
are adjusted appropriately (e.g.,
|
||||
.B yyleng
|
||||
will now be equal to
|
||||
.I n
|
||||
).
|
||||
.IP -
|
||||
.B unput(c)
|
||||
puts the character
|
||||
.I c
|
||||
back onto the input stream. It will be the next character scanned.
|
||||
.IP -
|
||||
.B input()
|
||||
reads the next character from the input stream (this routine is called
|
||||
.B yyinput()
|
||||
if the scanner is compiled using
|
||||
.B C++).
|
||||
.IP -
|
||||
.B yyterminate()
|
||||
can be used in lieu of a return statement in an action. It terminates
|
||||
the scanner and returns a 0 to the scanner's caller, indicating "all done".
|
||||
.IP
|
||||
By default,
|
||||
.B yyterminate()
|
||||
is also called when an end-of-file is encountered. It is a macro and
|
||||
may be redefined.
|
||||
.IP -
|
||||
.B YY_NEW_FILE
|
||||
is an action available only in <<EOF>> rules. It means "Okay, I've
|
||||
set up a new input file, continue scanning".
|
||||
.IP -
|
||||
.B yy_create_buffer( file, size )
|
||||
takes a
|
||||
.I FILE
|
||||
pointer and an integer
|
||||
.I size.
|
||||
It returns a YY_BUFFER_STATE
|
||||
handle to a new input buffer large enough to accommodate
|
||||
.I size
|
||||
characters and associated with the given file. When in doubt, use
|
||||
.B YY_BUF_SIZE
|
||||
for the size.
|
||||
.IP -
|
||||
.B yy_switch_to_buffer( new_buffer )
|
||||
switches the scanner's processing to scan for tokens from
|
||||
the given buffer, which must be a YY_BUFFER_STATE.
|
||||
.IP -
|
||||
.B yy_delete_buffer( buffer )
|
||||
deletes the given buffer.
|
||||
.SH VALUES AVAILABLE TO THE USER
|
||||
.IP -
|
||||
.B char *yytext
|
||||
holds the text of the current token. It may not be modified.
|
||||
.IP -
|
||||
.B int yyleng
|
||||
holds the length of the current token. It may not be modified.
|
||||
.IP -
|
||||
.B FILE *yyin
|
||||
is the file which by default
|
||||
.I flex
|
||||
reads from. It may be redefined but doing so only makes sense before
|
||||
scanning begins. Changing it in the middle of scanning will have
|
||||
unexpected results since
|
||||
.I flex
|
||||
buffers its input. Once scanning terminates because an end-of-file
|
||||
has been seen,
|
||||
.B
|
||||
void yyrestart( FILE *new_file )
|
||||
may be called to point
|
||||
.I yyin
|
||||
at the new input file.
|
||||
.IP -
|
||||
.B FILE *yyout
|
||||
is the file to which
|
||||
.B ECHO
|
||||
actions are done. It can be reassigned by the user.
|
||||
.IP -
|
||||
.B YY_CURRENT_BUFFER
|
||||
returns a
|
||||
.B YY_BUFFER_STATE
|
||||
handle to the current buffer.
|
||||
.SH MACROS THE USER CAN REDEFINE
|
||||
.IP -
|
||||
.B YY_DECL
|
||||
controls how the scanning routine is declared.
|
||||
By default, it is "int yylex()", or, if prototypes are being
|
||||
used, "int yylex(void)". This definition may be changed by redefining
|
||||
the "YY_DECL" macro. Note that
|
||||
if you give arguments to the scanning routine using a
|
||||
K&R-style/non-prototyped function declaration, you must terminate
|
||||
the definition with a semi-colon (;).
|
||||
.IP -
|
||||
The nature of how the scanner
|
||||
gets its input can be controlled by redefining the
|
||||
.B YY_INPUT
|
||||
macro.
|
||||
YY_INPUT's calling sequence is "YY_INPUT(buf,result,max_size)". Its
|
||||
action is to place up to
|
||||
.I max_size
|
||||
characters in the character array
|
||||
.I buf
|
||||
and return in the integer variable
|
||||
.I result
|
||||
either the
|
||||
number of characters read or the constant YY_NULL (0 on Unix systems)
|
||||
to indicate EOF. The default YY_INPUT reads from the
|
||||
global file-pointer "yyin".
|
||||
A sample redefinition of YY_INPUT (in the definitions
|
||||
section of the input file):
|
||||
.nf
|
||||
|
||||
%{
|
||||
#undef YY_INPUT
|
||||
#define YY_INPUT(buf,result,max_size) \\
|
||||
{ \\
|
||||
int c = getchar(); \\
|
||||
result = (c == EOF) ? YY_NULL : (buf[0] = c, 1); \\
|
||||
}
|
||||
%}
|
||||
|
||||
.fi
|
||||
.IP -
|
||||
When the scanner receives an end-of-file indication from YY_INPUT,
|
||||
it then checks the
|
||||
.B yywrap()
|
||||
function. If
|
||||
.B yywrap()
|
||||
returns false (zero), then it is assumed that the
|
||||
function has gone ahead and set up
|
||||
.I yyin
|
||||
to point to another input file, and scanning continues. If it returns
|
||||
true (non-zero), then the scanner terminates, returning 0 to its
|
||||
caller.
|
||||
.IP
|
||||
The default
|
||||
.B yywrap()
|
||||
always returns 1. Presently, to redefine it you must first
|
||||
"#undef yywrap", as it is currently implemented as a macro. It is
|
||||
likely that
|
||||
.B yywrap()
|
||||
will soon be defined to be a function rather than a macro.
|
||||
.IP -
|
||||
YY_USER_ACTION
|
||||
can be redefined to provide an action
|
||||
which is always executed prior to the matched rule's action.
|
||||
.IP -
|
||||
The macro
|
||||
.B YY_USER_INIT
|
||||
may be redefined to provide an action which is always executed before
|
||||
the first scan.
|
||||
.IP -
|
||||
In the generated scanner, the actions are all gathered in one large
|
||||
switch statement and separated using
|
||||
.B YY_BREAK,
|
||||
which may be redefined. By default, it is simply a "break", to separate
|
||||
each rule's action from the following rule's.
|
||||
.SH FILES
|
||||
.TP
|
||||
.I flex.skel
|
||||
skeleton scanner.
|
||||
.TP
|
||||
.I lex.yy.c
|
||||
generated scanner (called
|
||||
.I lexyy.c
|
||||
on some systems).
|
||||
.TP
|
||||
.I lex.backtrack
|
||||
backtracking information for
|
||||
.B -b
|
||||
flag (called
|
||||
.I lex.bck
|
||||
on some systems).
|
||||
.TP
|
||||
.B -lfl
|
||||
library with which to link the scanners.
|
||||
.SH "SEE ALSO"
|
||||
.LP
|
||||
flexdoc(1), lex(1), yacc(1), sed(1), awk(1).
|
||||
.LP
|
||||
M. E. Lesk and E. Schmidt,
|
||||
.I LEX - Lexical Analyzer Generator
|
||||
.SH DIAGNOSTICS
|
||||
.I reject_used_but_not_detected undefined
|
||||
or
|
||||
.LP
|
||||
.I yymore_used_but_not_detected undefined -
|
||||
These errors can occur at compile time. They indicate that the
|
||||
scanner uses
|
||||
.B REJECT
|
||||
or
|
||||
.B yymore()
|
||||
but that
|
||||
.I flex
|
||||
failed to notice the fact, meaning that
|
||||
.I flex
|
||||
scanned the first two sections looking for occurrences of these actions
|
||||
and failed to find any, but somehow you snuck some in (via a #include
|
||||
file, for example). Make an explicit reference to the action in your
|
||||
.I flex
|
||||
input file. (Note that previously
|
||||
.I flex
|
||||
supported a
|
||||
.B %used/%unused
|
||||
mechanism for dealing with this problem; this feature is still supported
|
||||
but now deprecated, and will go away soon unless the author hears from
|
||||
people who can argue compellingly that they need it.)
|
||||
.LP
|
||||
.I flex scanner jammed -
|
||||
a scanner compiled with
|
||||
.B -s
|
||||
has encountered an input string which wasn't matched by
|
||||
any of its rules.
|
||||
.LP
|
||||
.I flex input buffer overflowed -
|
||||
a scanner rule matched a string long enough to overflow the
|
||||
scanner's internal input buffer (16K bytes - controlled by
|
||||
.B YY_BUF_MAX
|
||||
in "flex.skel").
|
||||
.LP
|
||||
.I scanner requires -8 flag -
|
||||
Your scanner specification includes recognizing 8-bit characters and
|
||||
you did not specify the -8 flag (and your site has not installed flex
|
||||
with -8 as the default).
|
||||
.LP
|
||||
.I
|
||||
fatal flex scanner internal error--end of buffer missed -
|
||||
This can occur in a scanner which is reentered after a long-jump
|
||||
has jumped out (or over) the scanner's activation frame. Before
|
||||
reentering the scanner, use:
|
||||
.nf
|
||||
|
||||
yyrestart( yyin );
|
||||
|
||||
.fi
|
||||
.LP
|
||||
.I too many %t classes! -
|
||||
You managed to put every single character into its own %t class.
|
||||
.I flex
|
||||
requires that at least one of the classes share characters.
|
||||
.SH AUTHOR
|
||||
Vern Paxson, with the help of many ideas and much inspiration from
|
||||
Van Jacobson. Original version by Jef Poskanzer.
|
||||
.LP
|
||||
See flexdoc(1) for additional credits and the address to send comments to.
|
||||
.SH DEFICIENCIES / BUGS
|
||||
.LP
|
||||
Some trailing context
|
||||
patterns cannot be properly matched and generate
|
||||
warning messages ("Dangerous trailing context"). These are
|
||||
patterns where the ending of the
|
||||
first part of the rule matches the beginning of the second
|
||||
part, such as "zx*/xy*", where the 'x*' matches the 'x' at
|
||||
the beginning of the trailing context. (Note that the POSIX draft
|
||||
states that the text matched by such patterns is undefined.)
|
||||
.LP
|
||||
For some trailing context rules, parts which are actually fixed-length are
|
||||
not recognized as such, leading to the abovementioned performance loss.
|
||||
In particular, parts using '|' or {n} (such as "foo{3}") are always
|
||||
considered variable-length.
|
||||
.LP
|
||||
Combining trailing context with the special '|' action can result in
|
||||
.I fixed
|
||||
trailing context being turned into the more expensive
|
||||
.I variable
|
||||
trailing context. This happens in the following example:
|
||||
.nf
|
||||
|
||||
%%
|
||||
abc |
|
||||
xyz/def
|
||||
|
||||
.fi
|
||||
.LP
|
||||
Use of unput() invalidates yytext and yyleng.
|
||||
.LP
|
||||
Use of unput() to push back more text than was matched can
|
||||
result in the pushed-back text matching a beginning-of-line ('^')
|
||||
rule even though it didn't come at the beginning of the line
|
||||
(though this is rare!).
|
||||
.LP
|
||||
Pattern-matching of NUL's is substantially slower than matching other
|
||||
characters.
|
||||
.LP
|
||||
.I flex
|
||||
does not generate correct #line directives for code internal
|
||||
to the scanner; thus, bugs in
|
||||
.I flex.skel
|
||||
yield bogus line numbers.
|
||||
.LP
|
||||
Due to both buffering of input and read-ahead, you cannot intermix
|
||||
calls to <stdio.h> routines, such as, for example,
|
||||
.B getchar(),
|
||||
with
|
||||
.I flex
|
||||
rules and expect it to work. Call
|
||||
.B input()
|
||||
instead.
|
||||
.LP
|
||||
The total number of table entries listed by the
|
||||
.B -v
|
||||
flag excludes the number of table entries needed to determine
|
||||
what rule has been matched. The number of entries is equal
|
||||
to the number of DFA states if the scanner does not use
|
||||
.B REJECT,
|
||||
and somewhat greater than the number of states if it does.
|
||||
.LP
|
||||
.B REJECT
|
||||
cannot be used with the
|
||||
.I -f
|
||||
or
|
||||
.I -F
|
||||
options.
|
||||
.LP
|
||||
Some of the macros, such as
|
||||
.B yywrap(),
|
||||
may in the future become functions which live in the
|
||||
.B -lfl
|
||||
library. This will doubtless break a lot of code, but may be
|
||||
required for POSIX-compliance.
|
||||
.LP
|
||||
The
|
||||
.I flex
|
||||
internal algorithms need documentation.
|
858
util/flex/flex.skel
Normal file
858
util/flex/flex.skel
Normal file
|
@ -0,0 +1,858 @@
|
|||
/* A lexical scanner generated by flex */
|
||||
|
||||
/* scanner skeleton version:
|
||||
* $Header$
|
||||
*/
|
||||
|
||||
#define FLEX_SCANNER
|
||||
|
||||
#define ACK_MOD
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
|
||||
/* cfront 1.2 defines "c_plusplus" instead of "__cplusplus" */
|
||||
#ifdef c_plusplus
|
||||
#ifndef __cplusplus
|
||||
#define __cplusplus
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <osfcn.h>
|
||||
|
||||
/* use prototypes in function declarations */
|
||||
#define YY_USE_PROTOS
|
||||
|
||||
/* the "const" storage-class-modifier is valid */
|
||||
#define YY_USE_CONST
|
||||
|
||||
#else /* ! __cplusplus */
|
||||
|
||||
#ifdef __STDC__
|
||||
|
||||
#ifdef __GNUC__
|
||||
#include <stddef.h>
|
||||
void *malloc( size_t );
|
||||
void free( void* );
|
||||
#else
|
||||
#include <stdlib.h>
|
||||
#endif /* __GNUC__ */
|
||||
|
||||
#define YY_USE_PROTOS
|
||||
#define YY_USE_CONST
|
||||
|
||||
#endif /* __STDC__ */
|
||||
#endif /* ! __cplusplus */
|
||||
|
||||
|
||||
#ifdef __TURBOC__
|
||||
#define YY_USE_CONST
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef YY_USE_CONST
|
||||
#define const
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef YY_USE_PROTOS
|
||||
#define YY_PROTO(proto) proto
|
||||
#else
|
||||
#define YY_PROTO(proto) ()
|
||||
/* we can't get here if it's an ANSI C compiler, or a C++ compiler,
|
||||
* so it's got to be a K&R compiler, and therefore there's no standard
|
||||
* place from which to include these definitions
|
||||
*/
|
||||
char *malloc();
|
||||
int free();
|
||||
int read();
|
||||
#endif
|
||||
|
||||
|
||||
/* amount of stuff to slurp up with each read */
|
||||
#ifndef YY_READ_BUF_SIZE
|
||||
#define YY_READ_BUF_SIZE 8192
|
||||
#endif
|
||||
|
||||
/* returned upon end-of-file */
|
||||
#define YY_END_TOK 0
|
||||
|
||||
/* copy whatever the last rule matched to the standard output */
|
||||
|
||||
/* cast to (char *) is because for 8-bit chars, yytext is (unsigned char *) */
|
||||
/* this used to be an fputs(), but since the string might contain NUL's,
|
||||
* we now use fwrite()
|
||||
*/
|
||||
#define ECHO (void) fwrite( (char *) yytext, yyleng, 1, yyout )
|
||||
|
||||
/* gets input and stuffs it into "buf". number of characters read, or YY_NULL,
|
||||
* is returned in "result".
|
||||
*/
|
||||
#define YY_INPUT(buf,result,max_size) \
|
||||
if ( (result = read( fileno(yyin), (char *) buf, max_size )) < 0 ) \
|
||||
YY_FATAL_ERROR( "read() in flex scanner failed" );
|
||||
#define YY_NULL 0
|
||||
|
||||
/* no semi-colon after return; correct usage is to write "yyterminate();" -
|
||||
* we don't want an extra ';' after the "return" because that will cause
|
||||
* some compilers to complain about unreachable statements.
|
||||
*/
|
||||
#define yyterminate() return ( YY_NULL )
|
||||
|
||||
/* report a fatal error */
|
||||
|
||||
/* The funky do-while is used to turn this macro definition into
|
||||
* a single C statement (which needs a semi-colon terminator).
|
||||
* This avoids problems with code like:
|
||||
*
|
||||
* if ( something_happens )
|
||||
* YY_FATAL_ERROR( "oops, the something happened" );
|
||||
* else
|
||||
* everything_okay();
|
||||
*
|
||||
* Prior to using the do-while the compiler would get upset at the
|
||||
* "else" because it interpreted the "if" statement as being all
|
||||
* done when it reached the ';' after the YY_FATAL_ERROR() call.
|
||||
*/
|
||||
|
||||
#define YY_FATAL_ERROR(msg) \
|
||||
do \
|
||||
{ \
|
||||
(void) fputs( msg, stderr ); \
|
||||
(void) putc( '\n', stderr ); \
|
||||
exit( 1 ); \
|
||||
} \
|
||||
while ( 0 )
|
||||
|
||||
/* default yywrap function - always treat EOF as an EOF */
|
||||
#define yywrap() 1
|
||||
|
||||
/* enter a start condition. This macro really ought to take a parameter,
|
||||
* but we do it the disgusting crufty way forced on us by the ()-less
|
||||
* definition of BEGIN
|
||||
*/
|
||||
#define BEGIN yy_start = 1 + 2 *
|
||||
|
||||
/* action number for EOF rule of a given start state */
|
||||
#define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1)
|
||||
|
||||
/* special action meaning "start processing a new file" */
|
||||
#define YY_NEW_FILE \
|
||||
do \
|
||||
{ \
|
||||
yy_init_buffer( yy_current_buffer, yyin ); \
|
||||
yy_load_buffer_state(); \
|
||||
} \
|
||||
while ( 0 )
|
||||
|
||||
/* default declaration of generated scanner - a define so the user can
|
||||
* easily add parameters
|
||||
*/
|
||||
#define YY_DECL int yylex YY_PROTO(( void ))
|
||||
|
||||
/* code executed at the end of each rule */
|
||||
#define YY_BREAK break;
|
||||
|
||||
#define YY_END_OF_BUFFER_CHAR 0
|
||||
|
||||
#ifndef YY_BUF_SIZE
|
||||
#define YY_BUF_SIZE (YY_READ_BUF_SIZE * 2) /* size of default input buffer */
|
||||
#endif
|
||||
|
||||
typedef struct yy_buffer_state *YY_BUFFER_STATE;
|
||||
|
||||
%% section 1 definitions go here
|
||||
|
||||
/* done after the current pattern has been matched and before the
|
||||
* corresponding action - sets up yytext
|
||||
*/
|
||||
#define YY_DO_BEFORE_ACTION \
|
||||
yytext = yy_bp; \
|
||||
%% code to fiddle yytext and yyleng for yymore() goes here
|
||||
yy_hold_char = *yy_cp; \
|
||||
*yy_cp = '\0'; \
|
||||
yy_c_buf_p = yy_cp;
|
||||
|
||||
#define EOB_ACT_CONTINUE_SCAN 0
|
||||
#define EOB_ACT_END_OF_FILE 1
|
||||
#define EOB_ACT_LAST_MATCH 2
|
||||
|
||||
/* return all but the first 'n' matched characters back to the input stream */
|
||||
#define yyless(n) \
|
||||
do \
|
||||
{ \
|
||||
/* undo effects of setting up yytext */ \
|
||||
*yy_cp = yy_hold_char; \
|
||||
yy_c_buf_p = yy_cp = yy_bp + n; \
|
||||
YY_DO_BEFORE_ACTION; /* set up yytext again */ \
|
||||
} \
|
||||
while ( 0 )
|
||||
|
||||
#define unput(c) yyunput( c, yytext )
|
||||
|
||||
|
||||
struct yy_buffer_state
|
||||
{
|
||||
FILE *yy_input_file;
|
||||
|
||||
YY_CHAR *yy_ch_buf; /* input buffer */
|
||||
YY_CHAR *yy_buf_pos; /* current position in input buffer */
|
||||
|
||||
/* size of input buffer in bytes, not including room for EOB characters*/
|
||||
int yy_buf_size;
|
||||
|
||||
/* number of characters read into yy_ch_buf, not including EOB characters */
|
||||
int yy_n_chars;
|
||||
|
||||
int yy_eof_status; /* whether we've seen an EOF on this buffer */
|
||||
#define EOF_NOT_SEEN 0
|
||||
/* "pending" happens when the EOF has been seen but there's still
|
||||
* some text to process
|
||||
*/
|
||||
#define EOF_PENDING 1
|
||||
#define EOF_DONE 2
|
||||
};
|
||||
|
||||
static YY_BUFFER_STATE yy_current_buffer;
|
||||
|
||||
/* we provide macros for accessing buffer states in case in the
|
||||
* future we want to put the buffer states in a more general
|
||||
* "scanner state"
|
||||
*/
|
||||
#define YY_CURRENT_BUFFER yy_current_buffer
|
||||
|
||||
|
||||
/* yy_hold_char holds the character lost when yytext is formed */
|
||||
static YY_CHAR yy_hold_char;
|
||||
|
||||
static int yy_n_chars; /* number of characters read into yy_ch_buf */
|
||||
|
||||
|
||||
|
||||
#ifndef YY_USER_ACTION
|
||||
#define YY_USER_ACTION
|
||||
#endif
|
||||
|
||||
#ifndef YY_USER_INIT
|
||||
#define YY_USER_INIT
|
||||
#endif
|
||||
|
||||
extern YY_CHAR *yytext;
|
||||
extern int yyleng;
|
||||
extern FILE *yyin, *yyout;
|
||||
|
||||
YY_CHAR *yytext;
|
||||
int yyleng;
|
||||
|
||||
FILE *yyin = (FILE *) 0, *yyout = (FILE *) 0;
|
||||
|
||||
%% data tables for the DFA go here
|
||||
|
||||
/* these variables are all declared out here so that section 3 code can
|
||||
* manipulate them
|
||||
*/
|
||||
/* points to current character in buffer */
|
||||
static YY_CHAR *yy_c_buf_p = (YY_CHAR *) 0;
|
||||
static int yy_init = 1; /* whether we need to initialize */
|
||||
static int yy_start = 0; /* start state number */
|
||||
|
||||
/* flag which is used to allow yywrap()'s to do buffer switches
|
||||
* instead of setting up a fresh yyin. A bit of a hack ...
|
||||
*/
|
||||
static int yy_did_buffer_switch_on_eof;
|
||||
|
||||
static yy_state_type yy_get_previous_state YY_PROTO(( void ));
|
||||
static yy_state_type yy_try_NUL_trans YY_PROTO(( yy_state_type current_state ));
|
||||
static int yy_get_next_buffer YY_PROTO(( void ));
|
||||
static void yyunput YY_PROTO(( YY_CHAR c, YY_CHAR *buf_ptr ));
|
||||
void yyrestart YY_PROTO(( FILE *input_file ));
|
||||
void yy_switch_to_buffer YY_PROTO(( YY_BUFFER_STATE new_buffer ));
|
||||
void yy_load_buffer_state YY_PROTO(( void ));
|
||||
YY_BUFFER_STATE yy_create_buffer YY_PROTO(( FILE *file, int size ));
|
||||
void yy_delete_buffer YY_PROTO(( YY_BUFFER_STATE b ));
|
||||
void yy_init_buffer YY_PROTO(( YY_BUFFER_STATE b, FILE *file ));
|
||||
|
||||
#define yy_new_buffer yy_create_buffer
|
||||
|
||||
#ifdef __cplusplus
|
||||
static int yyinput YY_PROTO(( void ));
|
||||
#else
|
||||
static int input YY_PROTO(( void ));
|
||||
#endif
|
||||
|
||||
YY_DECL
|
||||
{
|
||||
register yy_state_type yy_current_state;
|
||||
register YY_CHAR *yy_cp, *yy_bp;
|
||||
register int yy_act;
|
||||
|
||||
%% user's declarations go here
|
||||
|
||||
if ( yy_init )
|
||||
{
|
||||
YY_USER_INIT;
|
||||
|
||||
if ( ! yy_start )
|
||||
yy_start = 1; /* first start state */
|
||||
|
||||
if ( ! yyin )
|
||||
yyin = stdin;
|
||||
|
||||
if ( ! yyout )
|
||||
yyout = stdout;
|
||||
|
||||
if ( yy_current_buffer )
|
||||
yy_init_buffer( yy_current_buffer, yyin );
|
||||
else
|
||||
yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE );
|
||||
|
||||
yy_load_buffer_state();
|
||||
|
||||
yy_init = 0;
|
||||
}
|
||||
|
||||
while ( 1 ) /* loops until end-of-file is reached */
|
||||
{
|
||||
%% yymore()-related code goes here
|
||||
yy_cp = yy_c_buf_p;
|
||||
|
||||
/* support of yytext */
|
||||
*yy_cp = yy_hold_char;
|
||||
|
||||
/* yy_bp points to the position in yy_ch_buf of the start of the
|
||||
* current run.
|
||||
*/
|
||||
yy_bp = yy_cp;
|
||||
|
||||
%% code to set up and find next match goes here
|
||||
|
||||
yy_find_action:
|
||||
%% code to find the action number goes here
|
||||
|
||||
YY_DO_BEFORE_ACTION;
|
||||
YY_USER_ACTION;
|
||||
|
||||
do_action: /* this label is used only to access EOF actions */
|
||||
|
||||
%% debug code goes here
|
||||
|
||||
switch ( yy_act )
|
||||
{
|
||||
%% actions go here
|
||||
|
||||
case YY_END_OF_BUFFER:
|
||||
{
|
||||
/* amount of text matched not including the EOB char */
|
||||
int yy_amount_of_matched_text = yy_cp - yytext - 1;
|
||||
|
||||
/* undo the effects of YY_DO_BEFORE_ACTION */
|
||||
*yy_cp = yy_hold_char;
|
||||
|
||||
/* note that here we test for yy_c_buf_p "<=" to the position
|
||||
* of the first EOB in the buffer, since yy_c_buf_p will
|
||||
* already have been incremented past the NUL character
|
||||
* (since all states make transitions on EOB to the end-
|
||||
* of-buffer state). Contrast this with the test in yyinput().
|
||||
*/
|
||||
if ( yy_c_buf_p <= &yy_current_buffer->yy_ch_buf[yy_n_chars] )
|
||||
/* this was really a NUL */
|
||||
{
|
||||
yy_state_type yy_next_state;
|
||||
|
||||
yy_c_buf_p = yytext + yy_amount_of_matched_text;
|
||||
|
||||
yy_current_state = yy_get_previous_state();
|
||||
|
||||
/* okay, we're now positioned to make the
|
||||
* NUL transition. We couldn't have
|
||||
* yy_get_previous_state() go ahead and do it
|
||||
* for us because it doesn't know how to deal
|
||||
* with the possibility of jamming (and we
|
||||
* don't want to build jamming into it because
|
||||
* then it will run more slowly)
|
||||
*/
|
||||
|
||||
yy_next_state = yy_try_NUL_trans( yy_current_state );
|
||||
|
||||
yy_bp = yytext + YY_MORE_ADJ;
|
||||
|
||||
if ( yy_next_state )
|
||||
{
|
||||
/* consume the NUL */
|
||||
yy_cp = ++yy_c_buf_p;
|
||||
yy_current_state = yy_next_state;
|
||||
goto yy_match;
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
%% code to do backtracking for compressed tables and set up yy_cp goes here
|
||||
goto yy_find_action;
|
||||
}
|
||||
}
|
||||
|
||||
else switch ( yy_get_next_buffer() )
|
||||
{
|
||||
case EOB_ACT_END_OF_FILE:
|
||||
{
|
||||
yy_did_buffer_switch_on_eof = 0;
|
||||
|
||||
if ( yywrap() )
|
||||
{
|
||||
/* note: because we've taken care in
|
||||
* yy_get_next_buffer() to have set up yytext,
|
||||
* we can now set up yy_c_buf_p so that if some
|
||||
* total hoser (like flex itself) wants
|
||||
* to call the scanner after we return the
|
||||
* YY_NULL, it'll still work - another YY_NULL
|
||||
* will get returned.
|
||||
*/
|
||||
yy_c_buf_p = yytext + YY_MORE_ADJ;
|
||||
|
||||
yy_act = YY_STATE_EOF((yy_start - 1) / 2);
|
||||
goto do_action;
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
if ( ! yy_did_buffer_switch_on_eof )
|
||||
YY_NEW_FILE;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case EOB_ACT_CONTINUE_SCAN:
|
||||
yy_c_buf_p = yytext + yy_amount_of_matched_text;
|
||||
|
||||
yy_current_state = yy_get_previous_state();
|
||||
|
||||
yy_cp = yy_c_buf_p;
|
||||
yy_bp = yytext + YY_MORE_ADJ;
|
||||
goto yy_match;
|
||||
|
||||
case EOB_ACT_LAST_MATCH:
|
||||
yy_c_buf_p =
|
||||
&yy_current_buffer->yy_ch_buf[yy_n_chars];
|
||||
|
||||
yy_current_state = yy_get_previous_state();
|
||||
|
||||
yy_cp = yy_c_buf_p;
|
||||
yy_bp = yytext + YY_MORE_ADJ;
|
||||
goto yy_find_action;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
#ifdef FLEX_DEBUG
|
||||
printf( "action # %d\n", yy_act );
|
||||
#endif
|
||||
YY_FATAL_ERROR(
|
||||
"fatal flex scanner internal error--no action found" );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* yy_get_next_buffer - try to read in a new buffer
|
||||
*
|
||||
* synopsis
|
||||
* int yy_get_next_buffer();
|
||||
*
|
||||
* returns a code representing an action
|
||||
* EOB_ACT_LAST_MATCH - no more input, but text scanned so far still needs to be matched
|
||||
* EOB_ACT_CONTINUE_SCAN - continue scanning from current position
|
||||
* EOB_ACT_END_OF_FILE - end of file
|
||||
*/
|
||||
|
||||
static int yy_get_next_buffer()
|
||||
|
||||
{
|
||||
register YY_CHAR *dest = yy_current_buffer->yy_ch_buf;
|
||||
register YY_CHAR *source = yytext - 1; /* copy prev. char, too */
|
||||
register int number_to_move, i;
|
||||
int ret_val;
|
||||
|
||||
if ( yy_c_buf_p > &yy_current_buffer->yy_ch_buf[yy_n_chars + 1] )
|
||||
YY_FATAL_ERROR(
|
||||
"fatal flex scanner internal error--end of buffer missed" );
|
||||
|
||||
/* try to read more data */
|
||||
|
||||
/* first move last chars to start of buffer */
|
||||
number_to_move = yy_c_buf_p - yytext;
|
||||
|
||||
for ( i = 0; i < number_to_move; ++i )
|
||||
*(dest++) = *(source++);
|
||||
|
||||
if ( yy_current_buffer->yy_eof_status != EOF_NOT_SEEN )
|
||||
/* don't do the read, it's not guaranteed to return an EOF,
|
||||
* just force an EOF
|
||||
*/
|
||||
yy_n_chars = 0;
|
||||
|
||||
else
|
||||
{
|
||||
int num_to_read = yy_current_buffer->yy_buf_size - number_to_move - 1;
|
||||
|
||||
if ( num_to_read > YY_READ_BUF_SIZE )
|
||||
num_to_read = YY_READ_BUF_SIZE;
|
||||
|
||||
else if ( num_to_read <= 0 )
|
||||
YY_FATAL_ERROR( "fatal error - scanner input buffer overflow" );
|
||||
|
||||
/* read in more data */
|
||||
YY_INPUT( (&yy_current_buffer->yy_ch_buf[number_to_move]),
|
||||
yy_n_chars, num_to_read );
|
||||
}
|
||||
|
||||
if ( yy_n_chars == 0 )
|
||||
{
|
||||
if ( number_to_move == 1 )
|
||||
{
|
||||
ret_val = EOB_ACT_END_OF_FILE;
|
||||
yy_current_buffer->yy_eof_status = EOF_DONE;
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
ret_val = EOB_ACT_LAST_MATCH;
|
||||
yy_current_buffer->yy_eof_status = EOF_PENDING;
|
||||
}
|
||||
}
|
||||
|
||||
else
|
||||
ret_val = EOB_ACT_CONTINUE_SCAN;
|
||||
|
||||
yy_n_chars += number_to_move;
|
||||
yy_current_buffer->yy_ch_buf[yy_n_chars] = YY_END_OF_BUFFER_CHAR;
|
||||
yy_current_buffer->yy_ch_buf[yy_n_chars + 1] = YY_END_OF_BUFFER_CHAR;
|
||||
|
||||
/* yytext begins at the second character in yy_ch_buf; the first
|
||||
* character is the one which preceded it before reading in the latest
|
||||
* buffer; it needs to be kept around in case it's a newline, so
|
||||
* yy_get_previous_state() will start with '^' rules active
|
||||
*/
|
||||
|
||||
yytext = &yy_current_buffer->yy_ch_buf[1];
|
||||
|
||||
return ( ret_val );
|
||||
}
|
||||
|
||||
|
||||
/* yy_get_previous_state - get the state just before the EOB char was reached
|
||||
*
|
||||
* synopsis
|
||||
* yy_state_type yy_get_previous_state();
|
||||
*/
|
||||
|
||||
static yy_state_type yy_get_previous_state()
|
||||
|
||||
{
|
||||
register yy_state_type yy_current_state;
|
||||
register YY_CHAR *yy_cp;
|
||||
|
||||
%% code to get the start state into yy_current_state goes here
|
||||
|
||||
for ( yy_cp = yytext + YY_MORE_ADJ; yy_cp < yy_c_buf_p; ++yy_cp )
|
||||
{
|
||||
%% code to find the next state goes here
|
||||
}
|
||||
|
||||
return ( yy_current_state );
|
||||
}
|
||||
|
||||
|
||||
/* yy_try_NUL_trans - try to make a transition on the NUL character
|
||||
*
|
||||
* synopsis
|
||||
* next_state = yy_try_NUL_trans( current_state );
|
||||
*/
|
||||
|
||||
#ifdef YY_USE_PROTOS
|
||||
static yy_state_type yy_try_NUL_trans( register yy_state_type yy_current_state )
|
||||
#else
|
||||
static yy_state_type yy_try_NUL_trans( yy_current_state )
|
||||
register yy_state_type yy_current_state;
|
||||
#endif
|
||||
|
||||
{
|
||||
register int yy_is_jam;
|
||||
%% code to find the next state, and perhaps do backtracking, goes here
|
||||
|
||||
return ( yy_is_jam ? 0 : yy_current_state );
|
||||
}
|
||||
|
||||
|
||||
#ifdef YY_USE_PROTOS
|
||||
static void yyunput( YY_CHAR c, register YY_CHAR *yy_bp )
|
||||
#else
|
||||
static void yyunput( c, yy_bp )
|
||||
YY_CHAR c;
|
||||
register YY_CHAR *yy_bp;
|
||||
#endif
|
||||
|
||||
{
|
||||
register YY_CHAR *yy_cp = yy_c_buf_p;
|
||||
|
||||
/* undo effects of setting up yytext */
|
||||
*yy_cp = yy_hold_char;
|
||||
|
||||
if ( yy_cp < yy_current_buffer->yy_ch_buf + 2 )
|
||||
{ /* need to shift things up to make room */
|
||||
register int number_to_move = yy_n_chars + 2; /* +2 for EOB chars */
|
||||
register YY_CHAR *dest =
|
||||
&yy_current_buffer->yy_ch_buf[yy_current_buffer->yy_buf_size + 2];
|
||||
register YY_CHAR *source =
|
||||
&yy_current_buffer->yy_ch_buf[number_to_move];
|
||||
|
||||
while ( source > yy_current_buffer->yy_ch_buf )
|
||||
*--dest = *--source;
|
||||
|
||||
yy_cp += dest - source;
|
||||
yy_bp += dest - source;
|
||||
yy_n_chars = yy_current_buffer->yy_buf_size;
|
||||
|
||||
if ( yy_cp < yy_current_buffer->yy_ch_buf + 2 )
|
||||
YY_FATAL_ERROR( "flex scanner push-back overflow" );
|
||||
}
|
||||
|
||||
if ( yy_cp > yy_bp && yy_cp[-1] == '\n' )
|
||||
yy_cp[-2] = '\n';
|
||||
|
||||
*--yy_cp = c;
|
||||
|
||||
/* note: the formal parameter *must* be called "yy_bp" for this
|
||||
* macro to now work correctly
|
||||
*/
|
||||
YY_DO_BEFORE_ACTION; /* set up yytext again */
|
||||
}
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
static int yyinput()
|
||||
#else
|
||||
static int input()
|
||||
#endif
|
||||
|
||||
{
|
||||
int c;
|
||||
YY_CHAR *yy_cp = yy_c_buf_p;
|
||||
|
||||
*yy_cp = yy_hold_char;
|
||||
|
||||
if ( *yy_c_buf_p == YY_END_OF_BUFFER_CHAR )
|
||||
{
|
||||
/* yy_c_buf_p now points to the character we want to return.
|
||||
* If this occurs *before* the EOB characters, then it's a
|
||||
* valid NUL; if not, then we've hit the end of the buffer.
|
||||
*/
|
||||
if ( yy_c_buf_p < &yy_current_buffer->yy_ch_buf[yy_n_chars] )
|
||||
/* this was really a NUL */
|
||||
*yy_c_buf_p = '\0';
|
||||
|
||||
else
|
||||
{ /* need more input */
|
||||
yytext = yy_c_buf_p;
|
||||
++yy_c_buf_p;
|
||||
|
||||
switch ( yy_get_next_buffer() )
|
||||
{
|
||||
case EOB_ACT_END_OF_FILE:
|
||||
{
|
||||
if ( yywrap() )
|
||||
{
|
||||
yy_c_buf_p = yytext + YY_MORE_ADJ;
|
||||
return ( EOF );
|
||||
}
|
||||
|
||||
YY_NEW_FILE;
|
||||
|
||||
#ifdef __cplusplus
|
||||
return ( yyinput() );
|
||||
#else
|
||||
return ( input() );
|
||||
#endif
|
||||
}
|
||||
break;
|
||||
|
||||
case EOB_ACT_CONTINUE_SCAN:
|
||||
yy_c_buf_p = yytext + YY_MORE_ADJ;
|
||||
break;
|
||||
|
||||
case EOB_ACT_LAST_MATCH:
|
||||
#ifdef __cplusplus
|
||||
YY_FATAL_ERROR( "unexpected last match in yyinput()" );
|
||||
#else
|
||||
YY_FATAL_ERROR( "unexpected last match in input()" );
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
c = *yy_c_buf_p;
|
||||
yy_hold_char = *++yy_c_buf_p;
|
||||
|
||||
return ( c );
|
||||
}
|
||||
|
||||
|
||||
#ifdef YY_USE_PROTOS
|
||||
void yyrestart( FILE *input_file )
|
||||
#else
|
||||
void yyrestart( input_file )
|
||||
FILE *input_file;
|
||||
#endif
|
||||
|
||||
{
|
||||
yy_init_buffer( yy_current_buffer, input_file );
|
||||
yy_load_buffer_state();
|
||||
}
|
||||
|
||||
|
||||
#ifdef YY_USE_PROTOS
|
||||
void yy_switch_to_buffer( YY_BUFFER_STATE new_buffer )
|
||||
#else
|
||||
void yy_switch_to_buffer( new_buffer )
|
||||
YY_BUFFER_STATE new_buffer;
|
||||
#endif
|
||||
|
||||
{
|
||||
if ( yy_current_buffer == new_buffer )
|
||||
return;
|
||||
|
||||
if ( yy_current_buffer )
|
||||
{
|
||||
/* flush out information for old buffer */
|
||||
*yy_c_buf_p = yy_hold_char;
|
||||
yy_current_buffer->yy_buf_pos = yy_c_buf_p;
|
||||
yy_current_buffer->yy_n_chars = yy_n_chars;
|
||||
}
|
||||
|
||||
yy_current_buffer = new_buffer;
|
||||
yy_load_buffer_state();
|
||||
|
||||
/* we don't actually know whether we did this switch during
|
||||
* EOF (yywrap()) processing, but the only time this flag
|
||||
* is looked at is after yywrap() is called, so it's safe
|
||||
* to go ahead and always set it.
|
||||
*/
|
||||
yy_did_buffer_switch_on_eof = 1;
|
||||
}
|
||||
|
||||
|
||||
#ifdef YY_USE_PROTOS
|
||||
void yy_load_buffer_state( void )
|
||||
#else
|
||||
void yy_load_buffer_state()
|
||||
#endif
|
||||
|
||||
{
|
||||
yy_n_chars = yy_current_buffer->yy_n_chars;
|
||||
yytext = yy_c_buf_p = yy_current_buffer->yy_buf_pos;
|
||||
yyin = yy_current_buffer->yy_input_file;
|
||||
yy_hold_char = *yy_c_buf_p;
|
||||
}
|
||||
|
||||
|
||||
#ifdef YY_USE_PROTOS
|
||||
YY_BUFFER_STATE yy_create_buffer( FILE *file, int size )
|
||||
#else
|
||||
YY_BUFFER_STATE yy_create_buffer( file, size )
|
||||
FILE *file;
|
||||
int size;
|
||||
#endif
|
||||
|
||||
{
|
||||
YY_BUFFER_STATE b;
|
||||
|
||||
b = (YY_BUFFER_STATE) malloc( sizeof( struct yy_buffer_state ) );
|
||||
|
||||
if ( ! b )
|
||||
YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" );
|
||||
|
||||
b->yy_buf_size = size;
|
||||
|
||||
/* yy_ch_buf has to be 2 characters longer than the size given because
|
||||
* we need to put in 2 end-of-buffer characters.
|
||||
*/
|
||||
b->yy_ch_buf = (YY_CHAR *) malloc( (unsigned) (b->yy_buf_size + 2) );
|
||||
|
||||
if ( ! b->yy_ch_buf )
|
||||
YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" );
|
||||
|
||||
yy_init_buffer( b, file );
|
||||
|
||||
return ( b );
|
||||
}
|
||||
|
||||
|
||||
#ifdef YY_USE_PROTOS
|
||||
void yy_delete_buffer( YY_BUFFER_STATE b )
|
||||
#else
|
||||
void yy_delete_buffer( b )
|
||||
YY_BUFFER_STATE b;
|
||||
#endif
|
||||
|
||||
{
|
||||
if ( b == yy_current_buffer )
|
||||
yy_current_buffer = (YY_BUFFER_STATE) 0;
|
||||
|
||||
free( (char *) b->yy_ch_buf );
|
||||
free( (char *) b );
|
||||
}
|
||||
|
||||
|
||||
#ifdef YY_USE_PROTOS
|
||||
void yy_init_buffer( YY_BUFFER_STATE b, FILE *file )
|
||||
#else
|
||||
void yy_init_buffer( b, file )
|
||||
YY_BUFFER_STATE b;
|
||||
FILE *file;
|
||||
#endif
|
||||
|
||||
{
|
||||
b->yy_input_file = file;
|
||||
|
||||
/* we put in the '\n' and start reading from [1] so that an
|
||||
* initial match-at-newline will be true.
|
||||
*/
|
||||
|
||||
b->yy_ch_buf[0] = '\n';
|
||||
b->yy_n_chars = 1;
|
||||
|
||||
/* we always need two end-of-buffer characters. The first causes
|
||||
* a transition to the end-of-buffer state. The second causes
|
||||
* a jam in that state.
|
||||
*/
|
||||
b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR;
|
||||
b->yy_ch_buf[2] = YY_END_OF_BUFFER_CHAR;
|
||||
|
||||
b->yy_buf_pos = &b->yy_ch_buf[1];
|
||||
|
||||
b->yy_eof_status = EOF_NOT_SEEN;
|
||||
}
|
||||
|
||||
#ifdef ACK_MOD
/* redefine yyless() so that it does not access local variables of YY_DECL
 * (the earlier definition uses yy_cp and yy_bp, which only exist inside
 * the scanning routine); this version works purely from the file-scope
 * variables yytext, yyleng, yy_hold_char and yy_c_buf_p
 */

#undef yyless

/* return all but the first 'n' matched characters back to the input stream */
#define yyless(n) \
	do \
		{ \
		/* evaluate the argument exactly once, and as a unit */ \
		int yyless_macro_arg = (n); \
		/* undo effects of setting up yytext */ \
		yytext[yyleng] = yy_hold_char; \
		yy_c_buf_p = yytext + yyless_macro_arg; \
		/* re-terminate yytext after the first n characters */ \
		yy_hold_char = *yy_c_buf_p; \
		*yy_c_buf_p = '\0'; \
		yyleng = yyless_macro_arg; \
		} \
	while ( 0 )

#endif /* ACK_MOD */
|
877
util/flex/flexdef.h
Normal file
877
util/flex/flexdef.h
Normal file
|
@ -0,0 +1,877 @@
|
|||
/* flexdef - definitions file for flex */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 1990 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* Vern Paxson.
|
||||
*
|
||||
* The United States Government has rights in this work pursuant
|
||||
* to contract no. DE-AC03-76SF00098 between the United States
|
||||
* Department of Energy and the University of California.
|
||||
*
|
||||
* Redistribution and use in source and binary forms are permitted provided
|
||||
* that: (1) source distributions retain this entire copyright notice and
|
||||
* comment, and (2) distributions including binaries display the following
|
||||
* acknowledgement: ``This product includes software developed by the
|
||||
* University of California, Berkeley and its contributors'' in the
|
||||
* documentation or other materials provided with the distribution and in
|
||||
* all advertising materials mentioning features or use of this software.
|
||||
* Neither the name of the University nor the names of its contributors may
|
||||
* be used to endorse or promote products derived from this software without
|
||||
* specific prior written permission.
|
||||
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
|
||||
* WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*/
|
||||
|
||||
/* @(#) $Header$ (LBL) */
|
||||
|
||||
#ifndef FILE
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
|
||||
/* always be prepared to generate an 8-bit scanner */
|
||||
#define FLEX_8_BIT_CHARS
|
||||
|
||||
#ifdef FLEX_8_BIT_CHARS
|
||||
#define CSIZE 256
|
||||
#define Char unsigned char
|
||||
#else
|
||||
#define Char char
|
||||
#define CSIZE 128
|
||||
#endif
|
||||
|
||||
/* size of input alphabet - should be size of ASCII set */
|
||||
#ifndef DEFAULT_CSIZE
|
||||
#define DEFAULT_CSIZE 128
|
||||
#endif
|
||||
|
||||
#ifndef PROTO
|
||||
#ifdef __STDC__
|
||||
#define PROTO(proto) proto
|
||||
#else
|
||||
#define PROTO(proto) ()
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef ACK_MOD
|
||||
#ifdef USG
|
||||
#define SYS_V
|
||||
#endif
|
||||
|
||||
#ifdef SYS_V
|
||||
#include <string.h>
|
||||
#else
|
||||
|
||||
#include <strings.h>
|
||||
#ifdef lint
|
||||
char *sprintf(); /* keep lint happy */
|
||||
#endif
|
||||
#ifdef SCO_UNIX
|
||||
void *memset();
|
||||
#else
|
||||
char *memset();
|
||||
#endif
|
||||
#endif
|
||||
#else /* ACK_MOD */
|
||||
extern char *strcpy();
|
||||
#endif /* ACK_MOD */
|
||||
|
||||
#ifndef ACK_MOD
|
||||
#ifdef AMIGA
|
||||
#define bzero(s, n) setmem((char *)(s), n, '\0')
|
||||
#ifndef abs
|
||||
#define abs(x) ((x) < 0 ? -(x) : (x))
|
||||
#endif
|
||||
#else
|
||||
#define bzero(s, n) (void) memset((char *)(s), '\0', n)
|
||||
#endif
|
||||
#endif /* not ACK_MOD */
|
||||
|
||||
#ifdef VMS
|
||||
#define unlink delete
|
||||
#define SHORT_FILE_NAMES
|
||||
#endif
|
||||
|
||||
/* declarations for the storage allocator
 *
 * Under gcc the allocator is declared by hand rather than through
 * <stdlib.h>.  realloc() has to be declared there as well: without a
 * declaration it is implicitly taken to return int.
 */
#ifdef __STDC__

#ifdef __GNUC__
#include <stddef.h>
void *malloc( size_t );
void *realloc( void*, size_t );
void free( void* );
#else
#include <stdlib.h>
#endif

#else	/* ! __STDC__ */
char *malloc(), *realloc();
#endif
|
||||
|
||||
|
||||
/* maximum line length we'll have to deal with */
|
||||
#define MAXLINE BUFSIZ
|
||||
|
||||
/* maximum size of file name */
|
||||
#define FILENAMESIZE 1024
|
||||
|
||||
#ifndef min
|
||||
#define min(x,y) ((x) < (y) ? (x) : (y))
|
||||
#endif
|
||||
#ifndef max
|
||||
#define max(x,y) ((x) > (y) ? (x) : (y))
|
||||
#endif
|
||||
|
||||
#ifdef MS_DOS
|
||||
#ifndef abs
|
||||
#define abs(x) ((x) < 0 ? -(x) : (x))
|
||||
#endif
|
||||
#define SHORT_FILE_NAMES
|
||||
#endif
|
||||
|
||||
#define true 1
|
||||
#define false 0
|
||||
|
||||
|
||||
#ifndef DEFAULT_SKELETON_FILE
|
||||
#define DEFAULT_SKELETON_FILE "flex.skel"
|
||||
#endif
|
||||
|
||||
/* special chk[] values marking the slots taken by end-of-buffer and action
|
||||
* numbers
|
||||
*/
|
||||
#define EOB_POSITION -1
|
||||
#define ACTION_POSITION -2
|
||||
|
||||
/* number of data items per line for -f output */
|
||||
#define NUMDATAITEMS 10
|
||||
|
||||
/* number of lines of data in -f output before inserting a blank line for
|
||||
* readability.
|
||||
*/
|
||||
#define NUMDATALINES 10
|
||||
|
||||
/* transition_struct_out() definitions */
|
||||
#define TRANS_STRUCT_PRINT_LENGTH 15
|
||||
|
||||
/* returns true if an nfa state has an epsilon out-transition slot
 * that can be used.  This definition is currently not used.
 *
 * The test is: transchar[state] is SYM_EPSILON, trans2[state] is
 * NO_TRANSITION, and state is not its own finalst[] entry.  The argument
 * is parenthesized in the last comparison so that an expression argument
 * is compared as a whole.
 */
#define FREE_EPSILON(state) \
        (transchar[state] == SYM_EPSILON && \
         trans2[state] == NO_TRANSITION && \
         finalst[state] != (state))
|
||||
|
||||
/* returns true if an nfa state has an epsilon out-transition character
 * and both slots are free
 *
 * NOTE(review): only trans1[] is tested here; presumably trans2[] is never
 * occupied while trans1[] is empty - confirm against nfa.c.
 */
#define SUPER_FREE_EPSILON(state) \
        (transchar[state] == SYM_EPSILON && \
         trans1[state] == NO_TRANSITION)
|
||||
|
||||
/* maximum number of NFA states that can comprise a DFA state. It's real
|
||||
* big because if there's a lot of rules, the initial state will have a
|
||||
* huge epsilon closure.
|
||||
*/
|
||||
#define INITIAL_MAX_DFA_SIZE 750
|
||||
#define MAX_DFA_SIZE_INCREMENT 750
|
||||
|
||||
|
||||
/* a note on the following masks. They are used to mark accepting numbers
|
||||
* as being special. As such, they implicitly limit the number of accepting
|
||||
* numbers (i.e., rules) because if there are too many rules the rule numbers
|
||||
* will overload the mask bits. Fortunately, this limit is \large/ (0x2000 ==
|
||||
* 8192) so unlikely to actually cause any problems. A check is made in
|
||||
* new_rule() to ensure that this limit is not reached.
|
||||
*/
|
||||
|
||||
/* mask to mark a trailing context accepting number */
|
||||
#define YY_TRAILING_MASK 0x2000
|
||||
|
||||
/* mask to mark the accepting number of the "head" of a trailing context rule */
|
||||
#define YY_TRAILING_HEAD_MASK 0x4000
|
||||
|
||||
/* maximum number of rules, as outlined in the above note */
|
||||
#define MAX_RULE (YY_TRAILING_MASK - 1)
|
||||
|
||||
|
||||
/* NIL must be 0. If not, its special meaning when making equivalence classes
|
||||
* (it marks the representative of a given e.c.) will be unidentifiable
|
||||
*/
|
||||
#define NIL 0
|
||||
|
||||
#define JAM -1 /* to mark a missing DFA transition */
|
||||
#define NO_TRANSITION NIL
|
||||
#define UNIQUE -1 /* marks a symbol as an e.c. representative */
|
||||
#define INFINITY -1 /* for x{5,} constructions */
|
||||
|
||||
#define INITIAL_MAX_CCLS 100 /* max number of unique character classes */
|
||||
#define MAX_CCLS_INCREMENT 100
|
||||
|
||||
/* size of table holding members of character classes */
|
||||
#define INITIAL_MAX_CCL_TBL_SIZE 500
|
||||
#define MAX_CCL_TBL_SIZE_INCREMENT 250
|
||||
|
||||
#define INITIAL_MAX_RULES 100 /* default maximum number of rules */
|
||||
#define MAX_RULES_INCREMENT 100
|
||||
|
||||
#define INITIAL_MNS 2000 /* default maximum number of nfa states */
|
||||
#define MNS_INCREMENT 1000 /* amount to bump above by if it's not enough */
|
||||
|
||||
#define INITIAL_MAX_DFAS 1000 /* default maximum number of dfa states */
|
||||
#define MAX_DFAS_INCREMENT 1000
|
||||
|
||||
#define JAMSTATE -32766 /* marks a reference to the state that always jams */
|
||||
|
||||
/* enough so that if it's subtracted from an NFA state number, the result
|
||||
* is guaranteed to be negative
|
||||
*/
|
||||
#define MARKER_DIFFERENCE 32000
|
||||
#define MAXIMUM_MNS 31999
|
||||
|
||||
/* maximum number of nxt/chk pairs for non-templates */
|
||||
#define INITIAL_MAX_XPAIRS 2000
|
||||
#define MAX_XPAIRS_INCREMENT 2000
|
||||
|
||||
/* maximum number of nxt/chk pairs needed for templates */
|
||||
#define INITIAL_MAX_TEMPLATE_XPAIRS 2500
|
||||
#define MAX_TEMPLATE_XPAIRS_INCREMENT 2500
|
||||
|
||||
#define SYM_EPSILON (CSIZE + 1) /* to mark transitions on the symbol epsilon */
|
||||
|
||||
#define INITIAL_MAX_SCS 40 /* maximum number of start conditions */
|
||||
#define MAX_SCS_INCREMENT 40 /* amount to bump by if it's not enough */
|
||||
|
||||
#define ONE_STACK_SIZE 500 /* stack of states with only one out-transition */
|
||||
#define SAME_TRANS -1 /* transition is the same as "default" entry for state */
|
||||
|
||||
/* the following percentages are used to tune table compression:
|
||||
|
||||
* the percentage the number of out-transitions a state must be of the
|
||||
* number of equivalence classes in order to be considered for table
|
||||
* compaction by using protos
|
||||
*/
|
||||
#define PROTO_SIZE_PERCENTAGE 15
|
||||
|
||||
/* the percentage the number of homogeneous out-transitions of a state
|
||||
* must be of the number of total out-transitions of the state in order
|
||||
* that the state's transition table is first compared with a potential
|
||||
* template of the most common out-transition instead of with the first
|
||||
* proto in the proto queue
|
||||
*/
|
||||
#define CHECK_COM_PERCENTAGE 50
|
||||
|
||||
/* the percentage the number of differences between a state's transition
|
||||
* table and the proto it was first compared with must be of the total
|
||||
* number of out-transitions of the state in order to keep the first
|
||||
* proto as a good match and not search any further
|
||||
*/
|
||||
#define FIRST_MATCH_DIFF_PERCENTAGE 10
|
||||
|
||||
/* the percentage the number of differences between a state's transition
|
||||
* table and the most similar proto must be of the state's total number
|
||||
* of out-transitions to use the proto as an acceptable close match
|
||||
*/
|
||||
#define ACCEPTABLE_DIFF_PERCENTAGE 50
|
||||
|
||||
/* the percentage the number of homogeneous out-transitions of a state
|
||||
* must be of the number of total out-transitions of the state in order
|
||||
* to consider making a template from the state
|
||||
*/
|
||||
#define TEMPLATE_SAME_PERCENTAGE 60
|
||||
|
||||
/* the percentage the number of differences between a state's transition
|
||||
* table and the most similar proto must be of the state's total number
|
||||
* of out-transitions to create a new proto from the state
|
||||
*/
|
||||
#define NEW_PROTO_DIFF_PERCENTAGE 20
|
||||
|
||||
/* the percentage the total number of out-transitions of a state must be
|
||||
* of the number of equivalence classes in order to consider trying to
|
||||
* fit the transition table into "holes" inside the nxt/chk table.
|
||||
*/
|
||||
#define INTERIOR_FIT_PERCENTAGE 15
|
||||
|
||||
/* size of region set aside to cache the complete transition table of
|
||||
* protos on the proto queue to enable quick comparisons
|
||||
*/
|
||||
#define PROT_SAVE_SIZE 2000
|
||||
|
||||
#define MSP 50 /* maximum number of saved protos (protos on the proto queue) */
|
||||
|
||||
/* maximum number of out-transitions a state can have that we'll rummage
|
||||
* around through the interior of the internal fast table looking for a
|
||||
* spot for it
|
||||
*/
|
||||
#define MAX_XTIONS_FULL_INTERIOR_FIT 4
|
||||
|
||||
/* maximum number of rules which will be reported as being associated
|
||||
* with a DFA state
|
||||
*/
|
||||
#define MAX_ASSOC_RULES 100
|
||||
|
||||
/* number that, if used to subscript an array, has a good chance of producing
|
||||
* an error; should be small enough to fit into a short
|
||||
*/
|
||||
#define BAD_SUBSCRIPT -32767
|
||||
|
||||
/* absolute value of largest number that can be stored in a short, with a
|
||||
* bit of slop thrown in for general paranoia.
|
||||
*/
|
||||
#define MAX_SHORT 32766
|
||||
|
||||
|
||||
/* Declarations for global variables. */
|
||||
|
||||
/* variables for symbol tables:
|
||||
* sctbl - start-condition symbol table
|
||||
* ndtbl - name-definition symbol table
|
||||
* ccltab - character class text symbol table
|
||||
*/
|
||||
|
||||
/* one entry of a symbol table (the tables themselves are arrays of
 * pointers to these entries)
 */
struct hash_entry
    {
    /* links to neighbouring entries - presumably the chain of entries
     * sharing one hash slot; confirm against sym.c
     */
    struct hash_entry *prev;
    struct hash_entry *next;

    char *name;         /* presumably the symbol's key - confirm in sym.c */
    char *str_val;      /* string value stored with the entry */
    int int_val;        /* integer value stored with the entry */
    } ;
|
||||
|
||||
typedef struct hash_entry *hash_table[];
|
||||
|
||||
#define NAME_TABLE_HASH_SIZE 101
|
||||
#define START_COND_HASH_SIZE 101
|
||||
#define CCL_HASH_SIZE 101
|
||||
|
||||
extern struct hash_entry *ndtbl[NAME_TABLE_HASH_SIZE];
|
||||
extern struct hash_entry *sctbl[START_COND_HASH_SIZE];
|
||||
extern struct hash_entry *ccltab[CCL_HASH_SIZE];
|
||||
|
||||
|
||||
/* variables for flags:
|
||||
* printstats - if true (-v), dump statistics
|
||||
* syntaxerror - true if a syntax error has been found
|
||||
* eofseen - true if we've seen an eof in the input file
|
||||
* ddebug - if true (-d), make a "debug" scanner
|
||||
* trace - if true (-T), trace processing
|
||||
* spprdflt - if true (-s), suppress the default rule
|
||||
* interactive - if true (-I), generate an interactive scanner
|
||||
* caseins - if true (-i), generate a case-insensitive scanner
|
||||
* useecs - if true (-Ce flag), use equivalence classes
|
||||
* fulltbl - if true (-Cf flag), don't compress the DFA state table
|
||||
* usemecs - if true (-Cm flag), use meta-equivalence classes
|
||||
* fullspd - if true (-F flag), use Jacobson method of table representation
|
||||
* gen_line_dirs - if true (i.e., no -L flag), generate #line directives
|
||||
* performance_report - if true (i.e., -p flag), generate a report relating
|
||||
* to scanner performance
|
||||
* backtrack_report - if true (i.e., -b flag), generate "lex.backtrack" file
|
||||
* listing backtracking states
|
||||
* csize - size of character set for the scanner we're generating;
|
||||
* 128 for 7-bit chars and 256 for 8-bit
|
||||
* yymore_used - if true, yymore() is used in input rules
|
||||
* reject - if true, generate backtracking tables for REJECT macro
|
||||
* real_reject - if true, scanner really uses REJECT (as opposed to just
|
||||
* having "reject" set for variable trailing context)
|
||||
* continued_action - true if this rule's action is to "fall through" to
|
||||
* the next rule's action (i.e., the '|' action)
|
||||
* yymore_really_used - has a REALLY_xxx value indicating whether a
|
||||
* %used or %notused was used with yymore()
|
||||
* reject_really_used - same for REJECT
|
||||
*/
|
||||
|
||||
extern int printstats, syntaxerror, eofseen, ddebug, trace, spprdflt;
|
||||
extern int interactive, caseins, useecs, fulltbl, usemecs;
|
||||
extern int fullspd, gen_line_dirs, performance_report, backtrack_report, csize;
|
||||
extern int yymore_used, reject, real_reject, continued_action;
|
||||
|
||||
#define REALLY_NOT_DETERMINED 0
|
||||
#define REALLY_USED 1
|
||||
#define REALLY_NOT_USED 2
|
||||
extern int yymore_really_used, reject_really_used;
|
||||
|
||||
|
||||
/* variables used in the flex input routines:
|
||||
* datapos - characters on current output line
|
||||
* dataline - number of contiguous lines of data in current data
|
||||
* statement. Used to generate readable -f output
|
||||
* linenum - current input line number
|
||||
* skelfile - the skeleton file
|
||||
* yyin - input file
|
||||
* temp_action_file - temporary file to hold actions
|
||||
* backtrack_file - file to summarize backtracking states to
|
||||
* infilename - name of input file
|
||||
* action_file_name - name of the temporary file
|
||||
* input_files - array holding names of input files
|
||||
* num_input_files - size of input_files array
|
||||
* program_name - name with which program was invoked
|
||||
*/
|
||||
|
||||
extern int datapos, dataline, linenum;
|
||||
extern FILE *skelfile, *yyin, *temp_action_file, *backtrack_file;
|
||||
extern char *infilename;
|
||||
extern char *action_file_name;
|
||||
extern char **input_files;
|
||||
extern int num_input_files;
|
||||
extern char *program_name;
|
||||
|
||||
|
||||
/* variables for stack of states having only one out-transition:
|
||||
* onestate - state number
|
||||
* onesym - transition symbol
|
||||
* onenext - target state
|
||||
* onedef - default base entry
|
||||
* onesp - stack pointer
|
||||
*/
|
||||
|
||||
extern int onestate[ONE_STACK_SIZE], onesym[ONE_STACK_SIZE];
|
||||
extern int onenext[ONE_STACK_SIZE], onedef[ONE_STACK_SIZE], onesp;
|
||||
|
||||
|
||||
/* variables for nfa machine data:
|
||||
* current_mns - current maximum on number of NFA states
|
||||
* num_rules - number of the last accepting state; also is number of
|
||||
* rules created so far
|
||||
* current_max_rules - current maximum number of rules
|
||||
* lastnfa - last nfa state number created
|
||||
* firstst - physically the first state of a fragment
|
||||
* lastst - last physical state of fragment
|
||||
* finalst - last logical state of fragment
|
||||
* transchar - transition character
|
||||
* trans1 - transition state
|
||||
* trans2 - 2nd transition state for epsilons
|
||||
* accptnum - accepting number
|
||||
* assoc_rule - rule associated with this NFA state (or 0 if none)
|
||||
* state_type - a STATE_xxx type identifying whether the state is part
|
||||
* of a normal rule, the leading state in a trailing context
|
||||
* rule (i.e., the state which marks the transition from
|
||||
* recognizing the text-to-be-matched to the beginning of
|
||||
* the trailing context), or a subsequent state in a trailing
|
||||
* context rule
|
||||
* rule_type - a RULE_xxx type identifying whether this is a ho-hum
|
||||
* normal rule or one which has variable head & trailing
|
||||
* context
|
||||
* rule_linenum - line number associated with rule
|
||||
*/
|
||||
|
||||
extern int current_mns, num_rules, current_max_rules, lastnfa;
|
||||
extern int *firstst, *lastst, *finalst, *transchar, *trans1, *trans2;
|
||||
extern int *accptnum, *assoc_rule, *state_type, *rule_type, *rule_linenum;
|
||||
|
||||
/* different types of states; values are useful as masks, as well, for
|
||||
* routines like check_trailing_context()
|
||||
*/
|
||||
#define STATE_NORMAL 0x1
|
||||
#define STATE_TRAILING_CONTEXT 0x2
|
||||
|
||||
/* global holding current type of state we're making */
|
||||
|
||||
extern int current_state_type;
|
||||
|
||||
/* different types of rules */
|
||||
#define RULE_NORMAL 0
|
||||
#define RULE_VARIABLE 1
|
||||
|
||||
/* true if the input rules include a rule with both variable-length head
|
||||
* and trailing context, false otherwise
|
||||
*/
|
||||
extern int variable_trailing_context_rules;
|
||||
|
||||
|
||||
/* variables for protos:
|
||||
* numtemps - number of templates created
|
||||
* numprots - number of protos created
|
||||
* protprev - backlink to a more-recently used proto
|
||||
* protnext - forward link to a less-recently used proto
|
||||
* prottbl - base/def table entry for proto
|
||||
* protcomst - common state of proto
|
||||
* firstprot - number of the most recently used proto
|
||||
* lastprot - number of the least recently used proto
|
||||
* protsave contains the entire state array for protos
|
||||
*/
|
||||
|
||||
extern int numtemps, numprots, protprev[MSP], protnext[MSP], prottbl[MSP];
|
||||
extern int protcomst[MSP], firstprot, lastprot, protsave[PROT_SAVE_SIZE];
|
||||
|
||||
|
||||
/* variables for managing equivalence classes:
|
||||
* numecs - number of equivalence classes
|
||||
* nextecm - forward link of Equivalence Class members
|
||||
* ecgroup - class number or backward link of EC members
|
||||
* nummecs - number of meta-equivalence classes (used to compress
|
||||
* templates)
|
||||
* tecfwd - forward link of meta-equivalence classes members
|
||||
* tecbck - backward link of MEC's
|
||||
* xlation - maps character codes to their translations, or nil if no %t table
|
||||
* num_xlations - number of different xlation values
|
||||
*/
|
||||
|
||||
/* reserve enough room in the equivalence class arrays so that we
|
||||
* can use the CSIZE'th element to hold equivalence class information
|
||||
* for the NUL character. Later we'll move this information into
|
||||
* the 0th element.
|
||||
*/
|
||||
extern int numecs, nextecm[CSIZE + 1], ecgroup[CSIZE + 1], nummecs;
|
||||
|
||||
/* meta-equivalence classes are indexed starting at 1, so it's possible
|
||||
* that they will require positions from 1 .. CSIZE, i.e., CSIZE + 1
|
||||
* slots total (since the arrays are 0-based). nextecm[] and ecgroup[]
|
||||
* don't require the extra position since they're indexed from 1 .. CSIZE - 1.
|
||||
*/
|
||||
extern int tecfwd[CSIZE + 1], tecbck[CSIZE + 1];
|
||||
|
||||
extern int *xlation;
|
||||
extern int num_xlations;
|
||||
|
||||
|
||||
/* variables for start conditions:
|
||||
* lastsc - last start condition created
|
||||
* current_max_scs - current limit on number of start conditions
|
||||
* scset - set of rules active in start condition
|
||||
* scbol - set of rules active only at the beginning of line in a s.c.
|
||||
* scxclu - true if start condition is exclusive
|
||||
* sceof - true if start condition has EOF rule
|
||||
* scname - start condition name
|
||||
* actvsc - stack of active start conditions for the current rule
|
||||
*/
|
||||
|
||||
extern int lastsc, current_max_scs, *scset, *scbol, *scxclu, *sceof, *actvsc;
|
||||
extern char **scname;
|
||||
|
||||
|
||||
/* variables for dfa machine data:
|
||||
* current_max_dfa_size - current maximum number of NFA states in DFA
|
||||
* current_max_xpairs - current maximum number of non-template xtion pairs
|
||||
* current_max_template_xpairs - current maximum number of template pairs
|
||||
* current_max_dfas - current maximum number of DFA states
|
||||
* lastdfa - last dfa state number created
|
||||
* nxt - state to enter upon reading character
|
||||
* chk - check value to see if "nxt" applies
|
||||
* tnxt - internal nxt table for templates
|
||||
* base - offset into "nxt" for given state
|
||||
* def - where to go if "chk" disallows "nxt" entry
|
||||
* nultrans - NUL transition for each state
|
||||
* NUL_ec - equivalence class of the NUL character
|
||||
* tblend - last "nxt/chk" table entry being used
|
||||
* firstfree - first empty entry in "nxt/chk" table
|
||||
* dss - nfa state set for each dfa
|
||||
* dfasiz - size of nfa state set for each dfa
|
||||
* dfaacc - accepting set for each dfa state (or accepting number, if
|
||||
* -r is not given)
|
||||
* accsiz - size of accepting set for each dfa state
|
||||
* dhash - dfa state hash value
|
||||
* numas - number of DFA accepting states created; note that this
|
||||
* is not necessarily the same value as num_rules, which is the analogous
|
||||
* value for the NFA
|
||||
* numsnpairs - number of state/nextstate transition pairs
|
||||
* jambase - position in base/def where the default jam table starts
|
||||
* jamstate - state number corresponding to "jam" state
|
||||
* end_of_buffer_state - end-of-buffer dfa state number
|
||||
*/
|
||||
|
||||
extern int current_max_dfa_size, current_max_xpairs;
|
||||
extern int current_max_template_xpairs, current_max_dfas;
|
||||
extern int lastdfa, lasttemp, *nxt, *chk, *tnxt;
|
||||
extern int *base, *def, *nultrans, NUL_ec, tblend, firstfree, **dss, *dfasiz;
|
||||
extern union dfaacc_union
|
||||
{
|
||||
int *dfaacc_set;
|
||||
int dfaacc_state;
|
||||
} *dfaacc;
|
||||
extern int *accsiz, *dhash, numas;
|
||||
extern int numsnpairs, jambase, jamstate;
|
||||
extern int end_of_buffer_state;
|
||||
|
||||
/* variables for ccl information:
|
||||
* lastccl - ccl index of the last created ccl
|
||||
* current_maxccls - current limit on the maximum number of unique ccl's
|
||||
* cclmap - maps a ccl index to its set pointer
|
||||
* ccllen - gives the length of a ccl
|
||||
* cclng - true for a given ccl if the ccl is negated
|
||||
* cclreuse - counts how many times a ccl is re-used
|
||||
* current_max_ccl_tbl_size - current limit on number of characters needed
|
||||
* to represent the unique ccl's
|
||||
* ccltbl - holds the characters in each ccl - indexed by cclmap
|
||||
*/
|
||||
|
||||
extern int lastccl, current_maxccls, *cclmap, *ccllen, *cclng, cclreuse;
|
||||
extern int current_max_ccl_tbl_size;
|
||||
extern Char *ccltbl;
|
||||
|
||||
|
||||
/* variables for miscellaneous information:
|
||||
* starttime - real-time when we started
|
||||
* endtime - real-time when we ended
|
||||
* nmstr - last NAME scanned by the scanner
|
||||
* sectnum - section number currently being parsed
|
||||
* nummt - number of empty nxt/chk table entries
|
||||
* hshcol - number of hash collisions detected by snstods
|
||||
* dfaeql - number of times a newly created dfa was equal to an old one
|
||||
* numeps - number of epsilon NFA states created
|
||||
* eps2 - number of epsilon states which have 2 out-transitions
|
||||
* num_reallocs - number of times it was necessary to realloc() a group
|
||||
* of arrays
|
||||
* tmpuses - number of DFA states that chain to templates
|
||||
* totnst - total number of NFA states used to make DFA states
|
||||
* peakpairs - peak number of transition pairs we had to store internally
|
||||
* numuniq - number of unique transitions
|
||||
* numdup - number of duplicate transitions
|
||||
* hshsave - number of hash collisions saved by checking number of states
|
||||
* num_backtracking - number of DFA states requiring back-tracking
|
||||
* bol_needed - whether scanner needs beginning-of-line recognition
|
||||
*/
|
||||
|
||||
extern char *starttime, *endtime, nmstr[MAXLINE];
|
||||
extern int sectnum, nummt, hshcol, dfaeql, numeps, eps2, num_reallocs;
|
||||
extern int tmpuses, totnst, peakpairs, numuniq, numdup, hshsave;
|
||||
extern int num_backtracking, bol_needed;
|
||||
|
||||
void *allocate_array(), *reallocate_array();
|
||||
|
||||
/* typed wrappers around allocate_array() and reallocate_array()
 *
 * Each wrapper passes the element count and element size through and casts
 * the result to the matching pointer type.  Every expansion is enclosed in
 * parentheses so that a postfix operator applied to the macro call (such as
 * a subscript) binds to the cast result rather than to the call, and the
 * `array' argument is parenthesized before being cast to void *.
 */

#define allocate_integer_array(size) \
        ((int *) allocate_array( size, sizeof( int ) ))

#define reallocate_integer_array(array,size) \
        ((int *) reallocate_array( (void *) (array), size, sizeof( int ) ))

#define allocate_int_ptr_array(size) \
        ((int **) allocate_array( size, sizeof( int * ) ))

#define allocate_char_ptr_array(size) \
        ((char **) allocate_array( size, sizeof( char * ) ))

#define allocate_dfaacc_union(size) \
        ((union dfaacc_union *) \
                allocate_array( size, sizeof( union dfaacc_union ) ))

#define reallocate_int_ptr_array(array,size) \
        ((int **) reallocate_array( (void *) (array), size, sizeof( int * ) ))

#define reallocate_char_ptr_array(array,size) \
        ((char **) reallocate_array( (void *) (array), size, sizeof( char * ) ))

#define reallocate_dfaacc_union(array, size) \
        ((union dfaacc_union *) \
                reallocate_array( (void *) (array), size, \
                                  sizeof( union dfaacc_union ) ))

#define allocate_character_array(size) \
        ((Char *) allocate_array( size, sizeof( Char ) ))

#define reallocate_character_array(array,size) \
        ((Char *) reallocate_array( (void *) (array), size, sizeof( Char ) ))
|
||||
|
||||
|
||||
/* used to communicate between scanner and parser. The type should really
|
||||
* be YYSTYPE, but we can't easily get our hands on it.
|
||||
*/
|
||||
extern int yylval;
|
||||
|
||||
|
||||
/* external functions that are cross-referenced among the flex source files */
|
||||
|
||||
|
||||
/* from file ccl.c */
|
||||
|
||||
extern void ccladd PROTO((int, int)); /* Add a single character to a ccl */
|
||||
extern int cclinit PROTO(()); /* make an empty ccl */
|
||||
extern void cclnegate PROTO((int)); /* negate a ccl */
|
||||
|
||||
/* list the members of a set of characters in CCL form */
|
||||
extern void list_character_set PROTO((FILE*, int[]));
|
||||
|
||||
|
||||
/* from file dfa.c */
|
||||
|
||||
/* increase the maximum number of dfas */
|
||||
extern void increase_max_dfas PROTO(());
|
||||
|
||||
extern void ntod PROTO(()); /* convert a ndfa to a dfa */
|
||||
|
||||
|
||||
/* from file ecs.c */
|
||||
|
||||
/* convert character classes to set of equivalence classes */
|
||||
extern void ccl2ecl PROTO(());
|
||||
|
||||
/* associate equivalence class numbers with class members */
|
||||
extern int cre8ecs PROTO((int[], int[], int));
|
||||
|
||||
/* associate equivalence class numbers using %t table */
|
||||
extern int ecs_from_xlation PROTO((int[]));
|
||||
|
||||
/* update equivalence classes based on character class transitions */
|
||||
extern void mkeccl PROTO((Char[], int, int[], int[], int, int));
|
||||
|
||||
/* create equivalence class for single character */
|
||||
extern void mkechar PROTO((int, int[], int[]));
|
||||
|
||||
|
||||
/* from file gen.c */
|
||||
|
||||
extern void make_tables PROTO(()); /* generate transition tables */
|
||||
|
||||
|
||||
/* from file main.c */
|
||||
|
||||
extern void flexend PROTO((int));
|
||||
|
||||
|
||||
/* from file misc.c */
|
||||
|
||||
/* write out the actions from the temporary file to lex.yy.c */
|
||||
extern void action_out PROTO(());
|
||||
|
||||
/* true if a string is all lower case */
|
||||
extern int all_lower PROTO((register Char *));
|
||||
|
||||
/* true if a string is all upper case */
|
||||
extern int all_upper PROTO((register Char *));
|
||||
|
||||
/* bubble sort an integer array */
|
||||
extern void bubble PROTO((int [], int));
|
||||
|
||||
/* shell sort a character array */
|
||||
extern void cshell PROTO((Char [], int, int));
|
||||
|
||||
extern void dataend PROTO(()); /* finish up a block of data declarations */
|
||||
|
||||
/* report an error message and terminate */
|
||||
extern void flexerror PROTO((char[]));
|
||||
|
||||
/* report a fatal error message and terminate */
|
||||
extern void flexfatal PROTO((char[]));
|
||||
|
||||
/* report an error message formatted with one integer argument */
|
||||
extern void lerrif PROTO((char[], int));
|
||||
|
||||
/* report an error message formatted with one string argument */
|
||||
extern void lerrsf PROTO((char[], char[]));
|
||||
|
||||
/* spit out a "# line" statement */
|
||||
extern void line_directive_out PROTO((FILE*));
|
||||
|
||||
/* generate a data statement for a two-dimensional array */
|
||||
extern void mk2data PROTO((int));
|
||||
|
||||
extern void mkdata PROTO((int)); /* generate a data statement */
|
||||
|
||||
/* return the integer represented by a string of digits */
|
||||
extern int myctoi PROTO((Char []));
|
||||
|
||||
/* write out one section of the skeleton file */
|
||||
extern void skelout PROTO(());
|
||||
|
||||
/* output a yy_trans_info structure */
|
||||
extern void transition_struct_out PROTO((int, int));
|
||||
|
||||
|
||||
/* from file nfa.c */
|
||||
|
||||
/* add an accepting state to a machine */
|
||||
extern void add_accept PROTO((int, int));
|
||||
|
||||
/* make a given number of copies of a singleton machine */
|
||||
extern int copysingl PROTO((int, int));
|
||||
|
||||
/* debugging routine to write out an nfa */
|
||||
extern void dumpnfa PROTO((int));
|
||||
|
||||
/* finish up the processing for a rule */
|
||||
extern void finish_rule PROTO((int, int, int, int));
|
||||
|
||||
/* connect two machines together */
|
||||
extern int link_machines PROTO((int, int));
|
||||
|
||||
/* mark each "beginning" state in a machine as being a "normal" (i.e.,
|
||||
* not trailing context associated) state
|
||||
*/
|
||||
extern void mark_beginning_as_normal PROTO((register int));
|
||||
|
||||
/* make a machine that branches to two machines */
|
||||
extern int mkbranch PROTO((int, int));
|
||||
|
||||
extern int mkclos PROTO((int)); /* convert a machine into a closure */
|
||||
extern int mkopt PROTO((int)); /* make a machine optional */
|
||||
|
||||
/* make a machine that matches either one of two machines */
|
||||
extern int mkor PROTO((int, int));
|
||||
|
||||
/* convert a machine into a positive closure */
|
||||
extern int mkposcl PROTO((int));
|
||||
|
||||
extern int mkrep PROTO((int, int, int)); /* make a replicated machine */
|
||||
|
||||
/* create a state with a transition on a given symbol */
|
||||
extern int mkstate PROTO((int));
|
||||
|
||||
extern void new_rule PROTO(()); /* initialize for a new rule */
|
||||
|
||||
|
||||
/* from file parse.y */
|
||||
|
||||
/* write out a message formatted with one string, pinpointing its location */
|
||||
extern void format_pinpoint_message PROTO((char[], char[]));
|
||||
|
||||
/* write out a message, pinpointing its location */
|
||||
extern void pinpoint_message PROTO((char[]));
|
||||
|
||||
extern void synerr PROTO((char [])); /* report a syntax error */
|
||||
extern int yyparse PROTO(()); /* the YACC parser */
|
||||
|
||||
|
||||
/* from file scan.l */
|
||||
|
||||
extern int flexscan PROTO(()); /* the Flex-generated scanner for flex */
|
||||
|
||||
/* open the given file (if NULL, stdin) for scanning */
|
||||
extern void set_input_file PROTO((char*));
|
||||
|
||||
extern int yywrap PROTO(()); /* wrapup a file in the lexical analyzer */
|
||||
|
||||
|
||||
/* from file sym.c */
|
||||
|
||||
/* save the text of a character class */
|
||||
extern void cclinstal PROTO ((Char [], int));
|
||||
|
||||
/* lookup the number associated with character class */
|
||||
extern int ccllookup PROTO((Char []));
|
||||
|
||||
extern void ndinstal PROTO((char[], Char[])); /* install a name definition */
|
||||
extern void scinstal PROTO((char[], int)); /* make a start condition */
|
||||
|
||||
/* lookup the number associated with a start condition */
|
||||
extern int sclookup PROTO((char[]));
|
||||
|
||||
|
||||
/* from file tblcmp.c */
|
||||
|
||||
/* build table entries for dfa state */
|
||||
extern void bldtbl PROTO((int[], int, int, int, int));
|
||||
|
||||
extern void cmptmps PROTO(()); /* compress template table entries */
|
||||
extern void inittbl PROTO(()); /* initialize transition tables */
|
||||
extern void mkdeftbl PROTO(()); /* make the default, "jam" table entries */
|
||||
|
||||
/* create table entries for a state (or state fragment) which has
|
||||
* only one out-transition */
|
||||
extern void mk1tbl PROTO((int, int, int, int));
|
||||
|
||||
/* place a state into full speed transition table */
|
||||
extern void place_state PROTO((int*, int, int));
|
||||
|
||||
/* save states with only one out-transition to be processed later */
|
||||
extern void stack1 PROTO((int, int, int, int));
|
||||
|
||||
|
||||
/* from file yylex.c */
|
||||
|
||||
extern int yylex PROTO(());
|
||||
|
||||
|
||||
/* The Unix kernel calls used here */
|
||||
|
||||
extern int read PROTO((int, char*, int));
|
||||
extern int unlink PROTO((char*));
|
||||
extern int write PROTO((int, char*, int));
|
2446
util/flex/flexdoc.1
Normal file
2446
util/flex/flexdoc.1
Normal file
File diff suppressed because it is too large
Load diff
1336
util/flex/gen.c
Normal file
1336
util/flex/gen.c
Normal file
File diff suppressed because it is too large
Load diff
2294
util/flex/initscan.c
Normal file
2294
util/flex/initscan.c
Normal file
File diff suppressed because it is too large
Load diff
13
util/flex/libmain.c
Normal file
13
util/flex/libmain.c
Normal file
|
@ -0,0 +1,13 @@
|
|||
/* libmain - flex run-time support library "main" function */
|
||||
|
||||
/* $Header$ */
|
||||
|
||||
extern int yylex();

/* main - default entry point supplied by the flex run-time library
 *
 * The command-line arguments are accepted but not examined; the scanner
 * is invoked once and its return value becomes the program's exit status.
 */

int main( argc, argv )
int argc;
char *argv[];

{
    int scan_status;

    scan_status = yylex();

    return scan_status;
}
|
769
util/flex/main.c
Normal file
769
util/flex/main.c
Normal file
|
@ -0,0 +1,769 @@
|
|||
/* flex - tool to generate fast lexical analyzers */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 1990 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* Vern Paxson.
|
||||
*
|
||||
* The United States Government has rights in this work pursuant
|
||||
* to contract no. DE-AC03-76SF00098 between the United States
|
||||
* Department of Energy and the University of California.
|
||||
*
|
||||
* Redistribution and use in source and binary forms are permitted provided
|
||||
* that: (1) source distributions retain this entire copyright notice and
|
||||
* comment, and (2) distributions including binaries display the following
|
||||
* acknowledgement: ``This product includes software developed by the
|
||||
* University of California, Berkeley and its contributors'' in the
|
||||
* documentation or other materials provided with the distribution and in
|
||||
* all advertising materials mentioning features or use of this software.
|
||||
* Neither the name of the University nor the names of its contributors may
|
||||
* be used to endorse or promote products derived from this software without
|
||||
* specific prior written permission.
|
||||
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
|
||||
* WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*/
|
||||
|
||||
#ifndef lint
|
||||
char copyright[] =
|
||||
"@(#) Copyright (c) 1990 The Regents of the University of California.\n\
|
||||
All rights reserved.\n";
|
||||
#endif /* not lint */
|
||||
|
||||
#ifndef lint
|
||||
static char rcsid[] =
|
||||
"@(#) $Header$ (LBL)";
|
||||
#endif
|
||||
|
||||
|
||||
#include "flexdef.h"
|
||||
|
||||
static char flex_version[] = "2.3";
|
||||
|
||||
|
||||
/* declare functions that have forward references */
|
||||
|
||||
void flexinit PROTO((int, char**));
|
||||
void readin PROTO(());
|
||||
void set_up_initial_allocations PROTO(());
|
||||
|
||||
|
||||
/* these globals are all defined and commented in flexdef.h */
|
||||
int printstats, syntaxerror, eofseen, ddebug, trace, spprdflt;
|
||||
int interactive, caseins, useecs, fulltbl, usemecs;
|
||||
int fullspd, gen_line_dirs, performance_report, backtrack_report, csize;
|
||||
int yymore_used, reject, real_reject, continued_action;
|
||||
int yymore_really_used, reject_really_used;
|
||||
int datapos, dataline, linenum;
|
||||
FILE *skelfile = NULL;
|
||||
char *infilename = NULL;
|
||||
int onestate[ONE_STACK_SIZE], onesym[ONE_STACK_SIZE];
|
||||
int onenext[ONE_STACK_SIZE], onedef[ONE_STACK_SIZE], onesp;
|
||||
int current_mns, num_rules, current_max_rules, lastnfa;
|
||||
int *firstst, *lastst, *finalst, *transchar, *trans1, *trans2;
|
||||
int *accptnum, *assoc_rule, *state_type, *rule_type, *rule_linenum;
|
||||
int current_state_type;
|
||||
int variable_trailing_context_rules;
|
||||
int numtemps, numprots, protprev[MSP], protnext[MSP], prottbl[MSP];
|
||||
int protcomst[MSP], firstprot, lastprot, protsave[PROT_SAVE_SIZE];
|
||||
int numecs, nextecm[CSIZE + 1], ecgroup[CSIZE + 1], nummecs, tecfwd[CSIZE + 1];
|
||||
int tecbck[CSIZE + 1];
|
||||
int *xlation = (int *) 0;
|
||||
int num_xlations;
|
||||
int lastsc, current_max_scs, *scset, *scbol, *scxclu, *sceof, *actvsc;
|
||||
char **scname;
|
||||
int current_max_dfa_size, current_max_xpairs;
|
||||
int current_max_template_xpairs, current_max_dfas;
|
||||
int lastdfa, *nxt, *chk, *tnxt;
|
||||
int *base, *def, *nultrans, NUL_ec, tblend, firstfree, **dss, *dfasiz;
|
||||
union dfaacc_union *dfaacc;
|
||||
int *accsiz, *dhash, numas;
|
||||
int numsnpairs, jambase, jamstate;
|
||||
int lastccl, current_maxccls, *cclmap, *ccllen, *cclng, cclreuse;
|
||||
int current_max_ccl_tbl_size;
|
||||
Char *ccltbl;
|
||||
char *starttime, *endtime, nmstr[MAXLINE];
|
||||
int sectnum, nummt, hshcol, dfaeql, numeps, eps2, num_reallocs;
|
||||
int tmpuses, totnst, peakpairs, numuniq, numdup, hshsave;
|
||||
int num_backtracking, bol_needed;
|
||||
FILE *temp_action_file;
|
||||
FILE *backtrack_file;
|
||||
int end_of_buffer_state;
|
||||
char *action_file_name = NULL;
|
||||
char **input_files;
|
||||
int num_input_files;
|
||||
char *program_name;
|
||||
|
||||
#ifndef SHORT_FILE_NAMES
|
||||
static char *outfile = "lex.yy.c";
|
||||
#else
|
||||
static char *outfile = "lexyy.c";
|
||||
#endif
|
||||
static int outfile_created = 0;
|
||||
static int use_stdout;
|
||||
static char *skelname = NULL;
|
||||
|
||||
|
||||
int main( argc, argv )
|
||||
int argc;
|
||||
char **argv;
|
||||
|
||||
{
|
||||
flexinit( argc, argv );
|
||||
|
||||
readin();
|
||||
|
||||
if ( syntaxerror )
|
||||
flexend( 1 );
|
||||
|
||||
if ( yymore_really_used == REALLY_USED )
|
||||
yymore_used = true;
|
||||
else if ( yymore_really_used == REALLY_NOT_USED )
|
||||
yymore_used = false;
|
||||
|
||||
if ( reject_really_used == REALLY_USED )
|
||||
reject = true;
|
||||
else if ( reject_really_used == REALLY_NOT_USED )
|
||||
reject = false;
|
||||
|
||||
if ( performance_report )
|
||||
{
|
||||
if ( interactive )
|
||||
fprintf( stderr,
|
||||
"-I (interactive) entails a minor performance penalty\n" );
|
||||
|
||||
if ( yymore_used )
|
||||
fprintf( stderr, "yymore() entails a minor performance penalty\n" );
|
||||
|
||||
if ( reject )
|
||||
fprintf( stderr, "REJECT entails a large performance penalty\n" );
|
||||
|
||||
if ( variable_trailing_context_rules )
|
||||
fprintf( stderr,
|
||||
"Variable trailing context rules entail a large performance penalty\n" );
|
||||
}
|
||||
|
||||
if ( reject )
|
||||
real_reject = true;
|
||||
|
||||
if ( variable_trailing_context_rules )
|
||||
reject = true;
|
||||
|
||||
if ( (fulltbl || fullspd) && reject )
|
||||
{
|
||||
if ( real_reject )
|
||||
flexerror( "REJECT cannot be used with -f or -F" );
|
||||
else
|
||||
flexerror(
|
||||
"variable trailing context rules cannot be used with -f or -F" );
|
||||
}
|
||||
|
||||
ntod();
|
||||
|
||||
/* generate the C state transition tables from the DFA */
|
||||
make_tables();
|
||||
|
||||
/* note, flexend does not return. It exits with its argument as status. */
|
||||
|
||||
flexend( 0 );
|
||||
|
||||
/*NOTREACHED*/
|
||||
}
|
||||
|
||||
|
||||
/* flexend - terminate flex
|
||||
*
|
||||
* synopsis
|
||||
* int status;
|
||||
* flexend( status );
|
||||
*
|
||||
* status is exit status.
|
||||
*
|
||||
* note
|
||||
* This routine does not return.
|
||||
*/
|
||||
|
||||
void flexend( status )
|
||||
int status;
|
||||
|
||||
{
|
||||
int tblsiz;
|
||||
char *flex_gettime();
|
||||
|
||||
if ( skelfile != NULL )
|
||||
{
|
||||
if ( ferror( skelfile ) )
|
||||
flexfatal( "error occurred when writing skeleton file" );
|
||||
|
||||
else if ( fclose( skelfile ) )
|
||||
flexfatal( "error occurred when closing skeleton file" );
|
||||
}
|
||||
|
||||
if ( temp_action_file )
|
||||
{
|
||||
if ( ferror( temp_action_file ) )
|
||||
flexfatal( "error occurred when writing temporary action file" );
|
||||
|
||||
else if ( fclose( temp_action_file ) )
|
||||
flexfatal( "error occurred when closing temporary action file" );
|
||||
|
||||
else if ( unlink( action_file_name ) )
|
||||
flexfatal( "error occurred when deleting temporary action file" );
|
||||
}
|
||||
|
||||
if ( status != 0 && outfile_created )
|
||||
{
|
||||
if ( ferror( stdout ) )
|
||||
flexfatal( "error occurred when writing output file" );
|
||||
|
||||
else if ( fclose( stdout ) )
|
||||
flexfatal( "error occurred when closing output file" );
|
||||
|
||||
else if ( unlink( outfile ) )
|
||||
flexfatal( "error occurred when deleting output file" );
|
||||
}
|
||||
|
||||
if ( backtrack_report && backtrack_file )
|
||||
{
|
||||
if ( num_backtracking == 0 )
|
||||
fprintf( backtrack_file, "No backtracking.\n" );
|
||||
else if ( fullspd || fulltbl )
|
||||
fprintf( backtrack_file,
|
||||
"%d backtracking (non-accepting) states.\n",
|
||||
num_backtracking );
|
||||
else
|
||||
fprintf( backtrack_file, "Compressed tables always backtrack.\n" );
|
||||
|
||||
if ( ferror( backtrack_file ) )
|
||||
flexfatal( "error occurred when writing backtracking file" );
|
||||
|
||||
else if ( fclose( backtrack_file ) )
|
||||
flexfatal( "error occurred when closing backtracking file" );
|
||||
}
|
||||
|
||||
if ( printstats )
|
||||
{
|
||||
endtime = flex_gettime();
|
||||
|
||||
fprintf( stderr, "%s version %s usage statistics:\n", program_name,
|
||||
flex_version );
|
||||
fprintf( stderr, " started at %s, finished at %s\n",
|
||||
starttime, endtime );
|
||||
|
||||
fprintf( stderr, " scanner options: -" );
|
||||
|
||||
if ( backtrack_report )
|
||||
putc( 'b', stderr );
|
||||
if ( ddebug )
|
||||
putc( 'd', stderr );
|
||||
if ( interactive )
|
||||
putc( 'I', stderr );
|
||||
if ( caseins )
|
||||
putc( 'i', stderr );
|
||||
if ( ! gen_line_dirs )
|
||||
putc( 'L', stderr );
|
||||
if ( performance_report )
|
||||
putc( 'p', stderr );
|
||||
if ( spprdflt )
|
||||
putc( 's', stderr );
|
||||
if ( use_stdout )
|
||||
putc( 't', stderr );
|
||||
if ( trace )
|
||||
putc( 'T', stderr );
|
||||
if ( printstats )
|
||||
putc( 'v', stderr ); /* always true! */
|
||||
if ( csize == 256 )
|
||||
putc( '8', stderr );
|
||||
|
||||
fprintf( stderr, " -C" );
|
||||
|
||||
if ( fulltbl )
|
||||
putc( 'f', stderr );
|
||||
if ( fullspd )
|
||||
putc( 'F', stderr );
|
||||
if ( useecs )
|
||||
putc( 'e', stderr );
|
||||
if ( usemecs )
|
||||
putc( 'm', stderr );
|
||||
|
||||
if ( strcmp( skelname, DEFAULT_SKELETON_FILE ) )
|
||||
fprintf( stderr, " -S%s", skelname );
|
||||
|
||||
putc( '\n', stderr );
|
||||
|
||||
fprintf( stderr, " %d/%d NFA states\n", lastnfa, current_mns );
|
||||
fprintf( stderr, " %d/%d DFA states (%d words)\n", lastdfa,
|
||||
current_max_dfas, totnst );
|
||||
fprintf( stderr,
|
||||
" %d rules\n", num_rules - 1 /* - 1 for def. rule */ );
|
||||
|
||||
if ( num_backtracking == 0 )
|
||||
fprintf( stderr, " No backtracking\n" );
|
||||
else if ( fullspd || fulltbl )
|
||||
fprintf( stderr, " %d backtracking (non-accepting) states\n",
|
||||
num_backtracking );
|
||||
else
|
||||
fprintf( stderr, " compressed tables always backtrack\n" );
|
||||
|
||||
if ( bol_needed )
|
||||
fprintf( stderr, " Beginning-of-line patterns used\n" );
|
||||
|
||||
fprintf( stderr, " %d/%d start conditions\n", lastsc,
|
||||
current_max_scs );
|
||||
fprintf( stderr, " %d epsilon states, %d double epsilon states\n",
|
||||
numeps, eps2 );
|
||||
|
||||
if ( lastccl == 0 )
|
||||
fprintf( stderr, " no character classes\n" );
|
||||
else
|
||||
fprintf( stderr,
|
||||
" %d/%d character classes needed %d/%d words of storage, %d reused\n",
|
||||
lastccl, current_maxccls,
|
||||
cclmap[lastccl] + ccllen[lastccl],
|
||||
current_max_ccl_tbl_size, cclreuse );
|
||||
|
||||
fprintf( stderr, " %d state/nextstate pairs created\n", numsnpairs );
|
||||
fprintf( stderr, " %d/%d unique/duplicate transitions\n",
|
||||
numuniq, numdup );
|
||||
|
||||
if ( fulltbl )
|
||||
{
|
||||
tblsiz = lastdfa * numecs;
|
||||
fprintf( stderr, " %d table entries\n", tblsiz );
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
tblsiz = 2 * (lastdfa + numtemps) + 2 * tblend;
|
||||
|
||||
fprintf( stderr, " %d/%d base-def entries created\n",
|
||||
lastdfa + numtemps, current_max_dfas );
|
||||
fprintf( stderr, " %d/%d (peak %d) nxt-chk entries created\n",
|
||||
tblend, current_max_xpairs, peakpairs );
|
||||
fprintf( stderr,
|
||||
" %d/%d (peak %d) template nxt-chk entries created\n",
|
||||
numtemps * nummecs, current_max_template_xpairs,
|
||||
numtemps * numecs );
|
||||
fprintf( stderr, " %d empty table entries\n", nummt );
|
||||
fprintf( stderr, " %d protos created\n", numprots );
|
||||
fprintf( stderr, " %d templates created, %d uses\n",
|
||||
numtemps, tmpuses );
|
||||
}
|
||||
|
||||
if ( useecs )
|
||||
{
|
||||
tblsiz = tblsiz + csize;
|
||||
fprintf( stderr, " %d/%d equivalence classes created\n",
|
||||
numecs, csize );
|
||||
}
|
||||
|
||||
if ( usemecs )
|
||||
{
|
||||
tblsiz = tblsiz + numecs;
|
||||
fprintf( stderr, " %d/%d meta-equivalence classes created\n",
|
||||
nummecs, csize );
|
||||
}
|
||||
|
||||
fprintf( stderr, " %d (%d saved) hash collisions, %d DFAs equal\n",
|
||||
hshcol, hshsave, dfaeql );
|
||||
fprintf( stderr, " %d sets of reallocations needed\n", num_reallocs );
|
||||
fprintf( stderr, " %d total table entries needed\n", tblsiz );
|
||||
}
|
||||
|
||||
#ifndef VMS
|
||||
exit( status );
|
||||
#else
|
||||
exit( status + 1 );
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/* flexinit - initialize flex
|
||||
*
|
||||
* synopsis
|
||||
* int argc;
|
||||
* char **argv;
|
||||
* flexinit( argc, argv );
|
||||
*/
|
||||
|
||||
void flexinit( argc, argv )
|
||||
int argc;
|
||||
char **argv;
|
||||
|
||||
{
|
||||
int i, sawcmpflag;
|
||||
char *arg, *flex_gettime(), *mktemp();
|
||||
|
||||
printstats = syntaxerror = trace = spprdflt = interactive = caseins = false;
|
||||
backtrack_report = performance_report = ddebug = fulltbl = fullspd = false;
|
||||
yymore_used = continued_action = reject = false;
|
||||
yymore_really_used = reject_really_used = false;
|
||||
gen_line_dirs = usemecs = useecs = true;
|
||||
|
||||
sawcmpflag = false;
|
||||
use_stdout = false;
|
||||
|
||||
csize = DEFAULT_CSIZE;
|
||||
|
||||
program_name = argv[0];
|
||||
|
||||
/* read flags */
|
||||
for ( --argc, ++argv; argc ; --argc, ++argv )
|
||||
{
|
||||
if ( argv[0][0] != '-' || argv[0][1] == '\0' )
|
||||
break;
|
||||
|
||||
arg = argv[0];
|
||||
|
||||
for ( i = 1; arg[i] != '\0'; ++i )
|
||||
switch ( arg[i] )
|
||||
{
|
||||
case 'b':
|
||||
backtrack_report = true;
|
||||
break;
|
||||
|
||||
case 'c':
|
||||
fprintf( stderr,
|
||||
"%s: Assuming use of deprecated -c flag is really intended to be -C\n",
|
||||
program_name );
|
||||
|
||||
/* fall through */
|
||||
|
||||
case 'C':
|
||||
if ( i != 1 )
|
||||
flexerror( "-C flag must be given separately" );
|
||||
|
||||
if ( ! sawcmpflag )
|
||||
{
|
||||
useecs = false;
|
||||
usemecs = false;
|
||||
fulltbl = false;
|
||||
sawcmpflag = true;
|
||||
}
|
||||
|
||||
for ( ++i; arg[i] != '\0'; ++i )
|
||||
switch ( arg[i] )
|
||||
{
|
||||
case 'e':
|
||||
useecs = true;
|
||||
break;
|
||||
|
||||
case 'F':
|
||||
fullspd = true;
|
||||
break;
|
||||
|
||||
case 'f':
|
||||
fulltbl = true;
|
||||
break;
|
||||
|
||||
case 'm':
|
||||
usemecs = true;
|
||||
break;
|
||||
|
||||
default:
|
||||
lerrif( "unknown -C option '%c'",
|
||||
(int) arg[i] );
|
||||
break;
|
||||
}
|
||||
|
||||
goto get_next_arg;
|
||||
|
||||
case 'd':
|
||||
ddebug = true;
|
||||
break;
|
||||
|
||||
case 'f':
|
||||
useecs = usemecs = false;
|
||||
fulltbl = true;
|
||||
break;
|
||||
|
||||
case 'F':
|
||||
useecs = usemecs = false;
|
||||
fullspd = true;
|
||||
break;
|
||||
|
||||
case 'I':
|
||||
interactive = true;
|
||||
break;
|
||||
|
||||
case 'i':
|
||||
caseins = true;
|
||||
break;
|
||||
|
||||
case 'L':
|
||||
gen_line_dirs = false;
|
||||
break;
|
||||
|
||||
case 'n':
|
||||
/* stupid do-nothing deprecated option */
|
||||
break;
|
||||
|
||||
case 'p':
|
||||
performance_report = true;
|
||||
break;
|
||||
|
||||
case 'S':
|
||||
if ( i != 1 )
|
||||
flexerror( "-S flag must be given separately" );
|
||||
|
||||
skelname = arg + i + 1;
|
||||
goto get_next_arg;
|
||||
|
||||
case 's':
|
||||
spprdflt = true;
|
||||
break;
|
||||
|
||||
case 't':
|
||||
use_stdout = true;
|
||||
break;
|
||||
|
||||
case 'T':
|
||||
trace = true;
|
||||
break;
|
||||
|
||||
case 'v':
|
||||
printstats = true;
|
||||
break;
|
||||
|
||||
case '8':
|
||||
csize = CSIZE;
|
||||
break;
|
||||
|
||||
default:
|
||||
lerrif( "unknown flag '%c'", (int) arg[i] );
|
||||
break;
|
||||
}
|
||||
|
||||
get_next_arg: /* used by -C and -S flags in lieu of a "continue 2" control */
|
||||
;
|
||||
}
|
||||
|
||||
if ( (fulltbl || fullspd) && usemecs )
|
||||
flexerror( "full table and -Cm don't make sense together" );
|
||||
|
||||
if ( (fulltbl || fullspd) && interactive )
|
||||
flexerror( "full table and -I are (currently) incompatible" );
|
||||
|
||||
if ( fulltbl && fullspd )
|
||||
flexerror( "full table and -F are mutually exclusive" );
|
||||
|
||||
if ( ! skelname )
|
||||
{
|
||||
static char skeleton_name_storage[400];
|
||||
|
||||
skelname = skeleton_name_storage;
|
||||
(void) strcpy( skelname, DEFAULT_SKELETON_FILE );
|
||||
}
|
||||
|
||||
if ( ! use_stdout )
|
||||
{
|
||||
FILE *prev_stdout = freopen( outfile, "w", stdout );
|
||||
|
||||
if ( prev_stdout == NULL )
|
||||
lerrsf( "could not create %s", outfile );
|
||||
|
||||
outfile_created = 1;
|
||||
}
|
||||
|
||||
num_input_files = argc;
|
||||
input_files = argv;
|
||||
set_input_file( num_input_files > 0 ? input_files[0] : NULL );
|
||||
|
||||
if ( backtrack_report )
|
||||
{
|
||||
#ifndef SHORT_FILE_NAMES
|
||||
backtrack_file = fopen( "lex.backtrack", "w" );
|
||||
#else
|
||||
backtrack_file = fopen( "lex.bck", "w" );
|
||||
#endif
|
||||
|
||||
if ( backtrack_file == NULL )
|
||||
flexerror( "could not create lex.backtrack" );
|
||||
}
|
||||
|
||||
else
|
||||
backtrack_file = NULL;
|
||||
|
||||
|
||||
lastccl = 0;
|
||||
lastsc = 0;
|
||||
|
||||
/* initialize the statistics */
|
||||
starttime = flex_gettime();
|
||||
|
||||
if ( (skelfile = fopen( skelname, "r" )) == NULL )
|
||||
lerrsf( "can't open skeleton file %s", skelname );
|
||||
|
||||
#ifndef ACK_MOD
|
||||
#ifdef SYS_V
|
||||
action_file_name = tmpnam( NULL );
|
||||
#endif
|
||||
#endif
|
||||
|
||||
if ( action_file_name == NULL )
|
||||
{
|
||||
static char temp_action_file_name[32];
|
||||
|
||||
#ifndef SHORT_FILE_NAMES
|
||||
(void) strcpy( temp_action_file_name, "/tmp/flexXXXXXX" );
|
||||
#else
|
||||
(void) strcpy( temp_action_file_name, "flexXXXXXX.tmp" );
|
||||
#endif
|
||||
(void) mktemp( temp_action_file_name );
|
||||
|
||||
action_file_name = temp_action_file_name;
|
||||
}
|
||||
|
||||
if ( (temp_action_file = fopen( action_file_name, "w" )) == NULL )
|
||||
lerrsf( "can't open temporary action file %s", action_file_name );
|
||||
|
||||
lastdfa = lastnfa = num_rules = numas = numsnpairs = tmpuses = 0;
|
||||
numecs = numeps = eps2 = num_reallocs = hshcol = dfaeql = totnst = 0;
|
||||
numuniq = numdup = hshsave = eofseen = datapos = dataline = 0;
|
||||
num_backtracking = onesp = numprots = 0;
|
||||
variable_trailing_context_rules = bol_needed = false;
|
||||
|
||||
linenum = sectnum = 1;
|
||||
firstprot = NIL;
|
||||
|
||||
/* used in mkprot() so that the first proto goes in slot 1
|
||||
* of the proto queue
|
||||
*/
|
||||
lastprot = 1;
|
||||
|
||||
if ( useecs )
|
||||
{ /* set up doubly-linked equivalence classes */
|
||||
/* We loop all the way up to csize, since ecgroup[csize] is the
|
||||
* position used for NUL characters
|
||||
*/
|
||||
ecgroup[1] = NIL;
|
||||
|
||||
for ( i = 2; i <= csize; ++i )
|
||||
{
|
||||
ecgroup[i] = i - 1;
|
||||
nextecm[i - 1] = i;
|
||||
}
|
||||
|
||||
nextecm[csize] = NIL;
|
||||
}
|
||||
|
||||
else
|
||||
{ /* put everything in its own equivalence class */
|
||||
for ( i = 1; i <= csize; ++i )
|
||||
{
|
||||
ecgroup[i] = i;
|
||||
nextecm[i] = BAD_SUBSCRIPT; /* to catch errors */
|
||||
}
|
||||
}
|
||||
|
||||
set_up_initial_allocations();
|
||||
}
|
||||
|
||||
|
||||
/* readin - read in the rules section of the input file(s)
|
||||
*
|
||||
* synopsis
|
||||
* readin();
|
||||
*/
|
||||
|
||||
void readin()
|
||||
|
||||
{
|
||||
skelout();
|
||||
|
||||
if ( ddebug )
|
||||
puts( "#define FLEX_DEBUG" );
|
||||
|
||||
if ( csize == 256 )
|
||||
puts( "#define YY_CHAR unsigned char" );
|
||||
else
|
||||
puts( "#define YY_CHAR char" );
|
||||
|
||||
line_directive_out( stdout );
|
||||
|
||||
if ( yyparse() )
|
||||
{
|
||||
pinpoint_message( "fatal parse error" );
|
||||
flexend( 1 );
|
||||
}
|
||||
|
||||
if ( xlation )
|
||||
{
|
||||
numecs = ecs_from_xlation( ecgroup );
|
||||
useecs = true;
|
||||
}
|
||||
|
||||
else if ( useecs )
|
||||
numecs = cre8ecs( nextecm, ecgroup, csize );
|
||||
|
||||
else
|
||||
numecs = csize;
|
||||
|
||||
/* now map the equivalence class for NUL to its expected place */
|
||||
ecgroup[0] = ecgroup[csize];
|
||||
NUL_ec = abs( ecgroup[0] );
|
||||
|
||||
if ( useecs )
|
||||
ccl2ecl();
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* set_up_initial_allocations - allocate memory for internal tables */
|
||||
|
||||
void set_up_initial_allocations()
|
||||
|
||||
{
|
||||
current_mns = INITIAL_MNS;
|
||||
firstst = allocate_integer_array( current_mns );
|
||||
lastst = allocate_integer_array( current_mns );
|
||||
finalst = allocate_integer_array( current_mns );
|
||||
transchar = allocate_integer_array( current_mns );
|
||||
trans1 = allocate_integer_array( current_mns );
|
||||
trans2 = allocate_integer_array( current_mns );
|
||||
accptnum = allocate_integer_array( current_mns );
|
||||
assoc_rule = allocate_integer_array( current_mns );
|
||||
state_type = allocate_integer_array( current_mns );
|
||||
|
||||
current_max_rules = INITIAL_MAX_RULES;
|
||||
rule_type = allocate_integer_array( current_max_rules );
|
||||
rule_linenum = allocate_integer_array( current_max_rules );
|
||||
|
||||
current_max_scs = INITIAL_MAX_SCS;
|
||||
scset = allocate_integer_array( current_max_scs );
|
||||
scbol = allocate_integer_array( current_max_scs );
|
||||
scxclu = allocate_integer_array( current_max_scs );
|
||||
sceof = allocate_integer_array( current_max_scs );
|
||||
scname = allocate_char_ptr_array( current_max_scs );
|
||||
actvsc = allocate_integer_array( current_max_scs );
|
||||
|
||||
current_maxccls = INITIAL_MAX_CCLS;
|
||||
cclmap = allocate_integer_array( current_maxccls );
|
||||
ccllen = allocate_integer_array( current_maxccls );
|
||||
cclng = allocate_integer_array( current_maxccls );
|
||||
|
||||
current_max_ccl_tbl_size = INITIAL_MAX_CCL_TBL_SIZE;
|
||||
ccltbl = allocate_character_array( current_max_ccl_tbl_size );
|
||||
|
||||
current_max_dfa_size = INITIAL_MAX_DFA_SIZE;
|
||||
|
||||
current_max_xpairs = INITIAL_MAX_XPAIRS;
|
||||
nxt = allocate_integer_array( current_max_xpairs );
|
||||
chk = allocate_integer_array( current_max_xpairs );
|
||||
|
||||
current_max_template_xpairs = INITIAL_MAX_TEMPLATE_XPAIRS;
|
||||
tnxt = allocate_integer_array( current_max_template_xpairs );
|
||||
|
||||
current_max_dfas = INITIAL_MAX_DFAS;
|
||||
base = allocate_integer_array( current_max_dfas );
|
||||
def = allocate_integer_array( current_max_dfas );
|
||||
dfasiz = allocate_integer_array( current_max_dfas );
|
||||
accsiz = allocate_integer_array( current_max_dfas );
|
||||
dhash = allocate_integer_array( current_max_dfas );
|
||||
dss = allocate_int_ptr_array( current_max_dfas );
|
||||
dfaacc = allocate_dfaacc_union( current_max_dfas );
|
||||
|
||||
nultrans = (int *) 0;
|
||||
}
|
158
util/flex/makefile
Normal file
158
util/flex/makefile
Normal file
|
@ -0,0 +1,158 @@
|
|||
# Note: this is a modified version of Makefile, for ACK installation. The
|
||||
# original flex makefile has a capital M (Makefile).
|
||||
|
||||
EMHOME = ../..
|
||||
|
||||
# make file for "flex" tool
|
||||
|
||||
# Porting considerations:
|
||||
#
|
||||
# For System V Unix machines, add -DUSG to CFLAGS (if it's not
|
||||
# automatically defined)
|
||||
# For Vax/VMS, add "-DVMS -DUSG" to CFLAGS.
|
||||
# For MS-DOS, add "-DMS_DOS -DUSG" to CFLAGS. Create \tmp if not present.
|
||||
# You will also want to rename flex.skel to something with a three
|
||||
# character extension, and change SKELETON_FILE below appropriately.
|
||||
# See MSDOS.notes for more info.
|
||||
# For Amiga, add "-DAMIGA -DUSG" to CFLAGS.
|
||||
# For SCO Unix, add "-DSCO_UNIX" to CFLAGS.
|
||||
#
|
||||
# For C compilers which don't know about "void", add -Dvoid=int to CFLAGS.
|
||||
#
|
||||
# If your C compiler is ANSI standard but does not include the <stdlib.h>
|
||||
# header file (some installations of gcc have this problem), then add
|
||||
# -DDONT_HAVE_STDLIB_H to CFLAGS.
|
||||
#
|
||||
# By default, flex will be configured to generate 8-bit scanners only
|
||||
# if the -8 flag is given. If you want it to always generate 8-bit
|
||||
# scanners, add "-DDEFAULT_CSIZE=256" to CFLAGS. Note that doing
|
||||
# so will double the size of all uncompressed scanners.
|
||||
#
|
||||
# If on your system you have trouble building flex due to 8-bit
|
||||
# character problems, remove the -8 from FLEX_FLAGS and the
|
||||
# "#define FLEX_8_BIT_CHARS" from the beginning of flexdef.h.
|
||||
|
||||
|
||||
# the first time around use "make first_flex"
|
||||
|
||||
|
||||
# Installation targeting. Files will be installed under the tree rooted
|
||||
# at DESTDIR. User commands will be installed in BINDIR, library files
|
||||
# in LIBDIR (which will be created if necessary), auxiliary files in
|
||||
# AUXDIR, manual pages will be installed in MANDIR with extension MANEXT.
|
||||
# Raw, unformatted troff source will be installed if INSTALLMAN=man, nroff
|
||||
# preformatted versions will be installed if INSTALLMAN=cat.
|
||||
DESTDIR =
|
||||
BINDIR = $(EMHOME)/bin
|
||||
AUXDIR = $(EMHOME)/lib/flex
|
||||
MANDIR = $(EMHOME)/man
|
||||
MANEXT = 1
|
||||
INSTALLMAN = man
|
||||
|
||||
# MAKE = make
|
||||
|
||||
SKELETON_FILE = \"`cd $(AUXDIR); pwd`/flex.skel\"
|
||||
SKELFLAGS = -DDEFAULT_SKELETON_FILE=$(SKELETON_FILE)
|
||||
CFLAGS = -O -Dvoid=int -DACK_MOD
|
||||
LDFLAGS = -s
|
||||
|
||||
COMPRESSION =
|
||||
FLEX_FLAGS = -ist8 -Sflex.skel
|
||||
# which "flex" to use to generate scan.c from scan.l
|
||||
FLEX = ./flex
|
||||
# CC = cc
|
||||
|
||||
AR = ar
|
||||
RANLIB = ranlib
|
||||
|
||||
FLEXOBJS = \
|
||||
ccl.o \
|
||||
dfa.o \
|
||||
ecs.o \
|
||||
gen.o \
|
||||
main.o \
|
||||
misc.o \
|
||||
nfa.o \
|
||||
parse.o \
|
||||
scan.o \
|
||||
sym.o \
|
||||
tblcmp.o \
|
||||
yylex.o
|
||||
|
||||
FLEX_C_SOURCES = \
|
||||
ccl.c \
|
||||
dfa.c \
|
||||
ecs.c \
|
||||
gen.c \
|
||||
main.c \
|
||||
misc.c \
|
||||
nfa.c \
|
||||
parse.c \
|
||||
scan.c \
|
||||
sym.c \
|
||||
tblcmp.c \
|
||||
yylex.c
|
||||
|
||||
|
||||
all : flex
|
||||
|
||||
flex : $(FLEXOBJS)
|
||||
$(CC) $(CFLAGS) -o flex $(LDFLAGS) $(FLEXOBJS)
|
||||
|
||||
first_flex:
|
||||
cp initscan.c scan.c
|
||||
$(MAKE) $(MFLAGS) flex
|
||||
|
||||
parse.h parse.c : parse.y
|
||||
$(YACC) -d parse.y
|
||||
@mv y.tab.c parse.c
|
||||
@mv y.tab.h parse.h
|
||||
|
||||
scan.c : scan.l
|
||||
$(FLEX) $(FLEX_FLAGS) $(COMPRESSION) scan.l >scan.c
|
||||
|
||||
scan.o : scan.c parse.h flexdef.h
|
||||
|
||||
main.o : main.c flexdef.h
|
||||
-mkdir $(AUXDIR)
|
||||
$(CC) $(CFLAGS) -c $(SKELFLAGS) main.c
|
||||
|
||||
ccl.o : ccl.c flexdef.h
|
||||
dfa.o : dfa.c flexdef.h
|
||||
ecs.o : ecs.c flexdef.h
|
||||
gen.o : gen.c flexdef.h
|
||||
misc.o : misc.c flexdef.h
|
||||
nfa.o : nfa.c flexdef.h
|
||||
parse.o : parse.c flexdef.h
|
||||
sym.o : sym.c flexdef.h
|
||||
tblcmp.o : tblcmp.c flexdef.h
|
||||
yylex.o : yylex.c flexdef.h
|
||||
|
||||
lint : $(FLEX_C_SOURCES)
|
||||
lint $(FLEX_C_SOURCES) > flex.lint
|
||||
|
||||
# Install the flex binary, its skeleton file and the two manual pages.
# Any previously installed binary is removed first.
install: first_flex flex.skel
	rm -f $(BINDIR)/flex
	cp flex $(BINDIR)/flex
	cp flex.skel $(AUXDIR)/flex.skel
	cp flex.1 $(MANDIR)/flex.1
	cp flexdoc.1 $(MANDIR)/flexdoc.1
|
||||
|
||||
clean :
|
||||
rm -f core errs flex *.o parse.c *.lint parse.h tags
|
||||
|
||||
tags :
|
||||
ctags $(FLEX_C_SOURCES)
|
||||
|
||||
test : flex
|
||||
./flex $(FLEX_FLAGS) $(COMPRESSION) scan.l | diff scan.c -
|
||||
|
||||
bigtest :
|
||||
rm -f scan.c ; $(MAKE) COMPRESSION="-C" test
|
||||
rm -f scan.c ; $(MAKE) COMPRESSION="-Ce" test
|
||||
rm -f scan.c ; $(MAKE) COMPRESSION="-Cm" test
|
||||
rm -f scan.c ; $(MAKE) COMPRESSION="-Cfe" test
|
||||
rm -f scan.c ; $(MAKE) COMPRESSION="-CFe" test
|
||||
rm -f scan.c ; $(MAKE) COMPRESSION="-Cf" test
|
||||
rm -f scan.c ; $(MAKE) COMPRESSION="-CF" test
|
||||
rm -f scan.c ; $(MAKE)
|
826
util/flex/misc.c
Normal file
826
util/flex/misc.c
Normal file
|
@ -0,0 +1,826 @@
|
|||
/* misc - miscellaneous flex routines */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 1990 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* Vern Paxson.
|
||||
*
|
||||
* The United States Government has rights in this work pursuant
|
||||
* to contract no. DE-AC03-76SF00098 between the United States
|
||||
* Department of Energy and the University of California.
|
||||
*
|
||||
* Redistribution and use in source and binary forms are permitted provided
|
||||
* that: (1) source distributions retain this entire copyright notice and
|
||||
* comment, and (2) distributions including binaries display the following
|
||||
* acknowledgement: ``This product includes software developed by the
|
||||
* University of California, Berkeley and its contributors'' in the
|
||||
* documentation or other materials provided with the distribution and in
|
||||
* all advertising materials mentioning features or use of this software.
|
||||
* Neither the name of the University nor the names of its contributors may
|
||||
* be used to endorse or promote products derived from this software without
|
||||
* specific prior written permission.
|
||||
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
|
||||
* WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*/
|
||||
|
||||
#ifndef lint
|
||||
static char rcsid[] =
|
||||
"@(#) $Header$ (LBL)";
|
||||
#endif
|
||||
|
||||
#include <ctype.h>
|
||||
#include "flexdef.h"
|
||||
|
||||
|
||||
/* ANSI C does not guarantee that isascii() is defined.  The fallback
 * compares as unsigned so that negative values (for instance a signed
 * char with its high bit set) are rejected instead of being passed on to
 * the other <ctype.h> tests.
 */
#ifndef isascii
#define isascii(c) ((unsigned) (c) <= 0177)
#endif
|
||||
|
||||
|
||||
|
||||
/* declare functions that have forward references */
|
||||
|
||||
void dataflush PROTO(());
|
||||
int otoi PROTO((Char []));
|
||||
|
||||
|
||||
/* action_out - write the actions from the temporary file to lex.yy.c
|
||||
*
|
||||
* synopsis
|
||||
* action_out();
|
||||
*
|
||||
* Copies the action file up to %% (or end-of-file) to lex.yy.c
|
||||
*/
|
||||
|
||||
void action_out()
|
||||
|
||||
{
|
||||
char buf[MAXLINE];
|
||||
|
||||
while ( fgets( buf, MAXLINE, temp_action_file ) != NULL )
|
||||
if ( buf[0] == '%' && buf[1] == '%' )
|
||||
break;
|
||||
else
|
||||
fputs( buf, stdout );
|
||||
}
|
||||
|
||||
|
||||
/* allocate_array - allocate memory for an array of the given size
 *
 * size is the number of elements and element_size the size of one
 * element in bytes.  Returns a pointer to size * element_size bytes
 * obtained from malloc().  A non-positive argument, a byte count that
 * does not fit in an unsigned int, or a failed malloc() is reported
 * through flexfatal().
 */

void *allocate_array( size, element_size )
int size, element_size;

{
    register void *mem;

    /* on 16-bit int machines (e.g., 80286) the byte count can easily
     * exceed what an int holds.  Test by division rather than by looking
     * at the sign of the product, since an overflowed product can come
     * out positive.
     */
    if ( size <= 0 || element_size <= 0 )
        flexfatal( "request for < 1 byte in allocate_array()" );

    else if ( (unsigned) size > ~(unsigned) 0 / (unsigned) element_size )
        flexfatal( "request too large in allocate_array()" );

    mem = (void *) malloc( (unsigned) size * (unsigned) element_size );

    if ( mem == NULL )
        flexfatal( "memory allocation failed in allocate_array()" );

    return ( mem );
}
|
||||
|
||||
|
||||
/* all_lower - true if a string is all lower-case
|
||||
*
|
||||
* synopsis:
|
||||
* Char *str;
|
||||
* int all_lower();
|
||||
* true/false = all_lower( str );
|
||||
*/
|
||||
|
||||
int all_lower( str )
|
||||
register Char *str;
|
||||
|
||||
{
|
||||
while ( *str )
|
||||
{
|
||||
if ( ! isascii( *str ) || ! islower( *str ) )
|
||||
return ( 0 );
|
||||
++str;
|
||||
}
|
||||
|
||||
return ( 1 );
|
||||
}
|
||||
|
||||
|
||||
/* all_upper - true if a string is all upper-case
|
||||
*
|
||||
* synopsis:
|
||||
* Char *str;
|
||||
* int all_upper();
|
||||
* true/false = all_upper( str );
|
||||
*/
|
||||
|
||||
int all_upper( str )
|
||||
register Char *str;
|
||||
|
||||
{
|
||||
while ( *str )
|
||||
{
|
||||
if ( ! isascii( *str ) || ! isupper( (char) *str ) )
|
||||
return ( 0 );
|
||||
++str;
|
||||
}
|
||||
|
||||
return ( 1 );
|
||||
}
|
||||
|
||||
|
||||
/* bubble - bubble sort an integer array in increasing order
 *
 * synopsis
 *     int v[n + 1], n;
 *     bubble( v, n );
 *
 * description
 *     sorts elements v[1] through v[n] in place into increasing order.
 *     Indexing is 1-based: v[0] is never read or written.
 *
 * passed
 *     v - the array to be sorted
 *     n - the number of elements of 'v' (starting at v[1]) to be sorted
 */

void bubble( v, n )
int v[], n;

    {
    register int top, pos, tmp;

    top = n;

    while ( top > 1 )
        {
        /* one pass: float the largest of v[1..top] up to v[top] */
        for ( pos = 1; pos < top; ++pos )
            {
            if ( v[pos + 1] < v[pos] )
                {
                tmp = v[pos + 1];
                v[pos + 1] = v[pos];
                v[pos] = tmp;
                }
            }

        --top;
        }
    }
|
||||
|
||||
|
||||
/* clower - replace upper-case letter to lower-case
|
||||
*
|
||||
* synopsis:
|
||||
* Char clower();
|
||||
* int c;
|
||||
* c = clower( c );
|
||||
*/
|
||||
|
||||
Char clower( c )
|
||||
register int c;
|
||||
|
||||
{
|
||||
return ( (isascii( c ) && isupper( c )) ? tolower( c ) : c );
|
||||
}
|
||||
|
||||
|
||||
/* copy_string - returns a dynamically allocated copy of a string
 *
 * synopsis
 *     char *str, *copy, *copy_string();
 *     copy = copy_string( str );
 *
 * The copy is obtained from malloc() and includes the terminating NUL.
 * flexfatal() is called if the allocation fails.
 */

char *copy_string( str )
register char *str;

    {
    register char *p;
    char *copy;

    /* walk to the terminating NUL to find the length */
    p = str;

    while ( *p )
        ++p;

    copy = malloc( (unsigned) ((p - str + 1) * sizeof( char )) );

    if ( copy == NULL )
        flexfatal( "dynamic memory failure in copy_string()" );

    /* copy everything, stopping once the NUL has been stored */
    p = copy;

    do
        *p = *str++;
    while ( *p++ );

    return ( copy );
    }
|
||||
|
||||
|
||||
/* copy_unsigned_string -
|
||||
* returns a dynamically allocated copy of a (potentially) unsigned string
|
||||
*
|
||||
* synopsis
|
||||
* Char *str, *copy, *copy_unsigned_string();
|
||||
* copy = copy_unsigned_string( str );
|
||||
*/
|
||||
|
||||
Char *copy_unsigned_string( str )
|
||||
register Char *str;
|
||||
|
||||
{
|
||||
register Char *c;
|
||||
Char *copy;
|
||||
|
||||
/* find length */
|
||||
for ( c = str; *c; ++c )
|
||||
;
|
||||
|
||||
copy = (Char *) malloc( (unsigned) ((c - str + 1) * sizeof( Char )) );
|
||||
|
||||
if ( copy == NULL )
|
||||
flexfatal( "dynamic memory failure in copy_unsigned_string()" );
|
||||
|
||||
for ( c = copy; (*c++ = *str++); )
|
||||
;
|
||||
|
||||
return ( copy );
|
||||
}
|
||||
|
||||
|
||||
/* cshell - shell sort a character array in increasing order
|
||||
*
|
||||
* synopsis
|
||||
*
|
||||
* Char v[n];
|
||||
* int n, special_case_0;
|
||||
* cshell( v, n, special_case_0 );
|
||||
*
|
||||
* description
|
||||
* does a shell sort of the first n elements of array v.
|
||||
* If special_case_0 is true, then any element equal to 0
|
||||
* is instead assumed to have infinite weight.
|
||||
*
|
||||
* passed
|
||||
* v - array to be sorted
|
||||
* n - number of elements of v to be sorted
|
||||
*/
|
||||
|
||||
void cshell( v, n, special_case_0 )
|
||||
Char v[];
|
||||
int n, special_case_0;
|
||||
|
||||
{
|
||||
int gap, i, j, jg;
|
||||
Char k;
|
||||
|
||||
for ( gap = n / 2; gap > 0; gap = gap / 2 )
|
||||
for ( i = gap; i < n; ++i )
|
||||
for ( j = i - gap; j >= 0; j = j - gap )
|
||||
{
|
||||
jg = j + gap;
|
||||
|
||||
if ( special_case_0 )
|
||||
{
|
||||
if ( v[jg] == 0 )
|
||||
break;
|
||||
|
||||
else if ( v[j] != 0 && v[j] <= v[jg] )
|
||||
break;
|
||||
}
|
||||
|
||||
else if ( v[j] <= v[jg] )
|
||||
break;
|
||||
|
||||
k = v[j];
|
||||
v[j] = v[jg];
|
||||
v[jg] = k;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* dataend - finish up a block of data declarations
|
||||
*
|
||||
* synopsis
|
||||
* dataend();
|
||||
*/
|
||||
|
||||
void dataend()
|
||||
|
||||
{
|
||||
if ( datapos > 0 )
|
||||
dataflush();
|
||||
|
||||
/* add terminator for initialization */
|
||||
puts( " } ;\n" );
|
||||
|
||||
dataline = 0;
|
||||
datapos = 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* dataflush - flush generated data statements
|
||||
*
|
||||
* synopsis
|
||||
* dataflush();
|
||||
*/
|
||||
|
||||
void dataflush()
|
||||
|
||||
{
|
||||
putchar( '\n' );
|
||||
|
||||
if ( ++dataline >= NUMDATALINES )
|
||||
{
|
||||
/* put out a blank line so that the table is grouped into
|
||||
* large blocks that enable the user to find elements easily
|
||||
*/
|
||||
putchar( '\n' );
|
||||
dataline = 0;
|
||||
}
|
||||
|
||||
/* reset the number of characters written on the current line */
|
||||
datapos = 0;
|
||||
}
|
||||
|
||||
|
||||
/* flexerror - report an error message and terminate
|
||||
*
|
||||
* synopsis
|
||||
* char msg[];
|
||||
* flexerror( msg );
|
||||
*/
|
||||
|
||||
void flexerror( msg )
|
||||
char msg[];
|
||||
|
||||
{
|
||||
fprintf( stderr, "%s: %s\n", program_name, msg );
|
||||
|
||||
flexend( 1 );
|
||||
}
|
||||
|
||||
|
||||
/* flexfatal - report a fatal error message and terminate
|
||||
*
|
||||
* synopsis
|
||||
* char msg[];
|
||||
* flexfatal( msg );
|
||||
*/
|
||||
|
||||
void flexfatal( msg )
|
||||
char msg[];
|
||||
|
||||
{
|
||||
fprintf( stderr, "%s: fatal internal error, %s\n", program_name, msg );
|
||||
flexend( 1 );
|
||||
}
|
||||
|
||||
|
||||
/* flex_gettime - return current time
|
||||
*
|
||||
* synopsis
|
||||
* char *flex_gettime(), *time_str;
|
||||
* time_str = flex_gettime();
|
||||
*
|
||||
* note
|
||||
* the routine name has the "flex_" prefix because of name clashes
|
||||
* with Turbo-C
|
||||
*/
|
||||
|
||||
/* include sys/types.h to use time_t and make lint happy */
|
||||
|
||||
#ifndef MS_DOS
|
||||
#ifndef VMS
|
||||
#include <sys/types.h>
|
||||
#else
|
||||
#include <types.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef MS_DOS
|
||||
#include <time.h>
|
||||
typedef long time_t;
|
||||
#endif
|
||||
|
||||
/* Returns the current time, formatted by ctime(), as a freshly allocated
 * string (made with copy_string()) with the trailing newline removed.
 * If ctime() cannot format the time, a copy of the placeholder text
 * "(unknown time)" is returned instead.
 */
char *flex_gettime()

    {
    time_t t, time();
    char *stamp, *result, *cp, *ctime(), *copy_string();

    t = time( (time_t *) 0 );

    stamp = ctime( &t );

    /* ctime() is allowed to return a null pointer on failure */
    if ( stamp == NULL )
        return ( copy_string( "(unknown time)" ) );

    result = copy_string( stamp );

    /* get rid of trailing newline; search for it rather than assuming
     * that it sits at a fixed offset
     */
    for ( cp = result; *cp; ++cp )
        if ( *cp == '\n' )
            {
            *cp = '\0';
            break;
            }

    return ( result );
    }
|
||||
|
||||
|
||||
/* lerrif - report an error message formatted with one integer argument
|
||||
*
|
||||
* synopsis
|
||||
* char msg[];
|
||||
* int arg;
|
||||
* lerrif( msg, arg );
|
||||
*/
|
||||
|
||||
void lerrif( msg, arg )
|
||||
char msg[];
|
||||
int arg;
|
||||
|
||||
{
|
||||
char errmsg[MAXLINE];
|
||||
(void) sprintf( errmsg, msg, arg );
|
||||
flexerror( errmsg );
|
||||
}
|
||||
|
||||
|
||||
/* lerrsf - report an error message formatted with one string argument
|
||||
*
|
||||
* synopsis
|
||||
* char msg[], arg[];
|
||||
* lerrsf( msg, arg );
|
||||
*/
|
||||
|
||||
void lerrsf( msg, arg )
|
||||
char msg[], arg[];
|
||||
|
||||
{
|
||||
char errmsg[MAXLINE];
|
||||
|
||||
(void) sprintf( errmsg, msg, arg );
|
||||
flexerror( errmsg );
|
||||
}
|
||||
|
||||
|
||||
/* htoi - convert a hexadecimal digit string to an integer value
|
||||
*
|
||||
* synopsis:
|
||||
* int val, htoi();
|
||||
* Char str[];
|
||||
* val = htoi( str );
|
||||
*/
|
||||
|
||||
int htoi( str )
|
||||
Char str[];
|
||||
|
||||
{
|
||||
int result;
|
||||
|
||||
(void) sscanf( (char *) str, "%x", &result );
|
||||
|
||||
return ( result );
|
||||
}
|
||||
|
||||
|
||||
/* is_hex_digit - returns true if a character is a valid hex digit, false
 *                otherwise
 *
 * synopsis:
 *    int true_or_false, is_hex_digit();
 *    int ch;
 *    val = is_hex_digit( ch );
 *
 * A character counts as a hex digit when isdigit() accepts it or when
 * its clower() form is one of 'a' through 'f'.
 */

int is_hex_digit( ch )
int ch;

    {
    int lc;

    if ( isdigit( ch ) )
        return ( 1 );

    lc = clower( ch );

    if ( lc == 'a' || lc == 'b' || lc == 'c' ||
         lc == 'd' || lc == 'e' || lc == 'f' )
        return ( 1 );

    return ( 0 );
    }
|
||||
|
||||
|
||||
/* line_directive_out - spit out a "# line" statement */
|
||||
|
||||
void line_directive_out( output_file_name )
|
||||
FILE *output_file_name;
|
||||
|
||||
{
|
||||
if ( infilename && gen_line_dirs )
|
||||
fprintf( output_file_name, "# line %d \"%s\"\n", linenum, infilename );
|
||||
}
|
||||
|
||||
|
||||
/* mk2data - generate a data statement for a two-dimensional array
|
||||
*
|
||||
* synopsis
|
||||
* int value;
|
||||
* mk2data( value );
|
||||
*
|
||||
* generates a data statement initializing the current 2-D array to "value"
|
||||
*/
|
||||
void mk2data( value )
|
||||
int value;
|
||||
|
||||
{
|
||||
if ( datapos >= NUMDATAITEMS )
|
||||
{
|
||||
putchar( ',' );
|
||||
dataflush();
|
||||
}
|
||||
|
||||
if ( datapos == 0 )
|
||||
/* indent */
|
||||
fputs( " ", stdout );
|
||||
|
||||
else
|
||||
putchar( ',' );
|
||||
|
||||
++datapos;
|
||||
|
||||
printf( "%5d", value );
|
||||
}
|
||||
|
||||
|
||||
/* mkdata - generate a data statement
|
||||
*
|
||||
* synopsis
|
||||
* int value;
|
||||
* mkdata( value );
|
||||
*
|
||||
* generates a data statement initializing the current array element to
|
||||
* "value"
|
||||
*/
|
||||
void mkdata( value )
|
||||
int value;
|
||||
|
||||
{
|
||||
if ( datapos >= NUMDATAITEMS )
|
||||
{
|
||||
putchar( ',' );
|
||||
dataflush();
|
||||
}
|
||||
|
||||
if ( datapos == 0 )
|
||||
/* indent */
|
||||
fputs( " ", stdout );
|
||||
|
||||
else
|
||||
putchar( ',' );
|
||||
|
||||
++datapos;
|
||||
|
||||
printf( "%5d", value );
|
||||
}
|
||||
|
||||
|
||||
/* myctoi - return the integer represented by a string of digits
|
||||
*
|
||||
* synopsis
|
||||
* Char array[];
|
||||
* int val, myctoi();
|
||||
* val = myctoi( array );
|
||||
*
|
||||
*/
|
||||
|
||||
int myctoi( array )
|
||||
Char array[];
|
||||
|
||||
{
|
||||
int val = 0;
|
||||
|
||||
(void) sscanf( (char *) array, "%d", &val );
|
||||
|
||||
return ( val );
|
||||
}
|
||||
|
||||
|
||||
/* myesc - return character corresponding to escape sequence
|
||||
*
|
||||
* synopsis
|
||||
* Char array[], c, myesc();
|
||||
* c = myesc( array );
|
||||
*
|
||||
*/
|
||||
|
||||
Char myesc( array )
|
||||
Char array[];
|
||||
|
||||
{
|
||||
Char c, esc_char;
|
||||
register int sptr;
|
||||
|
||||
switch ( array[1] )
|
||||
{
|
||||
case 'a': return ( '\a' );
|
||||
case 'b': return ( '\b' );
|
||||
case 'f': return ( '\f' );
|
||||
case 'n': return ( '\n' );
|
||||
case 'r': return ( '\r' );
|
||||
case 't': return ( '\t' );
|
||||
case 'v': return ( '\v' );
|
||||
|
||||
case '0':
|
||||
case '1':
|
||||
case '2':
|
||||
case '3':
|
||||
case '4':
|
||||
case '5':
|
||||
case '6':
|
||||
case '7':
|
||||
case '8':
|
||||
case '9':
|
||||
{ /* \<octal> */
|
||||
sptr = 1;
|
||||
|
||||
while ( isascii( array[sptr] ) && isdigit( array[sptr] ) )
|
||||
/* don't increment inside loop control because if
|
||||
* isdigit() is a macro it might expand into multiple
|
||||
* increments ...
|
||||
*/
|
||||
++sptr;
|
||||
|
||||
c = array[sptr];
|
||||
array[sptr] = '\0';
|
||||
|
||||
esc_char = otoi( array + 1 );
|
||||
|
||||
array[sptr] = c;
|
||||
|
||||
return ( esc_char );
|
||||
}
|
||||
|
||||
case 'x':
|
||||
{ /* \x<hex> */
|
||||
int sptr = 2;
|
||||
|
||||
while ( isascii( array[sptr] ) && is_hex_digit( array[sptr] ) )
|
||||
/* don't increment inside loop control because if
|
||||
* isdigit() is a macro it might expand into multiple
|
||||
* increments ...
|
||||
*/
|
||||
++sptr;
|
||||
|
||||
c = array[sptr];
|
||||
array[sptr] = '\0';
|
||||
|
||||
esc_char = htoi( array + 2 );
|
||||
|
||||
array[sptr] = c;
|
||||
|
||||
return ( esc_char );
|
||||
}
|
||||
|
||||
default:
|
||||
return ( array[1] );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* otoi - convert an octal digit string to an integer value
|
||||
*
|
||||
* synopsis:
|
||||
* int val, otoi();
|
||||
* Char str[];
|
||||
* val = otoi( str );
|
||||
*/
|
||||
|
||||
int otoi( str )
|
||||
Char str[];
|
||||
|
||||
{
|
||||
int result;
|
||||
|
||||
(void) sscanf( (char *) str, "%o", &result );
|
||||
|
||||
return ( result );
|
||||
}
|
||||
|
||||
|
||||
/* readable_form - return the human-readable form of a character
 *
 * synopsis:
 *    int c;
 *    char *readable_form();
 *    <string> = readable_form( c );
 *
 * Values 0..31 and values >= 127 are shown as a C escape: \n, \t, \f,
 * \r and \b by name, everything else as a backslash followed by at
 * least three octal digits.  A blank is shown as ' ' (with the quotes).
 * Any other value is returned as a one-character string.
 *
 * The returned string is in static storage and may be overwritten by
 * the next call.
 */

char *readable_form( c )
register int c;

    {
    /* room for the backslash, every octal digit an int can have
     * (fewer than 3 per byte) and the terminating NUL
     */
    static char rform[sizeof( int ) * 3 + 3];

    if ( (c >= 0 && c < 32) || c >= 127 )
        {
        switch ( c )
            {
            case '\n': return ( "\\n" );
            case '\t': return ( "\\t" );
            case '\f': return ( "\\f" );
            case '\r': return ( "\\r" );
            case '\b': return ( "\\b" );

            default:
                (void) sprintf( rform, "\\%.3o", c );
                return ( rform );
            }
        }

    else if ( c == ' ' )
        return ( "' '" );

    else
        {
        rform[0] = c;
        rform[1] = '\0';

        return ( rform );
        }
    }
|
||||
|
||||
|
||||
/* reallocate_array - increase the size of a dynamic array
 *
 * synopsis
 *     void *array, *new_array, *reallocate_array();
 *     int size, element_size;
 *     new_array = reallocate_array( array, size, element_size );
 *
 * size is the new number of elements and element_size the size of one
 * element in bytes.  flexfatal() is called if the request is for less
 * than one byte, if the total byte count cannot be held in a signed int,
 * or if realloc() fails.
 */

void *reallocate_array( array, size, element_size )
void *array;
int size, element_size;

    {
    register void *new_array;

    /* largest byte count a signed int can hold, computed without
     * needing <limits.h>
     */
    unsigned max_bytes = ((unsigned) ~0) >> 1;

    /* same worry as in allocate_array(): test by division so that the
     * multiplication itself can never overflow
     */
    if ( size <= 0 || element_size <= 0 ||
         (unsigned) size > max_bytes / (unsigned) element_size )
        flexfatal( "attempt to increase array size by less than 1 byte" );

    new_array =
        (void *) realloc( (char *) array,
                          (unsigned) size * (unsigned) element_size );

    if ( new_array == NULL )
        flexfatal( "attempt to increase array size failed" );

    return ( new_array );
    }
|
||||
|
||||
|
||||
/* skelout - write out one section of the skeleton file
|
||||
*
|
||||
* synopsis
|
||||
* skelout();
|
||||
*
|
||||
* DESCRIPTION
|
||||
* Copies from skelfile to stdout until a line beginning with "%%" or
|
||||
* EOF is found.
|
||||
*/
|
||||
void skelout()
|
||||
|
||||
{
|
||||
char buf[MAXLINE];
|
||||
|
||||
while ( fgets( buf, MAXLINE, skelfile ) != NULL )
|
||||
if ( buf[0] == '%' && buf[1] == '%' )
|
||||
break;
|
||||
else
|
||||
fputs( buf, stdout );
|
||||
}
|
||||
|
||||
|
||||
/* transition_struct_out - output a yy_trans_info structure
|
||||
*
|
||||
* synopsis
|
||||
* int element_v, element_n;
|
||||
* transition_struct_out( element_v, element_n );
|
||||
*
|
||||
* outputs the yy_trans_info structure with the two elements, element_v and
|
||||
* element_n. Formats the output with spaces and carriage returns.
|
||||
*/
|
||||
|
||||
void transition_struct_out( element_v, element_n )
|
||||
int element_v, element_n;
|
||||
|
||||
{
|
||||
printf( "%7d, %5d,", element_v, element_n );
|
||||
|
||||
datapos += TRANS_STRUCT_PRINT_LENGTH;
|
||||
|
||||
if ( datapos >= 75 )
|
||||
{
|
||||
putchar( '\n' );
|
||||
|
||||
if ( ++dataline % 10 == 0 )
|
||||
putchar( '\n' );
|
||||
|
||||
datapos = 0;
|
||||
}
|
||||
}
|
717
util/flex/nfa.c
Normal file
717
util/flex/nfa.c
Normal file
|
@ -0,0 +1,717 @@
|
|||
/* nfa - NFA construction routines */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 1990 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* Vern Paxson.
|
||||
*
|
||||
* The United States Government has rights in this work pursuant
|
||||
* to contract no. DE-AC03-76SF00098 between the United States
|
||||
* Department of Energy and the University of California.
|
||||
*
|
||||
* Redistribution and use in source and binary forms are permitted provided
|
||||
* that: (1) source distributions retain this entire copyright notice and
|
||||
* comment, and (2) distributions including binaries display the following
|
||||
* acknowledgement: ``This product includes software developed by the
|
||||
* University of California, Berkeley and its contributors'' in the
|
||||
* documentation or other materials provided with the distribution and in
|
||||
* all advertising materials mentioning features or use of this software.
|
||||
* Neither the name of the University nor the names of its contributors may
|
||||
* be used to endorse or promote products derived from this software without
|
||||
* specific prior written permission.
|
||||
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
|
||||
* WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*/
|
||||
|
||||
#ifndef lint
|
||||
static char rcsid[] =
|
||||
"@(#) $Header$ (LBL)";
|
||||
#endif
|
||||
|
||||
#include "flexdef.h"
|
||||
|
||||
|
||||
/* declare functions that have forward references */
|
||||
|
||||
int dupmachine PROTO((int));
|
||||
void mkxtion PROTO((int, int));
|
||||
|
||||
|
||||
/* add_accept - add an accepting state to a machine
|
||||
*
|
||||
* synopsis
|
||||
*
|
||||
* add_accept( mach, accepting_number );
|
||||
*
|
||||
* accepting_number becomes mach's accepting number.
|
||||
*/
|
||||
|
||||
void add_accept( mach, accepting_number )
|
||||
int mach, accepting_number;
|
||||
|
||||
{
|
||||
/* hang the accepting number off an epsilon state. if it is associated
|
||||
* with a state that has a non-epsilon out-transition, then the state
|
||||
* will accept BEFORE it makes that transition, i.e., one character
|
||||
* too soon
|
||||
*/
|
||||
|
||||
if ( transchar[finalst[mach]] == SYM_EPSILON )
|
||||
accptnum[finalst[mach]] = accepting_number;
|
||||
|
||||
else
|
||||
{
|
||||
int astate = mkstate( SYM_EPSILON );
|
||||
accptnum[astate] = accepting_number;
|
||||
mach = link_machines( mach, astate );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* copysingl - make a given number of copies of a singleton machine
|
||||
*
|
||||
* synopsis
|
||||
*
|
||||
* newsng = copysingl( singl, num );
|
||||
*
|
||||
* newsng - a new singleton composed of num copies of singl
|
||||
* singl - a singleton machine
|
||||
* num - the number of copies of singl to be present in newsng
|
||||
*/
|
||||
|
||||
int copysingl( singl, num )
|
||||
int singl, num;
|
||||
|
||||
{
|
||||
int copy, i;
|
||||
|
||||
copy = mkstate( SYM_EPSILON );
|
||||
|
||||
for ( i = 1; i <= num; ++i )
|
||||
copy = link_machines( copy, dupmachine( singl ) );
|
||||
|
||||
return ( copy );
|
||||
}
|
||||
|
||||
|
||||
/* dumpnfa - debugging routine to write out an nfa
|
||||
*
|
||||
* synopsis
|
||||
* int state1;
|
||||
* dumpnfa( state1 );
|
||||
*/
|
||||
|
||||
void dumpnfa( state1 )
|
||||
int state1;
|
||||
|
||||
{
|
||||
int sym, tsp1, tsp2, anum, ns;
|
||||
|
||||
fprintf( stderr, "\n\n********** beginning dump of nfa with start state %d\n",
|
||||
state1 );
|
||||
|
||||
/* we probably should loop starting at firstst[state1] and going to
|
||||
* lastst[state1], but they're not maintained properly when we "or"
|
||||
* all of the rules together. So we use our knowledge that the machine
|
||||
* starts at state 1 and ends at lastnfa.
|
||||
*/
|
||||
|
||||
/* for ( ns = firstst[state1]; ns <= lastst[state1]; ++ns ) */
|
||||
for ( ns = 1; ns <= lastnfa; ++ns )
|
||||
{
|
||||
fprintf( stderr, "state # %4d\t", ns );
|
||||
|
||||
sym = transchar[ns];
|
||||
tsp1 = trans1[ns];
|
||||
tsp2 = trans2[ns];
|
||||
anum = accptnum[ns];
|
||||
|
||||
fprintf( stderr, "%3d: %4d, %4d", sym, tsp1, tsp2 );
|
||||
|
||||
if ( anum != NIL )
|
||||
fprintf( stderr, " [%d]", anum );
|
||||
|
||||
fprintf( stderr, "\n" );
|
||||
}
|
||||
|
||||
fprintf( stderr, "********** end of dump\n" );
|
||||
}
|
||||
|
||||
|
||||
/* dupmachine - make a duplicate of a given machine
|
||||
*
|
||||
* synopsis
|
||||
*
|
||||
* copy = dupmachine( mach );
|
||||
*
|
||||
* copy - holds duplicate of mach
|
||||
* mach - machine to be duplicated
|
||||
*
|
||||
* note that the copy of mach is NOT an exact duplicate; rather, all the
|
||||
* transition states values are adjusted so that the copy is self-contained,
|
||||
* as the original should have been.
|
||||
*
|
||||
* also note that the original MUST be contiguous, with its low and high
|
||||
* states accessible by the arrays firstst and lastst
|
||||
*/
|
||||
|
||||
int dupmachine( mach )
|
||||
int mach;
|
||||
|
||||
{
|
||||
int i, init, state_offset;
|
||||
int state = 0;
|
||||
int last = lastst[mach];
|
||||
|
||||
for ( i = firstst[mach]; i <= last; ++i )
|
||||
{
|
||||
state = mkstate( transchar[i] );
|
||||
|
||||
if ( trans1[i] != NO_TRANSITION )
|
||||
{
|
||||
mkxtion( finalst[state], trans1[i] + state - i );
|
||||
|
||||
if ( transchar[i] == SYM_EPSILON && trans2[i] != NO_TRANSITION )
|
||||
mkxtion( finalst[state], trans2[i] + state - i );
|
||||
}
|
||||
|
||||
accptnum[state] = accptnum[i];
|
||||
}
|
||||
|
||||
if ( state == 0 )
|
||||
flexfatal( "empty machine in dupmachine()" );
|
||||
|
||||
state_offset = state - i + 1;
|
||||
|
||||
init = mach + state_offset;
|
||||
firstst[init] = firstst[mach] + state_offset;
|
||||
finalst[init] = finalst[mach] + state_offset;
|
||||
lastst[init] = lastst[mach] + state_offset;
|
||||
|
||||
return ( init );
|
||||
}
|
||||
|
||||
|
||||
/* finish_rule - finish up the processing for a rule
|
||||
*
|
||||
* synopsis
|
||||
*
|
||||
* finish_rule( mach, variable_trail_rule, headcnt, trailcnt );
|
||||
*
|
||||
* An accepting number is added to the given machine. If variable_trail_rule
|
||||
* is true then the rule has trailing context and both the head and trail
|
||||
* are variable size. Otherwise if headcnt or trailcnt is non-zero then
|
||||
* the machine recognizes a pattern with trailing context and headcnt is
|
||||
* the number of characters in the matched part of the pattern, or zero
|
||||
* if the matched part has variable length. trailcnt is the number of
|
||||
* trailing context characters in the pattern, or zero if the trailing
|
||||
* context has variable length.
|
||||
*/
|
||||
|
||||
void finish_rule( mach, variable_trail_rule, headcnt, trailcnt )
|
||||
int mach, variable_trail_rule, headcnt, trailcnt;
|
||||
|
||||
{
|
||||
add_accept( mach, num_rules );
|
||||
|
||||
/* we did this in new_rule(), but it often gets the wrong
|
||||
* number because we do it before we start parsing the current rule
|
||||
*/
|
||||
rule_linenum[num_rules] = linenum;
|
||||
|
||||
/* if this is a continued action, then the line-number has
|
||||
* already been updated, giving us the wrong number
|
||||
*/
|
||||
if ( continued_action )
|
||||
--rule_linenum[num_rules];
|
||||
|
||||
fprintf( temp_action_file, "case %d:\n", num_rules );
|
||||
|
||||
if ( variable_trail_rule )
|
||||
{
|
||||
rule_type[num_rules] = RULE_VARIABLE;
|
||||
|
||||
if ( performance_report )
|
||||
fprintf( stderr, "Variable trailing context rule at line %d\n",
|
||||
rule_linenum[num_rules] );
|
||||
|
||||
variable_trailing_context_rules = true;
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
rule_type[num_rules] = RULE_NORMAL;
|
||||
|
||||
if ( headcnt > 0 || trailcnt > 0 )
|
||||
{
|
||||
/* do trailing context magic to not match the trailing characters */
|
||||
char *scanner_cp = "yy_c_buf_p = yy_cp";
|
||||
char *scanner_bp = "yy_bp";
|
||||
|
||||
fprintf( temp_action_file,
|
||||
"*yy_cp = yy_hold_char; /* undo effects of setting up yytext */\n" );
|
||||
|
||||
if ( headcnt > 0 )
|
||||
fprintf( temp_action_file, "%s = %s + %d;\n",
|
||||
scanner_cp, scanner_bp, headcnt );
|
||||
|
||||
else
|
||||
fprintf( temp_action_file,
|
||||
"%s -= %d;\n", scanner_cp, trailcnt );
|
||||
|
||||
fprintf( temp_action_file,
|
||||
"YY_DO_BEFORE_ACTION; /* set up yytext again */\n" );
|
||||
}
|
||||
}
|
||||
|
||||
line_directive_out( temp_action_file );
|
||||
}
|
||||
|
||||
|
||||
/* link_machines - connect two machines together
|
||||
*
|
||||
* synopsis
|
||||
*
|
||||
* new = link_machines( first, last );
|
||||
*
|
||||
* new - a machine constructed by connecting first to last
|
||||
* first - the machine whose successor is to be last
|
||||
* last - the machine whose predecessor is to be first
|
||||
*
|
||||
* note: this routine concatenates the machine first with the machine
|
||||
* last to produce a machine new which will pattern-match first first
|
||||
* and then last, and will fail if either of the sub-patterns fails.
|
||||
* FIRST is set to new by the operation. last is unmolested.
|
||||
*/
|
||||
|
||||
int link_machines( first, last )
|
||||
int first, last;
|
||||
|
||||
{
|
||||
if ( first == NIL )
|
||||
return ( last );
|
||||
|
||||
else if ( last == NIL )
|
||||
return ( first );
|
||||
|
||||
else
|
||||
{
|
||||
mkxtion( finalst[first], last );
|
||||
finalst[first] = finalst[last];
|
||||
lastst[first] = max( lastst[first], lastst[last] );
|
||||
firstst[first] = min( firstst[first], firstst[last] );
|
||||
|
||||
return ( first );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* mark_beginning_as_normal - mark each "beginning" state in a machine
|
||||
* as being a "normal" (i.e., not trailing context-
|
||||
* associated) states
|
||||
*
|
||||
* synopsis
|
||||
*
|
||||
* mark_beginning_as_normal( mach )
|
||||
*
|
||||
* mach - machine to mark
|
||||
*
|
||||
* The "beginning" states are the epsilon closure of the first state
|
||||
*/
|
||||
|
||||
void mark_beginning_as_normal( mach )
|
||||
register int mach;
|
||||
|
||||
{
|
||||
switch ( state_type[mach] )
|
||||
{
|
||||
case STATE_NORMAL:
|
||||
/* oh, we've already visited here */
|
||||
return;
|
||||
|
||||
case STATE_TRAILING_CONTEXT:
|
||||
state_type[mach] = STATE_NORMAL;
|
||||
|
||||
if ( transchar[mach] == SYM_EPSILON )
|
||||
{
|
||||
if ( trans1[mach] != NO_TRANSITION )
|
||||
mark_beginning_as_normal( trans1[mach] );
|
||||
|
||||
if ( trans2[mach] != NO_TRANSITION )
|
||||
mark_beginning_as_normal( trans2[mach] );
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
flexerror( "bad state type in mark_beginning_as_normal()" );
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* mkbranch - make a machine that branches to two machines
|
||||
*
|
||||
* synopsis
|
||||
*
|
||||
* branch = mkbranch( first, second );
|
||||
*
|
||||
* branch - a machine which matches either first's pattern or second's
|
||||
* first, second - machines whose patterns are to be or'ed (the | operator)
|
||||
*
|
||||
* note that first and second are NEITHER destroyed by the operation. Also,
|
||||
* the resulting machine CANNOT be used with any other "mk" operation except
|
||||
* more mkbranch's. Compare with mkor()
|
||||
*/
|
||||
|
||||
int mkbranch( first, second )
|
||||
int first, second;
|
||||
|
||||
{
|
||||
int eps;
|
||||
|
||||
if ( first == NO_TRANSITION )
|
||||
return ( second );
|
||||
|
||||
else if ( second == NO_TRANSITION )
|
||||
return ( first );
|
||||
|
||||
eps = mkstate( SYM_EPSILON );
|
||||
|
||||
mkxtion( eps, first );
|
||||
mkxtion( eps, second );
|
||||
|
||||
return ( eps );
|
||||
}
|
||||
|
||||
|
||||
/* mkclos - convert a machine into a closure
 *
 * synopsis
 *   new = mkclos( state );
 *
 *     new - a new state which matches the closure of "state"
 *
 * The closure is built as the optional form (mkopt) of the positive
 * closure (mkposcl) of "state".
 */

int mkclos( state )
int state;

    {
    int one_or_more;

    one_or_more = mkposcl( state );

    return ( mkopt( one_or_more ) );
    }
|
||||
|
||||
|
||||
/* mkopt - make a machine optional
|
||||
*
|
||||
* synopsis
|
||||
*
|
||||
* new = mkopt( mach );
|
||||
*
|
||||
* new - a machine which optionally matches whatever mach matched
|
||||
* mach - the machine to make optional
|
||||
*
|
||||
* notes:
|
||||
* 1. mach must be the last machine created
|
||||
* 2. mach is destroyed by the call
|
||||
*/
|
||||
|
||||
int mkopt( mach )
|
||||
int mach;
|
||||
|
||||
{
|
||||
int eps;
|
||||
|
||||
if ( ! SUPER_FREE_EPSILON(finalst[mach]) )
|
||||
{
|
||||
eps = mkstate( SYM_EPSILON );
|
||||
mach = link_machines( mach, eps );
|
||||
}
|
||||
|
||||
/* can't skimp on the following if FREE_EPSILON(mach) is true because
|
||||
* some state interior to "mach" might point back to the beginning
|
||||
* for a closure
|
||||
*/
|
||||
eps = mkstate( SYM_EPSILON );
|
||||
mach = link_machines( eps, mach );
|
||||
|
||||
mkxtion( mach, finalst[mach] );
|
||||
|
||||
return ( mach );
|
||||
}
|
||||
|
||||
|
||||
/* mkor - make a machine that matches either one of two machines
|
||||
*
|
||||
* synopsis
|
||||
*
|
||||
* new = mkor( first, second );
|
||||
*
|
||||
* new - a machine which matches either first's pattern or second's
|
||||
* first, second - machines whose patterns are to be or'ed (the | operator)
|
||||
*
|
||||
* note that first and second are both destroyed by the operation
|
||||
* the code is rather convoluted because an attempt is made to minimize
|
||||
* the number of epsilon states needed
|
||||
*/
|
||||
|
||||
int mkor( first, second )
|
||||
int first, second;
|
||||
|
||||
{
|
||||
int eps, orend;
|
||||
|
||||
if ( first == NIL )
|
||||
return ( second );
|
||||
|
||||
else if ( second == NIL )
|
||||
return ( first );
|
||||
|
||||
else
|
||||
{
|
||||
/* see comment in mkopt() about why we can't use the first state
|
||||
* of "first" or "second" if they satisfy "FREE_EPSILON"
|
||||
*/
|
||||
eps = mkstate( SYM_EPSILON );
|
||||
|
||||
first = link_machines( eps, first );
|
||||
|
||||
mkxtion( first, second );
|
||||
|
||||
if ( SUPER_FREE_EPSILON(finalst[first]) &&
|
||||
accptnum[finalst[first]] == NIL )
|
||||
{
|
||||
orend = finalst[first];
|
||||
mkxtion( finalst[second], orend );
|
||||
}
|
||||
|
||||
else if ( SUPER_FREE_EPSILON(finalst[second]) &&
|
||||
accptnum[finalst[second]] == NIL )
|
||||
{
|
||||
orend = finalst[second];
|
||||
mkxtion( finalst[first], orend );
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
eps = mkstate( SYM_EPSILON );
|
||||
|
||||
first = link_machines( first, eps );
|
||||
orend = finalst[first];
|
||||
|
||||
mkxtion( finalst[second], orend );
|
||||
}
|
||||
}
|
||||
|
||||
finalst[first] = orend;
|
||||
return ( first );
|
||||
}
|
||||
|
||||
|
||||
/* mkposcl - convert a machine into a positive closure
|
||||
*
|
||||
* synopsis
|
||||
* new = mkposcl( state );
|
||||
*
|
||||
* new - a machine matching the positive closure of "state"
|
||||
*/
|
||||
|
||||
int mkposcl( state )
|
||||
int state;
|
||||
|
||||
{
|
||||
int eps;
|
||||
|
||||
if ( SUPER_FREE_EPSILON(finalst[state]) )
|
||||
{
|
||||
mkxtion( finalst[state], state );
|
||||
return ( state );
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
eps = mkstate( SYM_EPSILON );
|
||||
mkxtion( eps, state );
|
||||
return ( link_machines( state, eps ) );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* mkrep - make a replicated machine
|
||||
*
|
||||
* synopsis
|
||||
* new = mkrep( mach, lb, ub );
|
||||
*
|
||||
* new - a machine that matches whatever "mach" matched from "lb"
|
||||
* number of times to "ub" number of times
|
||||
*
|
||||
* note
|
||||
* if "ub" is INFINITY then "new" matches "lb" or more occurrences of "mach"
|
||||
*/
|
||||
|
||||
int mkrep( mach, lb, ub )
|
||||
int mach, lb, ub;
|
||||
|
||||
{
|
||||
int base_mach, tail, copy, i;
|
||||
|
||||
base_mach = copysingl( mach, lb - 1 );
|
||||
|
||||
if ( ub == INFINITY )
|
||||
{
|
||||
copy = dupmachine( mach );
|
||||
mach = link_machines( mach,
|
||||
link_machines( base_mach, mkclos( copy ) ) );
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
tail = mkstate( SYM_EPSILON );
|
||||
|
||||
for ( i = lb; i < ub; ++i )
|
||||
{
|
||||
copy = dupmachine( mach );
|
||||
tail = mkopt( link_machines( copy, tail ) );
|
||||
}
|
||||
|
||||
mach = link_machines( mach, link_machines( base_mach, tail ) );
|
||||
}
|
||||
|
||||
return ( mach );
|
||||
}
|
||||
|
||||
|
||||
/* mkstate - create a state with a transition on a given symbol
|
||||
*
|
||||
* synopsis
|
||||
*
|
||||
* state = mkstate( sym );
|
||||
*
|
||||
* state - a new state matching sym
|
||||
* sym - the symbol the new state is to have an out-transition on
|
||||
*
|
||||
* note that this routine makes new states in ascending order through the
|
||||
* state array (and increments LASTNFA accordingly). The routine DUPMACHINE
|
||||
* relies on machines being made in ascending order and that they are
|
||||
* CONTIGUOUS. Change it and you will have to rewrite DUPMACHINE (kludge
|
||||
* that it admittedly is)
|
||||
*/
|
||||
|
||||
int mkstate( sym )
|
||||
int sym;
|
||||
|
||||
{
|
||||
if ( ++lastnfa >= current_mns )
|
||||
{
|
||||
if ( (current_mns += MNS_INCREMENT) >= MAXIMUM_MNS )
|
||||
lerrif( "input rules are too complicated (>= %d NFA states)",
|
||||
current_mns );
|
||||
|
||||
++num_reallocs;
|
||||
|
||||
firstst = reallocate_integer_array( firstst, current_mns );
|
||||
lastst = reallocate_integer_array( lastst, current_mns );
|
||||
finalst = reallocate_integer_array( finalst, current_mns );
|
||||
transchar = reallocate_integer_array( transchar, current_mns );
|
||||
trans1 = reallocate_integer_array( trans1, current_mns );
|
||||
trans2 = reallocate_integer_array( trans2, current_mns );
|
||||
accptnum = reallocate_integer_array( accptnum, current_mns );
|
||||
assoc_rule = reallocate_integer_array( assoc_rule, current_mns );
|
||||
state_type = reallocate_integer_array( state_type, current_mns );
|
||||
}
|
||||
|
||||
firstst[lastnfa] = lastnfa;
|
||||
finalst[lastnfa] = lastnfa;
|
||||
lastst[lastnfa] = lastnfa;
|
||||
transchar[lastnfa] = sym;
|
||||
trans1[lastnfa] = NO_TRANSITION;
|
||||
trans2[lastnfa] = NO_TRANSITION;
|
||||
accptnum[lastnfa] = NIL;
|
||||
assoc_rule[lastnfa] = num_rules;
|
||||
state_type[lastnfa] = current_state_type;
|
||||
|
||||
/* fix up equivalence classes base on this transition. Note that any
|
||||
* character which has its own transition gets its own equivalence class.
|
||||
* Thus only characters which are only in character classes have a chance
|
||||
* at being in the same equivalence class. E.g. "a|b" puts 'a' and 'b'
|
||||
* into two different equivalence classes. "[ab]" puts them in the same
|
||||
* equivalence class (barring other differences elsewhere in the input).
|
||||
*/
|
||||
|
||||
if ( sym < 0 )
|
||||
{
|
||||
/* we don't have to update the equivalence classes since that was
|
||||
* already done when the ccl was created for the first time
|
||||
*/
|
||||
}
|
||||
|
||||
else if ( sym == SYM_EPSILON )
|
||||
++numeps;
|
||||
|
||||
else
|
||||
{
|
||||
if ( useecs )
|
||||
/* map NUL's to csize */
|
||||
mkechar( sym ? sym : csize, nextecm, ecgroup );
|
||||
}
|
||||
|
||||
return ( lastnfa );
|
||||
}
|
||||
|
||||
|
||||
/* mkxtion - make a transition from one state to another
|
||||
*
|
||||
* synopsis
|
||||
*
|
||||
* mkxtion( statefrom, stateto );
|
||||
*
|
||||
* statefrom - the state from which the transition is to be made
|
||||
* stateto - the state to which the transition is to be made
|
||||
*/
|
||||
|
||||
void mkxtion( statefrom, stateto )
|
||||
int statefrom, stateto;
|
||||
|
||||
{
|
||||
if ( trans1[statefrom] == NO_TRANSITION )
|
||||
trans1[statefrom] = stateto;
|
||||
|
||||
else if ( (transchar[statefrom] != SYM_EPSILON) ||
|
||||
(trans2[statefrom] != NO_TRANSITION) )
|
||||
flexfatal( "found too many transitions in mkxtion()" );
|
||||
|
||||
else
|
||||
{ /* second out-transition for an epsilon state */
|
||||
++eps2;
|
||||
trans2[statefrom] = stateto;
|
||||
}
|
||||
}
|
||||
|
||||
/* new_rule - initialize for a new rule
|
||||
*
|
||||
* synopsis
|
||||
*
|
||||
* new_rule();
|
||||
*
|
||||
* the global num_rules is incremented and the any corresponding dynamic
|
||||
* arrays (such as rule_type[]) are grown as needed.
|
||||
*/
|
||||
|
||||
void new_rule()
|
||||
|
||||
{
|
||||
if ( ++num_rules >= current_max_rules )
|
||||
{
|
||||
++num_reallocs;
|
||||
current_max_rules += MAX_RULES_INCREMENT;
|
||||
rule_type = reallocate_integer_array( rule_type, current_max_rules );
|
||||
rule_linenum =
|
||||
reallocate_integer_array( rule_linenum, current_max_rules );
|
||||
}
|
||||
|
||||
if ( num_rules > MAX_RULE )
|
||||
lerrif( "too many rules (> %d)!", MAX_RULE );
|
||||
|
||||
rule_linenum[num_rules] = linenum;
|
||||
}
|
702
util/flex/parse.y
Normal file
702
util/flex/parse.y
Normal file
|
@ -0,0 +1,702 @@
|
|||
|
||||
/* parse.y - parser for flex input */
|
||||
|
||||
%token CHAR NUMBER SECTEND SCDECL XSCDECL WHITESPACE NAME PREVCCL EOF_OP
|
||||
|
||||
%{
|
||||
/*-
|
||||
* Copyright (c) 1990 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* Vern Paxson.
|
||||
*
|
||||
* The United States Government has rights in this work pursuant
|
||||
* to contract no. DE-AC03-76SF00098 between the United States
|
||||
* Department of Energy and the University of California.
|
||||
*
|
||||
* Redistribution and use in source and binary forms are permitted provided
|
||||
* that: (1) source distributions retain this entire copyright notice and
|
||||
* comment, and (2) distributions including binaries display the following
|
||||
* acknowledgement: ``This product includes software developed by the
|
||||
* University of California, Berkeley and its contributors'' in the
|
||||
* documentation or other materials provided with the distribution and in
|
||||
* all advertising materials mentioning features or use of this software.
|
||||
* Neither the name of the University nor the names of its contributors may
|
||||
* be used to endorse or promote products derived from this software without
|
||||
* specific prior written permission.
|
||||
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
|
||||
* WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*/
|
||||
|
||||
#ifndef lint
|
||||
static char rcsid[] =
|
||||
"@(#) $Header$ (LBL)";
|
||||
#endif
|
||||
|
||||
#include "flexdef.h"
|
||||
|
||||
int pat, scnum, eps, headcnt, trailcnt, anyccl, lastchar, i, actvp, rulelen;
|
||||
int trlcontxt, xcluflg, cclsorted, varlength, variable_trail_rule;
|
||||
Char clower();
|
||||
|
||||
static int madeany = false; /* whether we've made the '.' character class */
|
||||
int previous_continued_action; /* whether the previous rule's action was '|' */
|
||||
|
||||
%}
|
||||
|
||||
%%
|
||||
goal            :  initlex sect1 sect1end sect2 initforrule
                        {
                        /* add the default rule: a state on a negated, newly
                         * created character class, branched into the rule
                         * set of every start condition
                         */
                        int def_rule;

                        pat = cclinit();
                        cclnegate( pat );
                        def_rule = mkstate( -pat );
                        finish_rule( def_rule, false, 0, 0 );

                        i = 1;
                        while ( i <= lastsc )
                            {
                            scset[i] = mkbranch( scset[i], def_rule );
                            ++i;
                            }

                        /* action text written for the default rule */
                        fputs( spprdflt ?
                                   "YY_FATAL_ERROR( \"flex scanner jammed\" )" :
                                   "ECHO",
                               temp_action_file );
                        fputs( ";\n\tYY_BREAK\n", temp_action_file );
                        }
                ;
|
||||
|
||||
initlex :
|
||||
{
|
||||
/* initialize for processing rules */
|
||||
|
||||
/* create default DFA start condition */
|
||||
scinstal( "INITIAL", false );
|
||||
}
|
||||
;
|
||||
|
||||
sect1 : sect1 startconddecl WHITESPACE namelist1 '\n'
|
||||
|
|
||||
| error '\n'
|
||||
{ synerr( "unknown error processing section 1" ); }
|
||||
;
|
||||
|
||||
sect1end : SECTEND
|
||||
;
|
||||
|
||||
startconddecl : SCDECL
|
||||
{
|
||||
/* these productions are separate from the s1object
|
||||
* rule because the semantics must be done before
|
||||
* we parse the remainder of an s1object
|
||||
*/
|
||||
|
||||
xcluflg = false;
|
||||
}
|
||||
|
||||
| XSCDECL
|
||||
{ xcluflg = true; }
|
||||
;
|
||||
|
||||
namelist1 : namelist1 WHITESPACE NAME
|
||||
{ scinstal( nmstr, xcluflg ); }
|
||||
|
||||
| NAME
|
||||
{ scinstal( nmstr, xcluflg ); }
|
||||
|
||||
| error
|
||||
{ synerr( "bad start condition list" ); }
|
||||
;
|
||||
|
||||
sect2 : sect2 initforrule flexrule '\n'
|
||||
|
|
||||
;
|
||||
|
||||
initforrule     :
                        {
                        /* reset the per-rule flags and counters, then
                         * start a new rule
                         */
                        varlength = false;
                        variable_trail_rule = false;
                        trlcontxt = false;

                        rulelen = 0;
                        headcnt = 0;
                        trailcnt = 0;

                        current_state_type = STATE_NORMAL;
                        previous_continued_action = continued_action;
                        new_rule();
                        }
                ;
|
||||
|
||||
flexrule : scon '^' rule
|
||||
{
|
||||
pat = $3;
|
||||
finish_rule( pat, variable_trail_rule,
|
||||
headcnt, trailcnt );
|
||||
|
||||
for ( i = 1; i <= actvp; ++i )
|
||||
scbol[actvsc[i]] =
|
||||
mkbranch( scbol[actvsc[i]], pat );
|
||||
|
||||
if ( ! bol_needed )
|
||||
{
|
||||
bol_needed = true;
|
||||
|
||||
if ( performance_report )
|
||||
pinpoint_message(
|
||||
"'^' operator results in sub-optimal performance" );
|
||||
}
|
||||
}
|
||||
|
||||
| scon rule
|
||||
{
|
||||
pat = $2;
|
||||
finish_rule( pat, variable_trail_rule,
|
||||
headcnt, trailcnt );
|
||||
|
||||
for ( i = 1; i <= actvp; ++i )
|
||||
scset[actvsc[i]] =
|
||||
mkbranch( scset[actvsc[i]], pat );
|
||||
}
|
||||
|
||||
| '^' rule
|
||||
{
|
||||
pat = $2;
|
||||
finish_rule( pat, variable_trail_rule,
|
||||
headcnt, trailcnt );
|
||||
|
||||
/* add to all non-exclusive start conditions,
|
||||
* including the default (0) start condition
|
||||
*/
|
||||
|
||||
for ( i = 1; i <= lastsc; ++i )
|
||||
if ( ! scxclu[i] )
|
||||
scbol[i] = mkbranch( scbol[i], pat );
|
||||
|
||||
if ( ! bol_needed )
|
||||
{
|
||||
bol_needed = true;
|
||||
|
||||
if ( performance_report )
|
||||
pinpoint_message(
|
||||
"'^' operator results in sub-optimal performance" );
|
||||
}
|
||||
}
|
||||
|
||||
| rule
|
||||
{
|
||||
pat = $1;
|
||||
finish_rule( pat, variable_trail_rule,
|
||||
headcnt, trailcnt );
|
||||
|
||||
for ( i = 1; i <= lastsc; ++i )
|
||||
if ( ! scxclu[i] )
|
||||
scset[i] = mkbranch( scset[i], pat );
|
||||
}
|
||||
|
||||
| scon EOF_OP
|
||||
{ build_eof_action(); }
|
||||
|
||||
| EOF_OP
|
||||
{
|
||||
/* this EOF applies to all start conditions
|
||||
* which don't already have EOF actions
|
||||
*/
|
||||
actvp = 0;
|
||||
|
||||
for ( i = 1; i <= lastsc; ++i )
|
||||
if ( ! sceof[i] )
|
||||
actvsc[++actvp] = i;
|
||||
|
||||
if ( actvp == 0 )
|
||||
pinpoint_message(
|
||||
"warning - all start conditions already have <<EOF>> rules" );
|
||||
|
||||
else
|
||||
build_eof_action();
|
||||
}
|
||||
|
||||
| error
|
||||
{ synerr( "unrecognized rule" ); }
|
||||
;
|
||||
|
||||
scon : '<' namelist2 '>'
|
||||
;
|
||||
|
||||
namelist2       :  namelist2 ',' NAME
                        {
                        /* append a further start condition to the list */
                        if ( (scnum = sclookup( nmstr )) == 0 )
                            format_pinpoint_message(
                                "undeclared start condition %s", nmstr );

                        else
                            actvsc[++actvp] = scnum;
                        }

                |  NAME
                        {
                        /* the first name starts a new list.  Empty the list
                         * before the lookup so that an undeclared name
                         * cannot leave the entries of an earlier rule
                         * active for this one.
                         */
                        actvp = 0;

                        if ( (scnum = sclookup( nmstr )) == 0 )
                            format_pinpoint_message(
                                "undeclared start condition %s", nmstr );

                        else
                            actvsc[++actvp] = scnum;
                        }

                |  error
                        { synerr( "bad start condition list" ); }
                ;
|
||||
|
||||
rule : re2 re
|
||||
{
|
||||
if ( transchar[lastst[$2]] != SYM_EPSILON )
|
||||
/* provide final transition \now/ so it
|
||||
* will be marked as a trailing context
|
||||
* state
|
||||
*/
|
||||
$2 = link_machines( $2, mkstate( SYM_EPSILON ) );
|
||||
|
||||
mark_beginning_as_normal( $2 );
|
||||
current_state_type = STATE_NORMAL;
|
||||
|
||||
if ( previous_continued_action )
|
||||
{
|
||||
/* we need to treat this as variable trailing
|
||||
* context so that the backup does not happen
|
||||
* in the action but before the action switch
|
||||
* statement. If the backup happens in the
|
||||
* action, then the rules "falling into" this
|
||||
* one's action will *also* do the backup,
|
||||
* erroneously.
|
||||
*/
|
||||
if ( ! varlength || headcnt != 0 )
|
||||
{
|
||||
fprintf( stderr,
|
||||
"%s: warning - trailing context rule at line %d made variable because\n",
|
||||
program_name, linenum );
|
||||
fprintf( stderr,
|
||||
" of preceding '|' action\n" );
|
||||
}
|
||||
|
||||
/* mark as variable */
|
||||
varlength = true;
|
||||
headcnt = 0;
|
||||
}
|
||||
|
||||
if ( varlength && headcnt == 0 )
|
||||
{ /* variable trailing context rule */
|
||||
/* mark the first part of the rule as the accepting
|
||||
* "head" part of a trailing context rule
|
||||
*/
|
||||
/* by the way, we didn't do this at the beginning
|
||||
* of this production because back then
|
||||
* current_state_type was set up for a trail
|
||||
* rule, and add_accept() can create a new
|
||||
* state ...
|
||||
*/
|
||||
add_accept( $1, num_rules | YY_TRAILING_HEAD_MASK );
|
||||
variable_trail_rule = true;
|
||||
}
|
||||
|
||||
else
|
||||
trailcnt = rulelen;
|
||||
|
||||
$$ = link_machines( $1, $2 );
|
||||
}
|
||||
|
||||
| re2 re '$'
|
||||
{ synerr( "trailing context used twice" ); }
|
||||
|
||||
| re '$'
|
||||
{
|
||||
if ( trlcontxt )
|
||||
{
|
||||
synerr( "trailing context used twice" );
|
||||
$$ = mkstate( SYM_EPSILON );
|
||||
}
|
||||
|
||||
else if ( previous_continued_action )
|
||||
{
|
||||
/* see the comment in the rule for "re2 re"
|
||||
* above
|
||||
*/
|
||||
if ( ! varlength || headcnt != 0 )
|
||||
{
|
||||
fprintf( stderr,
|
||||
"%s: warning - trailing context rule at line %d made variable because\n",
|
||||
program_name, linenum );
|
||||
fprintf( stderr,
|
||||
" of preceding '|' action\n" );
|
||||
}
|
||||
|
||||
/* mark as variable */
|
||||
varlength = true;
|
||||
headcnt = 0;
|
||||
}
|
||||
|
||||
trlcontxt = true;
|
||||
|
||||
if ( ! varlength )
|
||||
headcnt = rulelen;
|
||||
|
||||
++rulelen;
|
||||
trailcnt = 1;
|
||||
|
||||
eps = mkstate( SYM_EPSILON );
|
||||
$$ = link_machines( $1,
|
||||
link_machines( eps, mkstate( '\n' ) ) );
|
||||
}
|
||||
|
||||
| re
|
||||
{
|
||||
$$ = $1;
|
||||
|
||||
if ( trlcontxt )
|
||||
{
|
||||
if ( varlength && headcnt == 0 )
|
||||
/* both head and trail are variable-length */
|
||||
variable_trail_rule = true;
|
||||
else
|
||||
trailcnt = rulelen;
|
||||
}
|
||||
}
|
||||
;
|
||||
|
||||
|
||||
re : re '|' series
|
||||
{
|
||||
varlength = true;
|
||||
$$ = mkor( $1, $3 );
|
||||
}
|
||||
|
||||
| series
|
||||
{ $$ = $1; }
|
||||
;
|
||||
|
||||
|
||||
re2             :  re '/'
                        {
                        /* this is a production of its own so that the
                         * reduction happens before the trailing series
                         * is parsed
                         */

                        if ( ! trlcontxt )
                            trlcontxt = true;
                        else
                            synerr( "trailing context used twice" );

                        if ( ! varlength )
                            headcnt = rulelen;
                        else
                            /* we hope the trailing context is fixed-length */
                            varlength = false;

                        /* the length count starts over for the trailing part */
                        rulelen = 0;

                        current_state_type = STATE_TRAILING_CONTEXT;
                        $$ = $1;
                        }
                ;
|
||||
|
||||
series : series singleton
|
||||
{
|
||||
/* this is where concatenation of adjacent patterns
|
||||
* gets done
|
||||
*/
|
||||
$$ = link_machines( $1, $2 );
|
||||
}
|
||||
|
||||
| singleton
|
||||
{ $$ = $1; }
|
||||
;
|
||||
|
||||
singleton : singleton '*'
|
||||
{
|
||||
varlength = true;
|
||||
|
||||
$$ = mkclos( $1 );
|
||||
}
|
||||
|
||||
| singleton '+'
|
||||
{
|
||||
varlength = true;
|
||||
|
||||
$$ = mkposcl( $1 );
|
||||
}
|
||||
|
||||
| singleton '?'
|
||||
{
|
||||
varlength = true;
|
||||
|
||||
$$ = mkopt( $1 );
|
||||
}
|
||||
|
||||
| singleton '{' NUMBER ',' NUMBER '}'
|
||||
{
|
||||
varlength = true;
|
||||
|
||||
if ( $3 > $5 || $3 < 0 )
|
||||
{
|
||||
synerr( "bad iteration values" );
|
||||
$$ = $1;
|
||||
}
|
||||
else
|
||||
{
|
||||
if ( $3 == 0 )
|
||||
$$ = mkopt( mkrep( $1, $3, $5 ) );
|
||||
else
|
||||
$$ = mkrep( $1, $3, $5 );
|
||||
}
|
||||
}
|
||||
|
||||
| singleton '{' NUMBER ',' '}'
|
||||
{
|
||||
varlength = true;
|
||||
|
||||
if ( $3 <= 0 )
|
||||
{
|
||||
synerr( "iteration value must be positive" );
|
||||
$$ = $1;
|
||||
}
|
||||
|
||||
else
|
||||
$$ = mkrep( $1, $3, INFINITY );
|
||||
}
|
||||
|
||||
| singleton '{' NUMBER '}'
|
||||
{
|
||||
/* the singleton could be something like "(foo)",
|
||||
* in which case we have no idea what its length
|
||||
* is, so we punt here.
|
||||
*/
|
||||
varlength = true;
|
||||
|
||||
if ( $3 <= 0 )
|
||||
{
|
||||
synerr( "iteration value must be positive" );
|
||||
$$ = $1;
|
||||
}
|
||||
|
||||
else
|
||||
$$ = link_machines( $1, copysingl( $1, $3 - 1 ) );
|
||||
}
|
||||
|
||||
| '.'
|
||||
{
|
||||
if ( ! madeany )
|
||||
{
|
||||
/* create the '.' character class */
|
||||
anyccl = cclinit();
|
||||
ccladd( anyccl, '\n' );
|
||||
cclnegate( anyccl );
|
||||
|
||||
if ( useecs )
|
||||
mkeccl( ccltbl + cclmap[anyccl],
|
||||
ccllen[anyccl], nextecm,
|
||||
ecgroup, csize, csize );
|
||||
|
||||
madeany = true;
|
||||
}
|
||||
|
||||
++rulelen;
|
||||
|
||||
$$ = mkstate( -anyccl );
|
||||
}
|
||||
|
||||
| fullccl
|
||||
{
|
||||
if ( ! cclsorted )
|
||||
/* sort characters for fast searching. We use a
|
||||
* shell sort since this list could be large.
|
||||
*/
|
||||
cshell( ccltbl + cclmap[$1], ccllen[$1], true );
|
||||
|
||||
if ( useecs )
|
||||
mkeccl( ccltbl + cclmap[$1], ccllen[$1],
|
||||
nextecm, ecgroup, csize, csize );
|
||||
|
||||
++rulelen;
|
||||
|
||||
$$ = mkstate( -$1 );
|
||||
}
|
||||
|
||||
| PREVCCL
|
||||
{
|
||||
++rulelen;
|
||||
|
||||
$$ = mkstate( -$1 );
|
||||
}
|
||||
|
||||
| '"' string '"'
|
||||
{ $$ = $2; }
|
||||
|
||||
| '(' re ')'
|
||||
{ $$ = $2; }
|
||||
|
||||
| CHAR
|
||||
{
|
||||
++rulelen;
|
||||
|
||||
if ( caseins && $1 >= 'A' && $1 <= 'Z' )
|
||||
$1 = clower( $1 );
|
||||
|
||||
$$ = mkstate( $1 );
|
||||
}
|
||||
;
|
||||
|
||||
fullccl : '[' ccl ']'
|
||||
{ $$ = $2; }
|
||||
|
||||
| '[' '^' ccl ']'
|
||||
{
|
||||
/* *Sigh* - to be compatible with Unix lex, negated ccls
|
||||
* match newlines
|
||||
*/
|
||||
#ifdef NOTDEF
|
||||
ccladd( $3, '\n' ); /* negated ccls don't match '\n' */
|
||||
cclsorted = false; /* because we added the newline */
|
||||
#endif
|
||||
cclnegate( $3 );
|
||||
$$ = $3;
|
||||
}
|
||||
;
|
||||
|
||||
ccl : ccl CHAR '-' CHAR
|
||||
{
|
||||
if ( $2 > $4 )
|
||||
synerr( "negative range in character class" );
|
||||
|
||||
else
|
||||
{
|
||||
if ( caseins )
|
||||
{
|
||||
if ( $2 >= 'A' && $2 <= 'Z' )
|
||||
$2 = clower( $2 );
|
||||
if ( $4 >= 'A' && $4 <= 'Z' )
|
||||
$4 = clower( $4 );
|
||||
}
|
||||
|
||||
for ( i = $2; i <= $4; ++i )
|
||||
ccladd( $1, i );
|
||||
|
||||
/* keep track if this ccl is staying in alphabetical
|
||||
* order
|
||||
*/
|
||||
cclsorted = cclsorted && ($2 > lastchar);
|
||||
lastchar = $4;
|
||||
}
|
||||
|
||||
$$ = $1;
|
||||
}
|
||||
|
||||
| ccl CHAR
|
||||
{
|
||||
if ( caseins )
|
||||
if ( $2 >= 'A' && $2 <= 'Z' )
|
||||
$2 = clower( $2 );
|
||||
|
||||
ccladd( $1, $2 );
|
||||
cclsorted = cclsorted && ($2 > lastchar);
|
||||
lastchar = $2;
|
||||
$$ = $1;
|
||||
}
|
||||
|
||||
|
|
||||
{
|
||||
cclsorted = true;
|
||||
lastchar = 0;
|
||||
$$ = cclinit();
|
||||
}
|
||||
;
|
||||
|
||||
string : string CHAR
|
||||
{
|
||||
if ( caseins )
|
||||
if ( $2 >= 'A' && $2 <= 'Z' )
|
||||
$2 = clower( $2 );
|
||||
|
||||
++rulelen;
|
||||
|
||||
$$ = link_machines( $1, mkstate( $2 ) );
|
||||
}
|
||||
|
||||
|
|
||||
{ $$ = mkstate( SYM_EPSILON ); }
|
||||
;
|
||||
|
||||
%%
|
||||
|
||||
|
||||
/* build_eof_action - build the "<<EOF>>" action for the active start
|
||||
* conditions
|
||||
*/
|
||||
|
||||
void build_eof_action()
|
||||
|
||||
{
|
||||
register int i;
|
||||
|
||||
for ( i = 1; i <= actvp; ++i )
|
||||
{
|
||||
if ( sceof[actvsc[i]] )
|
||||
format_pinpoint_message(
|
||||
"multiple <<EOF>> rules for start condition %s",
|
||||
scname[actvsc[i]] );
|
||||
|
||||
else
|
||||
{
|
||||
sceof[actvsc[i]] = true;
|
||||
fprintf( temp_action_file, "case YY_STATE_EOF(%s):\n",
|
||||
scname[actvsc[i]] );
|
||||
}
|
||||
}
|
||||
|
||||
line_directive_out( temp_action_file );
|
||||
}
|
||||
|
||||
|
||||
/* synerr - report a syntax error */
|
||||
|
||||
void synerr( str )
|
||||
char str[];
|
||||
|
||||
{
|
||||
syntaxerror = true;
|
||||
pinpoint_message( str );
|
||||
}
|
||||
|
||||
|
||||
/* format_pinpoint_message - write out a message formatted with one string,
|
||||
* pinpointing its location
|
||||
*/
|
||||
|
||||
void format_pinpoint_message( msg, arg )
|
||||
char msg[], arg[];
|
||||
|
||||
{
|
||||
char errmsg[MAXLINE];
|
||||
|
||||
(void) sprintf( errmsg, msg, arg );
|
||||
pinpoint_message( errmsg );
|
||||
}
|
||||
|
||||
|
||||
/* pinpoint_message - write out a message, pinpointing its location */
|
||||
|
||||
void pinpoint_message( str )
|
||||
char str[];
|
||||
|
||||
{
|
||||
fprintf( stderr, "\"%s\", line %d: %s\n", infilename, linenum, str );
|
||||
}
|
||||
|
||||
|
||||
/* yyerror - eat up an error message from the parser;
 *           currently, messages are ignored
 */

void yyerror( msg )
char msg[];

    {
    /* intentionally empty: "msg" is discarded */
    }
|
533
util/flex/scan.l
Normal file
533
util/flex/scan.l
Normal file
|
@ -0,0 +1,533 @@
|
|||
|
||||
/* scan.l - scanner for flex input */
|
||||
|
||||
%{
|
||||
/*-
|
||||
* Copyright (c) 1990 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* Vern Paxson.
|
||||
*
|
||||
* The United States Government has rights in this work pursuant
|
||||
* to contract no. DE-AC03-76SF00098 between the United States
|
||||
* Department of Energy and the University of California.
|
||||
*
|
||||
* Redistribution and use in source and binary forms are permitted provided
|
||||
* that: (1) source distributions retain this entire copyright notice and
|
||||
* comment, and (2) distributions including binaries display the following
|
||||
* acknowledgement: ``This product includes software developed by the
|
||||
* University of California, Berkeley and its contributors'' in the
|
||||
* documentation or other materials provided with the distribution and in
|
||||
* all advertising materials mentioning features or use of this software.
|
||||
* Neither the name of the University nor the names of its contributors may
|
||||
* be used to endorse or promote products derived from this software without
|
||||
* specific prior written permission.
|
||||
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
|
||||
* WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*/
|
||||
|
||||
#ifndef lint
|
||||
static char rcsid[] =
|
||||
"@(#) $Header$ (LBL)";
|
||||
#endif
|
||||
|
||||
#undef yywrap
|
||||
|
||||
#include "flexdef.h"
|
||||
#include "parse.h"
|
||||
|
||||
#define ACTION_ECHO fprintf( temp_action_file, "%s", yytext )
|
||||
#define MARK_END_OF_PROLOG fprintf( temp_action_file, "%%%% end of prolog\n" );
|
||||
|
||||
#undef YY_DECL
|
||||
#define YY_DECL \
|
||||
int flexscan()
|
||||
|
||||
#define RETURNCHAR \
|
||||
yylval = yytext[0]; \
|
||||
return ( CHAR );
|
||||
|
||||
#define RETURNNAME \
|
||||
(void) strcpy( nmstr, (char *) yytext ); \
|
||||
return ( NAME );
|
||||
|
||||
#define PUT_BACK_STRING(str, start) \
|
||||
for ( i = strlen( (char *) (str) ) - 1; i >= start; --i ) \
|
||||
unput((str)[i])
|
||||
|
||||
#define CHECK_REJECT(str) \
|
||||
if ( all_upper( str ) ) \
|
||||
reject = true;
|
||||
|
||||
#define CHECK_YYMORE(str) \
|
||||
if ( all_lower( str ) ) \
|
||||
yymore_used = true;
|
||||
%}
|
||||
|
||||
%x SECT2 SECT2PROLOG SECT3 CODEBLOCK PICKUPDEF SC CARETISBOL NUM QUOTE
|
||||
%x FIRSTCCL CCL ACTION RECOVER BRACEERROR C_COMMENT ACTION_COMMENT
|
||||
%x ACTION_STRING PERCENT_BRACE_ACTION USED_LIST CODEBLOCK_2 XLATION
|
||||
|
||||
WS [ \t\f]+
|
||||
OPTWS [ \t\f]*
|
||||
NOT_WS [^ \t\f\n]
|
||||
|
||||
NAME [a-z_][a-z_0-9-]*
|
||||
NOT_NAME [^a-z_\n]+
|
||||
|
||||
SCNAME {NAME}
|
||||
|
||||
ESCSEQ \\([^\n]|[0-9]{1,3}|x[0-9a-f]{1,2})
|
||||
|
||||
%%
|
||||
static int bracelevel, didadef;
|
||||
int i, indented_code, checking_used, new_xlation;
|
||||
int doing_codeblock = false;
|
||||
Char nmdef[MAXLINE], myesc();
|
||||
|
||||
^{WS} indented_code = true; BEGIN(CODEBLOCK);
|
||||
^#.*\n ++linenum; /* treat as a comment */
|
||||
^"/*" ECHO; BEGIN(C_COMMENT);
|
||||
^"%s"{NAME}? return ( SCDECL );
|
||||
^"%x"{NAME}? return ( XSCDECL );
|
||||
^"%{".*\n {
|
||||
++linenum;
|
||||
line_directive_out( stdout );
|
||||
indented_code = false;
|
||||
BEGIN(CODEBLOCK);
|
||||
}
|
||||
|
||||
{WS} return ( WHITESPACE );
|
||||
|
||||
^"%%".* {
|
||||
sectnum = 2;
|
||||
line_directive_out( stdout );
|
||||
BEGIN(SECT2PROLOG);
|
||||
return ( SECTEND );
|
||||
}
|
||||
|
||||
^"%used"                {
                        /* pinpoint_message() prints its argument verbatim
                         * (it is not a printf format), so '%' is written
                         * once here
                         */
                        pinpoint_message( "warning - %used/%unused have been deprecated" );
                        checking_used = REALLY_USED; BEGIN(USED_LIST);
                        }
|
||||
^"%unused"              {
                        /* pinpoint_message() prints its argument verbatim
                         * (it is not a printf format), so '%' is written
                         * once here
                         */
                        pinpoint_message( "warning - %used/%unused have been deprecated" );
                        checking_used = REALLY_NOT_USED; BEGIN(USED_LIST);
                        }
|
||||
|
||||
|
||||
^"%"[aeknopt]" ".*\n {
|
||||
#ifdef NOTDEF
|
||||
fprintf( stderr,
|
||||
"old-style lex command at line %d ignored:\n\t%s",
|
||||
linenum, yytext );
|
||||
#endif
|
||||
++linenum;
|
||||
}
|
||||
|
||||
^"%"[cr]{OPTWS} /* ignore old lex directive */
|
||||
|
||||
%t{OPTWS}\n {
|
||||
++linenum;
|
||||
xlation =
|
||||
(int *) malloc( sizeof( int ) * (unsigned) csize );
|
||||
|
||||
if ( ! xlation )
|
||||
flexfatal(
|
||||
"dynamic memory failure building %t table" );
|
||||
|
||||
for ( i = 0; i < csize; ++i )
|
||||
xlation[i] = 0;
|
||||
|
||||
num_xlations = 0;
|
||||
|
||||
BEGIN(XLATION);
|
||||
}
|
||||
|
||||
^"%"[^sxanpekotcru{}]{OPTWS} synerr( "unrecognized '%' directive" );
|
||||
|
||||
^{NAME} {
|
||||
(void) strcpy( nmstr, (char *) yytext );
|
||||
didadef = false;
|
||||
BEGIN(PICKUPDEF);
|
||||
}
|
||||
|
||||
{SCNAME} RETURNNAME;
|
||||
^{OPTWS}\n ++linenum; /* allows blank lines in section 1 */
|
||||
{OPTWS}\n ++linenum; return ( '\n' );
|
||||
. synerr( "illegal character" ); BEGIN(RECOVER);
|
||||
|
||||
|
||||
<C_COMMENT>"*/" ECHO; BEGIN(INITIAL);
|
||||
<C_COMMENT>"*/".*\n ++linenum; ECHO; BEGIN(INITIAL);
|
||||
<C_COMMENT>[^*\n]+ ECHO;
|
||||
<C_COMMENT>"*" ECHO;
|
||||
<C_COMMENT>\n ++linenum; ECHO;
|
||||
|
||||
|
||||
<CODEBLOCK>^"%}".*\n ++linenum; BEGIN(INITIAL);
|
||||
<CODEBLOCK>"reject" ECHO; CHECK_REJECT(yytext);
|
||||
<CODEBLOCK>"yymore" ECHO; CHECK_YYMORE(yytext);
|
||||
<CODEBLOCK>{NAME}|{NOT_NAME}|. ECHO;
|
||||
<CODEBLOCK>\n {
|
||||
++linenum;
|
||||
ECHO;
|
||||
if ( indented_code )
|
||||
BEGIN(INITIAL);
|
||||
}
|
||||
|
||||
|
||||
<PICKUPDEF>{WS} /* separates name and definition */
|
||||
|
||||
<PICKUPDEF>{NOT_WS}.*   {
                        (void) strcpy( (char *) nmdef, (char *) yytext );

                        /* strip trailing blanks and tabs.  The comparisons
                         * are parenthesized so that the "i >= 0" test
                         * guards both of them; without the parentheses
                         * && binds tighter than || and the tab test would
                         * run unguarded.
                         */
                        for ( i = strlen( (char *) nmdef ) - 1;
                              i >= 0 &&
                              (nmdef[i] == ' ' || nmdef[i] == '\t');
                              --i )
                            ;

                        nmdef[i + 1] = '\0';

                        /* record the definition under the name saved
                         * in nmstr
                         */
                        ndinstal( nmstr, nmdef );
                        didadef = true;
                        }
|
||||
|
||||
<PICKUPDEF>\n {
|
||||
if ( ! didadef )
|
||||
synerr( "incomplete name definition" );
|
||||
BEGIN(INITIAL);
|
||||
++linenum;
|
||||
}
|
||||
|
||||
<RECOVER>.*\n ++linenum; BEGIN(INITIAL); RETURNNAME;
|
||||
|
||||
|
||||
<USED_LIST>\n ++linenum; BEGIN(INITIAL);
|
||||
<USED_LIST>{WS}
|
||||
<USED_LIST>"reject" {
|
||||
if ( all_upper( yytext ) )
|
||||
reject_really_used = checking_used;
|
||||
else
|
||||
synerr( "unrecognized %used/%unused construct" );
|
||||
}
|
||||
<USED_LIST>"yymore" {
|
||||
if ( all_lower( yytext ) )
|
||||
yymore_really_used = checking_used;
|
||||
else
|
||||
synerr( "unrecognized %used/%unused construct" );
|
||||
}
|
||||
<USED_LIST>{NOT_WS}+ synerr( "unrecognized %used/%unused construct" );
|
||||
|
||||
|
||||
<XLATION>"%t"{OPTWS}\n ++linenum; BEGIN(INITIAL);
|
||||
<XLATION>^{OPTWS}[0-9]+ ++num_xlations; new_xlation = true;
|
||||
<XLATION>^. synerr( "bad row in translation table" );
|
||||
<XLATION>{WS} /* ignore whitespace */
|
||||
|
||||
<XLATION>{ESCSEQ} {
|
||||
xlation[myesc( yytext )] =
|
||||
(new_xlation ? num_xlations : -num_xlations);
|
||||
new_xlation = false;
|
||||
}
|
||||
<XLATION>. {
|
||||
xlation[yytext[0]] =
|
||||
(new_xlation ? num_xlations : -num_xlations);
|
||||
new_xlation = false;
|
||||
}
|
||||
|
||||
<XLATION>\n ++linenum;
|
||||
|
||||
|
||||
<SECT2PROLOG>.*\n/{NOT_WS} {
|
||||
++linenum;
|
||||
ACTION_ECHO;
|
||||
MARK_END_OF_PROLOG;
|
||||
BEGIN(SECT2);
|
||||
}
|
||||
|
||||
<SECT2PROLOG>.*\n ++linenum; ACTION_ECHO;
|
||||
|
||||
<SECT2PROLOG><<EOF>> MARK_END_OF_PROLOG; yyterminate();
|
||||
|
||||
<SECT2>^{OPTWS}\n ++linenum; /* allow blank lines in section 2 */
|
||||
|
||||
<SECT2>^({WS}|"%{") {
|
||||
indented_code = (yytext[0] != '%');
|
||||
doing_codeblock = true;
|
||||
bracelevel = 1;
|
||||
|
||||
if ( indented_code )
|
||||
ACTION_ECHO;
|
||||
|
||||
BEGIN(CODEBLOCK_2);
|
||||
}
|
||||
|
||||
<SECT2>"<" BEGIN(SC); return ( '<' );
|
||||
<SECT2>^"^" return ( '^' );
|
||||
<SECT2>\" BEGIN(QUOTE); return ( '"' );
|
||||
<SECT2>"{"/[0-9] BEGIN(NUM); return ( '{' );
|
||||
<SECT2>"{"[^0-9\n][^}\n]* BEGIN(BRACEERROR);
|
||||
<SECT2>"$"/[ \t\n] return ( '$' );
|
||||
|
||||
<SECT2>{WS}"%{" {
|
||||
bracelevel = 1;
|
||||
BEGIN(PERCENT_BRACE_ACTION);
|
||||
return ( '\n' );
|
||||
}
|
||||
<SECT2>{WS}"|".*\n continued_action = true; ++linenum; return ( '\n' );
|
||||
|
||||
<SECT2>{WS} {
|
||||
/* this rule is separate from the one below because
|
||||
* otherwise we get variable trailing context, so
|
||||
* we can't build the scanner using -{f,F}
|
||||
*/
|
||||
bracelevel = 0;
|
||||
continued_action = false;
|
||||
BEGIN(ACTION);
|
||||
return ( '\n' );
|
||||
}
|
||||
|
||||
<SECT2>{OPTWS}/\n {
|
||||
bracelevel = 0;
|
||||
continued_action = false;
|
||||
BEGIN(ACTION);
|
||||
return ( '\n' );
|
||||
}
|
||||
|
||||
<SECT2>^{OPTWS}\n ++linenum; return ( '\n' );
|
||||
|
||||
<SECT2>"<<EOF>>" return ( EOF_OP );
|
||||
|
||||
<SECT2>^"%%".* {
|
||||
sectnum = 3;
|
||||
BEGIN(SECT3);
|
||||
return ( EOF ); /* to stop the parser */
|
||||
}
|
||||
|
||||
<SECT2>"["([^\\\]\n]|{ESCSEQ})+"]" {
|
||||
int cclval;
|
||||
|
||||
(void) strcpy( nmstr, (char *) yytext );
|
||||
|
||||
/* check to see if we've already encountered this ccl */
|
||||
if ( (cclval = ccllookup( (Char *) nmstr )) )
|
||||
{
|
||||
yylval = cclval;
|
||||
++cclreuse;
|
||||
return ( PREVCCL );
|
||||
}
|
||||
else
|
||||
{
|
||||
/* we fudge a bit. We know that this ccl will
|
||||
* soon be numbered as lastccl + 1 by cclinit
|
||||
*/
|
||||
cclinstal( (Char *) nmstr, lastccl + 1 );
|
||||
|
||||
/* push back everything but the leading bracket
|
||||
* so the ccl can be rescanned
|
||||
*/
|
||||
PUT_BACK_STRING((Char *) nmstr, 1);
|
||||
|
||||
BEGIN(FIRSTCCL);
|
||||
return ( '[' );
|
||||
}
|
||||
}
|
||||
|
||||
<SECT2>"{"{NAME}"}" {
|
||||
register Char *nmdefptr;
|
||||
Char *ndlookup();
|
||||
|
||||
(void) strcpy( nmstr, (char *) yytext );
|
||||
nmstr[yyleng - 1] = '\0'; /* chop trailing brace */
|
||||
|
||||
/* lookup from "nmstr + 1" to chop leading brace */
|
||||
if ( ! (nmdefptr = ndlookup( nmstr + 1 )) )
|
||||
synerr( "undefined {name}" );
|
||||
|
||||
else
|
||||
{ /* push back name surrounded by ()'s */
|
||||
unput(')');
|
||||
PUT_BACK_STRING(nmdefptr, 0);
|
||||
unput('(');
|
||||
}
|
||||
}
|
||||
|
||||
<SECT2>[/|*+?.()] return ( yytext[0] );
|
||||
<SECT2>. RETURNCHAR;
|
||||
<SECT2>\n ++linenum; return ( '\n' );
|
||||
|
||||
|
||||
<SC>"," return ( ',' );
|
||||
<SC>">" BEGIN(SECT2); return ( '>' );
|
||||
<SC>">"/"^" BEGIN(CARETISBOL); return ( '>' );
|
||||
<SC>{SCNAME} RETURNNAME;
|
||||
<SC>. synerr( "bad start condition name" );
|
||||
|
||||
<CARETISBOL>"^" BEGIN(SECT2); return ( '^' );
|
||||
|
||||
|
||||
<QUOTE>[^"\n] RETURNCHAR;
|
||||
<QUOTE>\" BEGIN(SECT2); return ( '"' );
|
||||
|
||||
<QUOTE>\n {
|
||||
synerr( "missing quote" );
|
||||
BEGIN(SECT2);
|
||||
++linenum;
|
||||
return ( '"' );
|
||||
}
|
||||
|
||||
|
||||
<FIRSTCCL>"^"/[^-\n] BEGIN(CCL); return ( '^' );
|
||||
<FIRSTCCL>"^"/- return ( '^' );
|
||||
<FIRSTCCL>- BEGIN(CCL); yylval = '-'; return ( CHAR );
|
||||
<FIRSTCCL>. BEGIN(CCL); RETURNCHAR;
|
||||
|
||||
<CCL>-/[^\]\n] return ( '-' );
|
||||
<CCL>[^\]\n] RETURNCHAR;
|
||||
<CCL>"]" BEGIN(SECT2); return ( ']' );
|
||||
|
||||
|
||||
<NUM>[0-9]+ {
|
||||
yylval = myctoi( yytext );
|
||||
return ( NUMBER );
|
||||
}
|
||||
|
||||
<NUM>"," return ( ',' );
|
||||
<NUM>"}" BEGIN(SECT2); return ( '}' );
|
||||
|
||||
<NUM>. {
|
||||
synerr( "bad character inside {}'s" );
|
||||
BEGIN(SECT2);
|
||||
return ( '}' );
|
||||
}
|
||||
|
||||
<NUM>\n {
|
||||
synerr( "missing }" );
|
||||
BEGIN(SECT2);
|
||||
++linenum;
|
||||
return ( '}' );
|
||||
}
|
||||
|
||||
|
||||
<BRACEERROR>"}" synerr( "bad name in {}'s" ); BEGIN(SECT2);
|
||||
<BRACEERROR>\n synerr( "missing }" ); ++linenum; BEGIN(SECT2);
|
||||
|
||||
|
||||
<PERCENT_BRACE_ACTION,CODEBLOCK_2>{OPTWS}"%}".* bracelevel = 0;
|
||||
<PERCENT_BRACE_ACTION,CODEBLOCK_2,ACTION>"reject" {
|
||||
ACTION_ECHO;
|
||||
CHECK_REJECT(yytext);
|
||||
}
|
||||
<PERCENT_BRACE_ACTION,CODEBLOCK_2,ACTION>"yymore" {
|
||||
ACTION_ECHO;
|
||||
CHECK_YYMORE(yytext);
|
||||
}
|
||||
<PERCENT_BRACE_ACTION,CODEBLOCK_2>{NAME}|{NOT_NAME}|. ACTION_ECHO;
|
||||
<PERCENT_BRACE_ACTION,CODEBLOCK_2>\n {
|
||||
++linenum;
|
||||
ACTION_ECHO;
|
||||
if ( bracelevel == 0 ||
|
||||
(doing_codeblock && indented_code) )
|
||||
{
|
||||
if ( ! doing_codeblock )
|
||||
fputs( "\tYY_BREAK\n", temp_action_file );
|
||||
|
||||
doing_codeblock = false;
|
||||
BEGIN(SECT2);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Reject and YYmore() are checked for above, in PERCENT_BRACE_ACTION */
|
||||
<ACTION>"{" ACTION_ECHO; ++bracelevel;
|
||||
<ACTION>"}" ACTION_ECHO; --bracelevel;
|
||||
<ACTION>[^a-z_{}"'/\n]+ ACTION_ECHO;
|
||||
<ACTION>{NAME} ACTION_ECHO;
|
||||
<ACTION>"/*" ACTION_ECHO; BEGIN(ACTION_COMMENT);
|
||||
<ACTION>"'"([^'\\\n]|\\.)*"'" ACTION_ECHO; /* character constant */
|
||||
<ACTION>\" ACTION_ECHO; BEGIN(ACTION_STRING);
|
||||
<ACTION>\n {
|
||||
++linenum;
|
||||
ACTION_ECHO;
|
||||
if ( bracelevel == 0 )
|
||||
{
|
||||
fputs( "\tYY_BREAK\n", temp_action_file );
|
||||
BEGIN(SECT2);
|
||||
}
|
||||
}
|
||||
<ACTION>. ACTION_ECHO;
|
||||
|
||||
<ACTION_COMMENT>"*/" ACTION_ECHO; BEGIN(ACTION);
|
||||
<ACTION_COMMENT>[^*\n]+ ACTION_ECHO;
|
||||
<ACTION_COMMENT>"*" ACTION_ECHO;
|
||||
<ACTION_COMMENT>\n ++linenum; ACTION_ECHO;
|
||||
<ACTION_COMMENT>. ACTION_ECHO;
|
||||
|
||||
<ACTION_STRING>[^"\\\n]+ ACTION_ECHO;
|
||||
<ACTION_STRING>\\. ACTION_ECHO;
|
||||
<ACTION_STRING>\n ++linenum; ACTION_ECHO;
|
||||
<ACTION_STRING>\" ACTION_ECHO; BEGIN(ACTION);
|
||||
<ACTION_STRING>. ACTION_ECHO;
|
||||
|
||||
<ACTION,ACTION_COMMENT,ACTION_STRING><<EOF>> {
|
||||
synerr( "EOF encountered inside an action" );
|
||||
yyterminate();
|
||||
}
|
||||
|
||||
|
||||
<SECT2,QUOTE,CCL>{ESCSEQ} {
|
||||
yylval = myesc( yytext );
|
||||
return ( CHAR );
|
||||
}
|
||||
|
||||
<FIRSTCCL>{ESCSEQ} {
|
||||
yylval = myesc( yytext );
|
||||
BEGIN(CCL);
|
||||
return ( CHAR );
|
||||
}
|
||||
|
||||
|
||||
<SECT3>.*(\n?) ECHO;
|
||||
%%
|
||||
|
||||
|
||||
int yywrap()
|
||||
|
||||
{
|
||||
if ( --num_input_files > 0 )
|
||||
{
|
||||
set_input_file( *++input_files );
|
||||
return ( 0 );
|
||||
}
|
||||
|
||||
else
|
||||
return ( 1 );
|
||||
}
|
||||
|
||||
|
||||
/* set_input_file - open the given file (if NULL, stdin) for scanning */
|
||||
|
||||
void set_input_file( file )
|
||||
char *file;
|
||||
|
||||
{
|
||||
if ( file )
|
||||
{
|
||||
infilename = file;
|
||||
yyin = fopen( infilename, "r" );
|
||||
|
||||
if ( yyin == NULL )
|
||||
lerrsf( "can't open %s", file );
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
yyin = stdin;
|
||||
infilename = "<stdin>";
|
||||
}
|
||||
}
|
315
util/flex/sym.c
Normal file
315
util/flex/sym.c
Normal file
|
@ -0,0 +1,315 @@
|
|||
/* sym - symbol table routines */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 1990 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* Vern Paxson.
|
||||
*
|
||||
* The United States Government has rights in this work pursuant
|
||||
* to contract no. DE-AC03-76SF00098 between the United States
|
||||
* Department of Energy and the University of California.
|
||||
*
|
||||
* Redistribution and use in source and binary forms are permitted provided
|
||||
* that: (1) source distributions retain this entire copyright notice and
|
||||
* comment, and (2) distributions including binaries display the following
|
||||
* acknowledgement: ``This product includes software developed by the
|
||||
* University of California, Berkeley and its contributors'' in the
|
||||
* documentation or other materials provided with the distribution and in
|
||||
* all advertising materials mentioning features or use of this software.
|
||||
* Neither the name of the University nor the names of its contributors may
|
||||
* be used to endorse or promote products derived from this software without
|
||||
* specific prior written permission.
|
||||
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
|
||||
* WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*/
|
||||
|
||||
#ifndef lint
|
||||
static char rcsid[] =
|
||||
"@(#) $Header$ (LBL)";
|
||||
#endif
|
||||
|
||||
#include "flexdef.h"
|
||||
|
||||
|
||||
/* declare functions that have forward references */
|
||||
|
||||
int hashfunct PROTO((register char[], int));
|
||||
|
||||
|
||||
/* name definitions: entries added by ndinstal(), looked up by ndlookup() */
struct hash_entry *ndtbl[NAME_TABLE_HASH_SIZE];
|
||||
/* start conditions: entries added by scinstal(), looked up by sclookup() */
struct hash_entry *sctbl[START_COND_HASH_SIZE];
|
||||
/* character class text: entries added by cclinstal(), looked up by ccllookup() */
struct hash_entry *ccltab[CCL_HASH_SIZE];
|
||||
|
||||
struct hash_entry *findsym();
|
||||
|
||||
|
||||
/* addsym - add symbol and definitions to symbol table
|
||||
*
|
||||
* synopsis
|
||||
* char sym[], *str_def;
|
||||
* int int_def;
|
||||
* hash_table table;
|
||||
* int table_size;
|
||||
* 0 / -1 = addsym( sym, def, int_def, table, table_size );
|
||||
*
|
||||
* -1 is returned if the symbol already exists, and the change not made.
|
||||
*/
|
||||
|
||||
int addsym( sym, str_def, int_def, table, table_size )
|
||||
register char sym[];
|
||||
char *str_def;
|
||||
int int_def;
|
||||
hash_table table;
|
||||
int table_size;
|
||||
|
||||
{
|
||||
int hash_val = hashfunct( sym, table_size );
|
||||
register struct hash_entry *sym_entry = table[hash_val];
|
||||
register struct hash_entry *new_entry;
|
||||
register struct hash_entry *successor;
|
||||
|
||||
while ( sym_entry )
|
||||
{
|
||||
if ( ! strcmp( sym, sym_entry->name ) )
|
||||
{ /* entry already exists */
|
||||
return ( -1 );
|
||||
}
|
||||
|
||||
sym_entry = sym_entry->next;
|
||||
}
|
||||
|
||||
/* create new entry */
|
||||
new_entry = (struct hash_entry *) malloc( sizeof( struct hash_entry ) );
|
||||
|
||||
if ( new_entry == NULL )
|
||||
flexfatal( "symbol table memory allocation failed" );
|
||||
|
||||
if ( (successor = table[hash_val]) )
|
||||
{
|
||||
new_entry->next = successor;
|
||||
successor->prev = new_entry;
|
||||
}
|
||||
else
|
||||
new_entry->next = NULL;
|
||||
|
||||
new_entry->prev = NULL;
|
||||
new_entry->name = sym;
|
||||
new_entry->str_val = str_def;
|
||||
new_entry->int_val = int_def;
|
||||
|
||||
table[hash_val] = new_entry;
|
||||
|
||||
return ( 0 );
|
||||
}
|
||||
|
||||
|
||||
/* cclinstal - save the text of a character class
|
||||
*
|
||||
* synopsis
|
||||
* Char ccltxt[];
|
||||
* int cclnum;
|
||||
* cclinstal( ccltxt, cclnum );
|
||||
*/
|
||||
|
||||
void cclinstal( ccltxt, cclnum )
|
||||
Char ccltxt[];
|
||||
int cclnum;
|
||||
|
||||
{
|
||||
/* we don't bother checking the return status because we are not called
|
||||
* unless the symbol is new
|
||||
*/
|
||||
Char *copy_unsigned_string();
|
||||
|
||||
(void) addsym( (char *) copy_unsigned_string( ccltxt ), (char *) 0, cclnum,
|
||||
ccltab, CCL_HASH_SIZE );
|
||||
}
|
||||
|
||||
|
||||
/* ccllookup - lookup the number associated with character class text
|
||||
*
|
||||
* synopsis
|
||||
* Char ccltxt[];
|
||||
* int ccllookup, cclval;
|
||||
* cclval/0 = ccllookup( ccltxt );
|
||||
*/
|
||||
|
||||
int ccllookup( ccltxt )
|
||||
Char ccltxt[];
|
||||
|
||||
{
|
||||
return ( findsym( (char *) ccltxt, ccltab, CCL_HASH_SIZE )->int_val );
|
||||
}
|
||||
|
||||
|
||||
/* findsym - find symbol in symbol table
|
||||
*
|
||||
* synopsis
|
||||
* char sym[];
|
||||
* hash_table table;
|
||||
* int table_size;
|
||||
* struct hash_entry *sym_entry, *findsym();
|
||||
* sym_entry = findsym( sym, table, table_size );
|
||||
*/
|
||||
|
||||
struct hash_entry *findsym( sym, table, table_size )
|
||||
register char sym[];
|
||||
hash_table table;
|
||||
int table_size;
|
||||
|
||||
{
|
||||
register struct hash_entry *sym_entry = table[hashfunct( sym, table_size )];
|
||||
static struct hash_entry empty_entry =
|
||||
{
|
||||
(struct hash_entry *) 0, (struct hash_entry *) 0, NULL, NULL, 0,
|
||||
} ;
|
||||
|
||||
while ( sym_entry )
|
||||
{
|
||||
if ( ! strcmp( sym, sym_entry->name ) )
|
||||
return ( sym_entry );
|
||||
sym_entry = sym_entry->next;
|
||||
}
|
||||
|
||||
return ( &empty_entry );
|
||||
}
|
||||
|
||||
|
||||
/* hashfunct - compute the hash value for "str" and hash size "hash_size"
 *
 * synopsis
 *    char str[];
 *    int hash_size, hash_val;
 *    hash_val = hashfunct( str, hash_size );
 *
 * The value starts at zero; for each character it is doubled, the character
 * is added, and the result is reduced modulo "hash_size".  The result is
 * therefore always in the range 0 .. hash_size - 1 (0 for an empty string).
 */

int hashfunct( str, hash_size )
register char str[];
int hash_size;

    {
    register int hashval = 0;
    register int locstr = 0;

    while ( str[locstr] )
	{
	/* Go through unsigned char: where plain char is signed, a character
	 * with its high bit set would otherwise be added as a negative
	 * number, and the remainder (which callers use as a table index)
	 * could come out negative.
	 */
	hashval = (hashval << 1) + (unsigned char) str[locstr++];
	hashval %= hash_size;
	}

    return ( hashval );
    }
|
||||
|
||||
|
||||
/* ndinstal - install a name definition
|
||||
*
|
||||
* synopsis
|
||||
* char nd[];
|
||||
* Char def[];
|
||||
* ndinstal( nd, def );
|
||||
*/
|
||||
|
||||
void ndinstal( nd, def )
|
||||
char nd[];
|
||||
Char def[];
|
||||
|
||||
{
|
||||
char *copy_string();
|
||||
Char *copy_unsigned_string();
|
||||
|
||||
if ( addsym( copy_string( nd ), (char *) copy_unsigned_string( def ), 0,
|
||||
ndtbl, NAME_TABLE_HASH_SIZE ) )
|
||||
synerr( "name defined twice" );
|
||||
}
|
||||
|
||||
|
||||
/* ndlookup - lookup a name definition
|
||||
*
|
||||
* synopsis
|
||||
* char nd[], *def;
|
||||
* char *ndlookup();
|
||||
* def/NULL = ndlookup( nd );
|
||||
*/
|
||||
|
||||
Char *ndlookup( nd )
|
||||
char nd[];
|
||||
|
||||
{
|
||||
return ( (Char *) findsym( nd, ndtbl, NAME_TABLE_HASH_SIZE )->str_val );
|
||||
}
|
||||
|
||||
|
||||
/* scinstal - make a start condition
|
||||
*
|
||||
* synopsis
|
||||
* char str[];
|
||||
* int xcluflg;
|
||||
* scinstal( str, xcluflg );
|
||||
*
|
||||
* NOTE
|
||||
* the start condition is Exclusive if xcluflg is true
|
||||
*/
|
||||
|
||||
void scinstal( str, xcluflg )
|
||||
char str[];
|
||||
int xcluflg;
|
||||
|
||||
{
|
||||
char *copy_string();
|
||||
|
||||
/* bit of a hack. We know how the default start-condition is
|
||||
* declared, and don't put out a define for it, because it
|
||||
* would come out as "#define 0 1"
|
||||
*/
|
||||
/* actually, this is no longer the case. The default start-condition
|
||||
* is now called "INITIAL". But we keep the following for the sake
|
||||
* of future robustness.
|
||||
*/
|
||||
|
||||
if ( strcmp( str, "0" ) )
|
||||
printf( "#define %s %d\n", str, lastsc );
|
||||
|
||||
if ( ++lastsc >= current_max_scs )
|
||||
{
|
||||
current_max_scs += MAX_SCS_INCREMENT;
|
||||
|
||||
++num_reallocs;
|
||||
|
||||
scset = reallocate_integer_array( scset, current_max_scs );
|
||||
scbol = reallocate_integer_array( scbol, current_max_scs );
|
||||
scxclu = reallocate_integer_array( scxclu, current_max_scs );
|
||||
sceof = reallocate_integer_array( sceof, current_max_scs );
|
||||
scname = reallocate_char_ptr_array( scname, current_max_scs );
|
||||
actvsc = reallocate_integer_array( actvsc, current_max_scs );
|
||||
}
|
||||
|
||||
scname[lastsc] = copy_string( str );
|
||||
|
||||
if ( addsym( scname[lastsc], (char *) 0, lastsc,
|
||||
sctbl, START_COND_HASH_SIZE ) )
|
||||
format_pinpoint_message( "start condition %s declared twice", str );
|
||||
|
||||
scset[lastsc] = mkstate( SYM_EPSILON );
|
||||
scbol[lastsc] = mkstate( SYM_EPSILON );
|
||||
scxclu[lastsc] = xcluflg;
|
||||
sceof[lastsc] = false;
|
||||
}
|
||||
|
||||
|
||||
/* sclookup - lookup the number associated with a start condition
|
||||
*
|
||||
* synopsis
|
||||
* char str[], scnum;
|
||||
* int sclookup;
|
||||
* scnum/0 = sclookup( str );
|
||||
*/
|
||||
|
||||
int sclookup( str )
|
||||
char str[];
|
||||
|
||||
{
|
||||
return ( findsym( str, sctbl, START_COND_HASH_SIZE )->int_val );
|
||||
}
|
932
util/flex/tblcmp.c
Normal file
932
util/flex/tblcmp.c
Normal file
|
@ -0,0 +1,932 @@
|
|||
/* tblcmp - table compression routines */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 1990 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* Vern Paxson.
|
||||
*
|
||||
* The United States Government has rights in this work pursuant
|
||||
* to contract no. DE-AC03-76SF00098 between the United States
|
||||
* Department of Energy and the University of California.
|
||||
*
|
||||
* Redistribution and use in source and binary forms are permitted provided
|
||||
* that: (1) source distributions retain this entire copyright notice and
|
||||
* comment, and (2) distributions including binaries display the following
|
||||
* acknowledgement: ``This product includes software developed by the
|
||||
* University of California, Berkeley and its contributors'' in the
|
||||
* documentation or other materials provided with the distribution and in
|
||||
* all advertising materials mentioning features or use of this software.
|
||||
* Neither the name of the University nor the names of its contributors may
|
||||
* be used to endorse or promote products derived from this software without
|
||||
* specific prior written permission.
|
||||
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
|
||||
* WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*/
|
||||
|
||||
#ifndef lint
|
||||
static char rcsid[] =
|
||||
"@(#) $Header$ (LBL)";
|
||||
#endif
|
||||
|
||||
#include "flexdef.h"
|
||||
|
||||
|
||||
/* declarations for functions that have forward references */
|
||||
|
||||
void mkentry PROTO((register int*, int, int, int, int));
|
||||
void mkprot PROTO((int[], int, int));
|
||||
void mktemplate PROTO((int[], int, int));
|
||||
void mv2front PROTO((int));
|
||||
int tbldiff PROTO((int[], int, int[]));
|
||||
|
||||
|
||||
/* bldtbl - build table entries for dfa state
|
||||
*
|
||||
* synopsis
|
||||
* int state[numecs], statenum, totaltrans, comstate, comfreq;
|
||||
* bldtbl( state, statenum, totaltrans, comstate, comfreq );
|
||||
*
|
||||
* State is the statenum'th dfa state. It is indexed by equivalence class and
|
||||
* gives the number of the state to enter for a given equivalence class.
|
||||
* totaltrans is the total number of transitions out of the state. Comstate
|
||||
* is that state which is the destination of the most transitions out of State.
|
||||
* Comfreq is how many transitions there are out of State to Comstate.
|
||||
*
|
||||
* A note on terminology:
|
||||
* "protos" are transition tables which have a high probability of
|
||||
* either being redundant (a state processed later will have an identical
|
||||
* transition table) or nearly redundant (a state processed later will have
|
||||
* many of the same out-transitions). A "most recently used" queue of
|
||||
* protos is kept around with the hope that most states will find a proto
|
||||
* which is similar enough to be usable, and therefore compacting the
|
||||
* output tables.
|
||||
* "templates" are a special type of proto. If a transition table is
|
||||
* homogeneous or nearly homogeneous (all transitions go to the same
|
||||
* destination) then the odds are good that future states will also go
|
||||
* to the same destination state on basically the same character set.
|
||||
* These homogeneous states are so common when dealing with large rule
|
||||
* sets that they merit special attention. If the transition table were
|
||||
* simply made into a proto, then (typically) each subsequent, similar
|
||||
* state will differ from the proto for two out-transitions. One of these
|
||||
* out-transitions will be that character on which the proto does not go
|
||||
* to the common destination, and one will be that character on which the
|
||||
* state does not go to the common destination. Templates, on the other
|
||||
* hand, go to the common state on EVERY transition character, and therefore
|
||||
* cost only one difference.
|
||||
*/
|
||||
|
||||
void bldtbl( state, statenum, totaltrans, comstate, comfreq )
|
||||
int state[], statenum, totaltrans, comstate, comfreq;
|
||||
|
||||
{
|
||||
int extptr, extrct[2][CSIZE + 1];
|
||||
int mindiff, minprot, i, d;
|
||||
int checkcom;
|
||||
|
||||
/* If extptr is 0 then the first array of extrct holds the result of the
|
||||
* "best difference" to date, which is those transitions which occur in
|
||||
* "state" but not in the proto which, to date, has the fewest differences
|
||||
* between itself and "state". If extptr is 1 then the second array of
|
||||
* extrct hold the best difference. The two arrays are toggled
|
||||
* between so that the best difference to date can be kept around and
|
||||
* also a difference just created by checking against a candidate "best"
|
||||
* proto.
|
||||
*/
|
||||
|
||||
extptr = 0;
|
||||
|
||||
/* if the state has too few out-transitions, don't bother trying to
|
||||
* compact its tables
|
||||
*/
|
||||
|
||||
if ( (totaltrans * 100) < (numecs * PROTO_SIZE_PERCENTAGE) )
|
||||
mkentry( state, numecs, statenum, JAMSTATE, totaltrans );
|
||||
|
||||
else
|
||||
{
|
||||
/* checkcom is true if we should only check "state" against
|
||||
* protos which have the same "comstate" value
|
||||
*/
|
||||
|
||||
checkcom = comfreq * 100 > totaltrans * CHECK_COM_PERCENTAGE;
|
||||
|
||||
minprot = firstprot;
|
||||
mindiff = totaltrans;
|
||||
|
||||
if ( checkcom )
|
||||
{
|
||||
/* find first proto which has the same "comstate" */
|
||||
for ( i = firstprot; i != NIL; i = protnext[i] )
|
||||
if ( protcomst[i] == comstate )
|
||||
{
|
||||
minprot = i;
|
||||
mindiff = tbldiff( state, minprot, extrct[extptr] );
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
/* since we've decided that the most common destination out
|
||||
* of "state" does not occur with a high enough frequency,
|
||||
* we set the "comstate" to zero, assuring that if this state
|
||||
* is entered into the proto list, it will not be considered
|
||||
* a template.
|
||||
*/
|
||||
comstate = 0;
|
||||
|
||||
if ( firstprot != NIL )
|
||||
{
|
||||
minprot = firstprot;
|
||||
mindiff = tbldiff( state, minprot, extrct[extptr] );
|
||||
}
|
||||
}
|
||||
|
||||
/* we now have the first interesting proto in "minprot". If
|
||||
* it matches within the tolerances set for the first proto,
|
||||
* we don't want to bother scanning the rest of the proto list
|
||||
* to see if we have any other reasonable matches.
|
||||
*/
|
||||
|
||||
if ( mindiff * 100 > totaltrans * FIRST_MATCH_DIFF_PERCENTAGE )
|
||||
{ /* not a good enough match. Scan the rest of the protos */
|
||||
for ( i = minprot; i != NIL; i = protnext[i] )
|
||||
{
|
||||
d = tbldiff( state, i, extrct[1 - extptr] );
|
||||
if ( d < mindiff )
|
||||
{
|
||||
extptr = 1 - extptr;
|
||||
mindiff = d;
|
||||
minprot = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* check if the proto we've decided on as our best bet is close
|
||||
* enough to the state we want to match to be usable
|
||||
*/
|
||||
|
||||
if ( mindiff * 100 > totaltrans * ACCEPTABLE_DIFF_PERCENTAGE )
|
||||
{
|
||||
/* no good. If the state is homogeneous enough, we make a
|
||||
* template out of it. Otherwise, we make a proto.
|
||||
*/
|
||||
|
||||
if ( comfreq * 100 >= totaltrans * TEMPLATE_SAME_PERCENTAGE )
|
||||
mktemplate( state, statenum, comstate );
|
||||
|
||||
else
|
||||
{
|
||||
mkprot( state, statenum, comstate );
|
||||
mkentry( state, numecs, statenum, JAMSTATE, totaltrans );
|
||||
}
|
||||
}
|
||||
|
||||
else
|
||||
{ /* use the proto */
|
||||
mkentry( extrct[extptr], numecs, statenum,
|
||||
prottbl[minprot], mindiff );
|
||||
|
||||
/* if this state was sufficiently different from the proto
|
||||
* we built it from, make it, too, a proto
|
||||
*/
|
||||
|
||||
if ( mindiff * 100 >= totaltrans * NEW_PROTO_DIFF_PERCENTAGE )
|
||||
mkprot( state, statenum, comstate );
|
||||
|
||||
/* since mkprot added a new proto to the proto queue, it's possible
|
||||
* that "minprot" is no longer on the proto queue (if it happened
|
||||
* to have been the last entry, it would have been bumped off).
|
||||
* If it's not there, then the new proto took its physical place
|
||||
* (though logically the new proto is at the beginning of the
|
||||
* queue), so in that case the following call will do nothing.
|
||||
*/
|
||||
|
||||
mv2front( minprot );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* cmptmps - compress template table entries
|
||||
*
|
||||
* synopsis
|
||||
* cmptmps();
|
||||
*
|
||||
* template tables are compressed by using the 'template equivalence
|
||||
* classes', which are collections of transition character equivalence
|
||||
* classes which always appear together in templates - really meta-equivalence
|
||||
* classes. until this point, the tables for templates have been stored
|
||||
* up at the top end of the nxt array; they will now be compressed and have
|
||||
* table entries made for them.
|
||||
*/
|
||||
|
||||
void cmptmps()
|
||||
|
||||
{
|
||||
int tmpstorage[CSIZE + 1];
|
||||
register int *tmp = tmpstorage, i, j;
|
||||
int totaltrans, trans;
|
||||
|
||||
peakpairs = numtemps * numecs + tblend;
|
||||
|
||||
if ( usemecs )
|
||||
{
|
||||
/* create equivalence classes base on data gathered on template
|
||||
* transitions
|
||||
*/
|
||||
|
||||
nummecs = cre8ecs( tecfwd, tecbck, numecs );
|
||||
}
|
||||
|
||||
else
|
||||
nummecs = numecs;
|
||||
|
||||
if ( lastdfa + numtemps + 1 >= current_max_dfas )
|
||||
increase_max_dfas();
|
||||
|
||||
/* loop through each template */
|
||||
|
||||
for ( i = 1; i <= numtemps; ++i )
|
||||
{
|
||||
totaltrans = 0; /* number of non-jam transitions out of this template */
|
||||
|
||||
for ( j = 1; j <= numecs; ++j )
|
||||
{
|
||||
trans = tnxt[numecs * i + j];
|
||||
|
||||
if ( usemecs )
|
||||
{
|
||||
/* the absolute value of tecbck is the meta-equivalence class
|
||||
* of a given equivalence class, as set up by cre8ecs
|
||||
*/
|
||||
if ( tecbck[j] > 0 )
|
||||
{
|
||||
tmp[tecbck[j]] = trans;
|
||||
|
||||
if ( trans > 0 )
|
||||
++totaltrans;
|
||||
}
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
tmp[j] = trans;
|
||||
|
||||
if ( trans > 0 )
|
||||
++totaltrans;
|
||||
}
|
||||
}
|
||||
|
||||
/* it is assumed (in a rather subtle way) in the skeleton that
|
||||
* if we're using meta-equivalence classes, the def[] entry for
|
||||
* all templates is the jam template, i.e., templates never default
|
||||
* to other non-jam table entries (e.g., another template)
|
||||
*/
|
||||
|
||||
/* leave room for the jam-state after the last real state */
|
||||
mkentry( tmp, nummecs, lastdfa + i + 1, JAMSTATE, totaltrans );
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef ACK_MOD
/* bzero - set "cnt" bytes starting at "p" to zero
 *
 * Local definition, compiled only when ACK_MOD is defined.  A count of
 * zero or less clears nothing.
 */
static void bzero(p, cnt)
register char *p;
register int cnt;
{
	for ( ; cnt > 0; --cnt )
		*p++ = '\0';
}
#endif /* ACK_MOD */
|
||||
|
||||
/* expand_nxt_chk - expand the next check arrays */
|
||||
|
||||
void expand_nxt_chk()
|
||||
|
||||
{
|
||||
register int old_max = current_max_xpairs;
|
||||
|
||||
current_max_xpairs += MAX_XPAIRS_INCREMENT;
|
||||
|
||||
++num_reallocs;
|
||||
|
||||
nxt = reallocate_integer_array( nxt, current_max_xpairs );
|
||||
chk = reallocate_integer_array( chk, current_max_xpairs );
|
||||
|
||||
bzero( (char *) (chk + old_max),
|
||||
MAX_XPAIRS_INCREMENT * sizeof( int ) / sizeof( char ) );
|
||||
}
|
||||
|
||||
|
||||
/* find_table_space - finds a space in the table for a state to be placed
|
||||
*
|
||||
* synopsis
|
||||
* int *state, numtrans, block_start;
|
||||
* int find_table_space();
|
||||
*
|
||||
* block_start = find_table_space( state, numtrans );
|
||||
*
|
||||
* State is the state to be added to the full speed transition table.
|
||||
* Numtrans is the number of out-transitions for the state.
|
||||
*
|
||||
* find_table_space() returns the position of the start of the first block (in
|
||||
* chk) able to accommodate the state
|
||||
*
|
||||
* In determining if a state will or will not fit, find_table_space() must take
|
||||
* into account the fact that an end-of-buffer state will be added at [0],
|
||||
* and an action number will be added in [-1].
|
||||
*/
|
||||
|
||||
int find_table_space( state, numtrans )
|
||||
int *state, numtrans;
|
||||
|
||||
{
|
||||
/* firstfree is the position of the first possible occurrence of two
|
||||
* consecutive unused records in the chk and nxt arrays
|
||||
*/
|
||||
register int i;
|
||||
register int *state_ptr, *chk_ptr;
|
||||
register int *ptr_to_last_entry_in_state;
|
||||
|
||||
/* if there are too many out-transitions, put the state at the end of
|
||||
* nxt and chk
|
||||
*/
|
||||
if ( numtrans > MAX_XTIONS_FULL_INTERIOR_FIT )
|
||||
{
|
||||
/* if table is empty, return the first available spot in chk/nxt,
|
||||
* which should be 1
|
||||
*/
|
||||
if ( tblend < 2 )
|
||||
return ( 1 );
|
||||
|
||||
i = tblend - numecs; /* start searching for table space near the
|
||||
* end of chk/nxt arrays
|
||||
*/
|
||||
}
|
||||
|
||||
else
|
||||
i = firstfree; /* start searching for table space from the
|
||||
* beginning (skipping only the elements
|
||||
* which will definitely not hold the new
|
||||
* state)
|
||||
*/
|
||||
|
||||
while ( 1 ) /* loops until a space is found */
|
||||
{
|
||||
if ( i + numecs > current_max_xpairs )
|
||||
expand_nxt_chk();
|
||||
|
||||
/* loops until space for end-of-buffer and action number are found */
|
||||
while ( 1 )
|
||||
{
|
||||
if ( chk[i - 1] == 0 ) /* check for action number space */
|
||||
{
|
||||
if ( chk[i] == 0 ) /* check for end-of-buffer space */
|
||||
break;
|
||||
|
||||
else
|
||||
i += 2; /* since i != 0, there is no use checking to
|
||||
* see if (++i) - 1 == 0, because that's the
|
||||
* same as i == 0, so we skip a space
|
||||
*/
|
||||
}
|
||||
|
||||
else
|
||||
++i;
|
||||
|
||||
if ( i + numecs > current_max_xpairs )
|
||||
expand_nxt_chk();
|
||||
}
|
||||
|
||||
/* if we started search from the beginning, store the new firstfree for
|
||||
* the next call of find_table_space()
|
||||
*/
|
||||
if ( numtrans <= MAX_XTIONS_FULL_INTERIOR_FIT )
|
||||
firstfree = i + 1;
|
||||
|
||||
/* check to see if all elements in chk (and therefore nxt) that are
|
||||
* needed for the new state have not yet been taken
|
||||
*/
|
||||
|
||||
state_ptr = &state[1];
|
||||
ptr_to_last_entry_in_state = &chk[i + numecs + 1];
|
||||
|
||||
for ( chk_ptr = &chk[i + 1]; chk_ptr != ptr_to_last_entry_in_state;
|
||||
++chk_ptr )
|
||||
if ( *(state_ptr++) != 0 && *chk_ptr != 0 )
|
||||
break;
|
||||
|
||||
if ( chk_ptr == ptr_to_last_entry_in_state )
|
||||
return ( i );
|
||||
|
||||
else
|
||||
++i;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* inittbl - initialize transition tables
|
||||
*
|
||||
* synopsis
|
||||
* inittbl();
|
||||
*
|
||||
* Initializes "firstfree" to be one beyond the end of the table. Initializes
|
||||
* all "chk" entries to be zero. Note that templates are built in their
|
||||
* own tbase/tdef tables. They are shifted down to be contiguous
|
||||
* with the non-template entries during table generation.
|
||||
*/
|
||||
void inittbl()
|
||||
|
||||
{
|
||||
register int i;
|
||||
|
||||
bzero( (char *) chk, current_max_xpairs * sizeof( int ) / sizeof( char ) );
|
||||
|
||||
tblend = 0;
|
||||
firstfree = tblend + 1;
|
||||
numtemps = 0;
|
||||
|
||||
if ( usemecs )
|
||||
{
|
||||
/* set up doubly-linked meta-equivalence classes
|
||||
* these are sets of equivalence classes which all have identical
|
||||
* transitions out of TEMPLATES
|
||||
*/
|
||||
|
||||
tecbck[1] = NIL;
|
||||
|
||||
for ( i = 2; i <= numecs; ++i )
|
||||
{
|
||||
tecbck[i] = i - 1;
|
||||
tecfwd[i - 1] = i;
|
||||
}
|
||||
|
||||
tecfwd[numecs] = NIL;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* mkdeftbl - make the default, "jam" table entries
|
||||
*
|
||||
* synopsis
|
||||
* mkdeftbl();
|
||||
*/
|
||||
|
||||
void mkdeftbl()
|
||||
|
||||
{
|
||||
int i;
|
||||
|
||||
jamstate = lastdfa + 1;
|
||||
|
||||
++tblend; /* room for transition on end-of-buffer character */
|
||||
|
||||
if ( tblend + numecs > current_max_xpairs )
|
||||
expand_nxt_chk();
|
||||
|
||||
/* add in default end-of-buffer transition */
|
||||
nxt[tblend] = end_of_buffer_state;
|
||||
chk[tblend] = jamstate;
|
||||
|
||||
for ( i = 1; i <= numecs; ++i )
|
||||
{
|
||||
nxt[tblend + i] = 0;
|
||||
chk[tblend + i] = jamstate;
|
||||
}
|
||||
|
||||
jambase = tblend;
|
||||
|
||||
base[jamstate] = jambase;
|
||||
def[jamstate] = 0;
|
||||
|
||||
tblend += numecs;
|
||||
++numtemps;
|
||||
}
|
||||
|
||||
|
||||
/* mkentry - create base/def and nxt/chk entries for transition array
|
||||
*
|
||||
* synopsis
|
||||
* int state[numchars + 1], numchars, statenum, deflink, totaltrans;
|
||||
* mkentry( state, numchars, statenum, deflink, totaltrans );
|
||||
*
|
||||
* "state" is a transition array "numchars" characters in size, "statenum"
|
||||
* is the offset to be used into the base/def tables, and "deflink" is the
|
||||
* entry to put in the "def" table entry. If "deflink" is equal to
|
||||
* "JAMSTATE", then no attempt will be made to fit zero entries of "state"
|
||||
* (i.e., jam entries) into the table. It is assumed that by linking to
|
||||
* "JAMSTATE" they will be taken care of. In any case, entries in "state"
|
||||
* marking transitions to "SAME_TRANS" are treated as though they will be
|
||||
* taken care of by wherever "deflink" points. "totaltrans" is the total
|
||||
* number of transitions out of the state. If it is below a certain threshold,
|
||||
* the tables are searched for an interior spot that will accommodate the
|
||||
* state array.
|
||||
*/
|
||||
|
||||
void mkentry( state, numchars, statenum, deflink, totaltrans )
|
||||
register int *state;
|
||||
int numchars, statenum, deflink, totaltrans;
|
||||
|
||||
{
|
||||
register int minec, maxec, i, baseaddr;
|
||||
int tblbase, tbllast;
|
||||
|
||||
if ( totaltrans == 0 )
|
||||
{ /* there are no out-transitions */
|
||||
if ( deflink == JAMSTATE )
|
||||
base[statenum] = JAMSTATE;
|
||||
else
|
||||
base[statenum] = 0;
|
||||
|
||||
def[statenum] = deflink;
|
||||
return;
|
||||
}
|
||||
|
||||
for ( minec = 1; minec <= numchars; ++minec )
|
||||
{
|
||||
if ( state[minec] != SAME_TRANS )
|
||||
if ( state[minec] != 0 || deflink != JAMSTATE )
|
||||
break;
|
||||
}
|
||||
|
||||
if ( totaltrans == 1 )
|
||||
{
|
||||
/* there's only one out-transition. Save it for later to fill
|
||||
* in holes in the tables.
|
||||
*/
|
||||
stack1( statenum, minec, state[minec], deflink );
|
||||
return;
|
||||
}
|
||||
|
||||
for ( maxec = numchars; maxec > 0; --maxec )
|
||||
{
|
||||
if ( state[maxec] != SAME_TRANS )
|
||||
if ( state[maxec] != 0 || deflink != JAMSTATE )
|
||||
break;
|
||||
}
|
||||
|
||||
/* Whether we try to fit the state table in the middle of the table
|
||||
* entries we have already generated, or if we just take the state
|
||||
* table at the end of the nxt/chk tables, we must make sure that we
|
||||
* have a valid base address (i.e., non-negative). Note that not only are
|
||||
* negative base addresses dangerous at run-time (because indexing the
|
||||
* next array with one and a low-valued character might generate an
|
||||
* array-out-of-bounds error message), but at compile-time negative
|
||||
* base addresses denote TEMPLATES.
|
||||
*/
|
||||
|
||||
/* find the first transition of state that we need to worry about. */
|
||||
if ( totaltrans * 100 <= numchars * INTERIOR_FIT_PERCENTAGE )
|
||||
{ /* attempt to squeeze it into the middle of the tabls */
|
||||
baseaddr = firstfree;
|
||||
|
||||
while ( baseaddr < minec )
|
||||
{
|
||||
/* using baseaddr would result in a negative base address below
|
||||
* find the next free slot
|
||||
*/
|
||||
for ( ++baseaddr; chk[baseaddr] != 0; ++baseaddr )
|
||||
;
|
||||
}
|
||||
|
||||
if ( baseaddr + maxec - minec >= current_max_xpairs )
|
||||
expand_nxt_chk();
|
||||
|
||||
for ( i = minec; i <= maxec; ++i )
|
||||
if ( state[i] != SAME_TRANS )
|
||||
if ( state[i] != 0 || deflink != JAMSTATE )
|
||||
if ( chk[baseaddr + i - minec] != 0 )
|
||||
{ /* baseaddr unsuitable - find another */
|
||||
for ( ++baseaddr;
|
||||
baseaddr < current_max_xpairs &&
|
||||
chk[baseaddr] != 0;
|
||||
++baseaddr )
|
||||
;
|
||||
|
||||
if ( baseaddr + maxec - minec >= current_max_xpairs )
|
||||
expand_nxt_chk();
|
||||
|
||||
/* reset the loop counter so we'll start all
|
||||
* over again next time it's incremented
|
||||
*/
|
||||
|
||||
i = minec - 1;
|
||||
}
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
/* ensure that the base address we eventually generate is
|
||||
* non-negative
|
||||
*/
|
||||
baseaddr = max( tblend + 1, minec );
|
||||
}
|
||||
|
||||
tblbase = baseaddr - minec;
|
||||
tbllast = tblbase + maxec;
|
||||
|
||||
if ( tbllast >= current_max_xpairs )
|
||||
expand_nxt_chk();
|
||||
|
||||
base[statenum] = tblbase;
|
||||
def[statenum] = deflink;
|
||||
|
||||
for ( i = minec; i <= maxec; ++i )
|
||||
if ( state[i] != SAME_TRANS )
|
||||
if ( state[i] != 0 || deflink != JAMSTATE )
|
||||
{
|
||||
nxt[tblbase + i] = state[i];
|
||||
chk[tblbase + i] = statenum;
|
||||
}
|
||||
|
||||
if ( baseaddr == firstfree )
|
||||
/* find next free slot in tables */
|
||||
for ( ++firstfree; chk[firstfree] != 0; ++firstfree )
|
||||
;
|
||||
|
||||
tblend = max( tblend, tbllast );
|
||||
}
|
||||
|
||||
|
||||
/* mk1tbl - create table entries for a state (or state fragment) which
|
||||
* has only one out-transition
|
||||
*
|
||||
* synopsis
|
||||
* int state, sym, onenxt, onedef;
|
||||
* mk1tbl( state, sym, onenxt, onedef );
|
||||
*/
|
||||
|
||||
void mk1tbl( state, sym, onenxt, onedef )
|
||||
int state, sym, onenxt, onedef;
|
||||
|
||||
{
|
||||
if ( firstfree < sym )
|
||||
firstfree = sym;
|
||||
|
||||
while ( chk[firstfree] != 0 )
|
||||
if ( ++firstfree >= current_max_xpairs )
|
||||
expand_nxt_chk();
|
||||
|
||||
base[state] = firstfree - sym;
|
||||
def[state] = onedef;
|
||||
chk[firstfree] = state;
|
||||
nxt[firstfree] = onenxt;
|
||||
|
||||
if ( firstfree > tblend )
|
||||
{
|
||||
tblend = firstfree++;
|
||||
|
||||
if ( firstfree >= current_max_xpairs )
|
||||
expand_nxt_chk();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* mkprot - create new proto entry
|
||||
*
|
||||
* synopsis
|
||||
* int state[], statenum, comstate;
|
||||
* mkprot( state, statenum, comstate );
|
||||
*/
|
||||
|
||||
void mkprot( state, statenum, comstate )
|
||||
int state[], statenum, comstate;
|
||||
|
||||
{
|
||||
int i, slot, tblbase;
|
||||
|
||||
if ( ++numprots >= MSP || numecs * numprots >= PROT_SAVE_SIZE )
|
||||
{
|
||||
/* gotta make room for the new proto by dropping last entry in
|
||||
* the queue
|
||||
*/
|
||||
slot = lastprot;
|
||||
lastprot = protprev[lastprot];
|
||||
protnext[lastprot] = NIL;
|
||||
}
|
||||
|
||||
else
|
||||
slot = numprots;
|
||||
|
||||
protnext[slot] = firstprot;
|
||||
|
||||
if ( firstprot != NIL )
|
||||
protprev[firstprot] = slot;
|
||||
|
||||
firstprot = slot;
|
||||
prottbl[slot] = statenum;
|
||||
protcomst[slot] = comstate;
|
||||
|
||||
/* copy state into save area so it can be compared with rapidly */
|
||||
tblbase = numecs * (slot - 1);
|
||||
|
||||
for ( i = 1; i <= numecs; ++i )
|
||||
protsave[tblbase + i] = state[i];
|
||||
}
|
||||
|
||||
|
||||
/* mktemplate - create a template entry based on a state, and connect the state
|
||||
* to it
|
||||
*
|
||||
* synopsis
|
||||
* int state[], statenum, comstate, totaltrans;
|
||||
* mktemplate( state, statenum, comstate, totaltrans );
|
||||
*/
|
||||
|
||||
void mktemplate( state, statenum, comstate )
|
||||
int state[], statenum, comstate;
|
||||
|
||||
{
|
||||
int i, numdiff, tmpbase, tmp[CSIZE + 1];
|
||||
Char transset[CSIZE + 1];
|
||||
int tsptr;
|
||||
|
||||
++numtemps;
|
||||
|
||||
tsptr = 0;
|
||||
|
||||
/* calculate where we will temporarily store the transition table
|
||||
* of the template in the tnxt[] array. The final transition table
|
||||
* gets created by cmptmps()
|
||||
*/
|
||||
|
||||
tmpbase = numtemps * numecs;
|
||||
|
||||
if ( tmpbase + numecs >= current_max_template_xpairs )
|
||||
{
|
||||
current_max_template_xpairs += MAX_TEMPLATE_XPAIRS_INCREMENT;
|
||||
|
||||
++num_reallocs;
|
||||
|
||||
tnxt = reallocate_integer_array( tnxt, current_max_template_xpairs );
|
||||
}
|
||||
|
||||
for ( i = 1; i <= numecs; ++i )
|
||||
if ( state[i] == 0 )
|
||||
tnxt[tmpbase + i] = 0;
|
||||
else
|
||||
{
|
||||
transset[tsptr++] = i;
|
||||
tnxt[tmpbase + i] = comstate;
|
||||
}
|
||||
|
||||
if ( usemecs )
|
||||
mkeccl( transset, tsptr, tecfwd, tecbck, numecs, 0 );
|
||||
|
||||
mkprot( tnxt + tmpbase, -numtemps, comstate );
|
||||
|
||||
/* we rely on the fact that mkprot adds things to the beginning
|
||||
* of the proto queue
|
||||
*/
|
||||
|
||||
numdiff = tbldiff( state, firstprot, tmp );
|
||||
mkentry( tmp, numecs, statenum, -numtemps, numdiff );
|
||||
}
|
||||
|
||||
|
||||
/* mv2front - move proto queue element to front of queue
|
||||
*
|
||||
* synopsis
|
||||
* int qelm;
|
||||
* mv2front( qelm );
|
||||
*/
|
||||
|
||||
void mv2front( qelm )
|
||||
int qelm;
|
||||
|
||||
{
|
||||
if ( firstprot != qelm )
|
||||
{
|
||||
if ( qelm == lastprot )
|
||||
lastprot = protprev[lastprot];
|
||||
|
||||
protnext[protprev[qelm]] = protnext[qelm];
|
||||
|
||||
if ( protnext[qelm] != NIL )
|
||||
protprev[protnext[qelm]] = protprev[qelm];
|
||||
|
||||
protprev[qelm] = NIL;
|
||||
protnext[qelm] = firstprot;
|
||||
protprev[firstprot] = qelm;
|
||||
firstprot = qelm;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* place_state - place a state into full speed transition table
|
||||
*
|
||||
* synopsis
|
||||
* int *state, statenum, transnum;
|
||||
* place_state( state, statenum, transnum );
|
||||
*
|
||||
* State is the statenum'th state. It is indexed by equivalence class and
|
||||
* gives the number of the state to enter for a given equivalence class.
|
||||
* Transnum is the number of out-transitions for the state.
|
||||
*/
|
||||
|
||||
void place_state( state, statenum, transnum )
|
||||
int *state, statenum, transnum;
|
||||
|
||||
{
|
||||
register int i;
|
||||
register int *state_ptr;
|
||||
int position = find_table_space( state, transnum );
|
||||
|
||||
/* base is the table of start positions */
|
||||
base[statenum] = position;
|
||||
|
||||
/* put in action number marker; this non-zero number makes sure that
|
||||
* find_table_space() knows that this position in chk/nxt is taken
|
||||
* and should not be used for another accepting number in another state
|
||||
*/
|
||||
chk[position - 1] = 1;
|
||||
|
||||
/* put in end-of-buffer marker; this is for the same purposes as above */
|
||||
chk[position] = 1;
|
||||
|
||||
/* place the state into chk and nxt */
|
||||
state_ptr = &state[1];
|
||||
|
||||
for ( i = 1; i <= numecs; ++i, ++state_ptr )
|
||||
if ( *state_ptr != 0 )
|
||||
{
|
||||
chk[position + i] = i;
|
||||
nxt[position + i] = *state_ptr;
|
||||
}
|
||||
|
||||
if ( position + numecs > tblend )
|
||||
tblend = position + numecs;
|
||||
}
|
||||
|
||||
|
||||
/* stack1 - save states with only one out-transition to be processed later
|
||||
*
|
||||
* synopsis
|
||||
* int statenum, sym, nextstate, deflink;
|
||||
* stack1( statenum, sym, nextstate, deflink );
|
||||
*
|
||||
* if there's room for another state one the "one-transition" stack, the
|
||||
* state is pushed onto it, to be processed later by mk1tbl. If there's
|
||||
* no room, we process the sucker right now.
|
||||
*/
|
||||
|
||||
void stack1( statenum, sym, nextstate, deflink )
|
||||
int statenum, sym, nextstate, deflink;
|
||||
|
||||
{
|
||||
if ( onesp >= ONE_STACK_SIZE - 1 )
|
||||
mk1tbl( statenum, sym, nextstate, deflink );
|
||||
|
||||
else
|
||||
{
|
||||
++onesp;
|
||||
onestate[onesp] = statenum;
|
||||
onesym[onesp] = sym;
|
||||
onenext[onesp] = nextstate;
|
||||
onedef[onesp] = deflink;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* tbldiff - compute differences between two state tables
|
||||
*
|
||||
* synopsis
|
||||
* int state[], pr, ext[];
|
||||
* int tbldiff, numdifferences;
|
||||
* numdifferences = tbldiff( state, pr, ext )
|
||||
*
|
||||
* "state" is the state array which is to be extracted from the pr'th
|
||||
* proto. "pr" is both the number of the proto we are extracting from
|
||||
* and an index into the save area where we can find the proto's complete
|
||||
* state table. Each entry in "state" which differs from the corresponding
|
||||
* entry of "pr" will appear in "ext".
|
||||
* Entries which are the same in both "state" and "pr" will be marked
|
||||
* as transitions to "SAME_TRANS" in "ext". The total number of differences
|
||||
* between "state" and "pr" is returned as function value. Note that this
|
||||
* number is "numecs" minus the number of "SAME_TRANS" entries in "ext".
|
||||
*/
|
||||
|
||||
int tbldiff( state, pr, ext )
|
||||
int state[], pr, ext[];
|
||||
|
||||
{
|
||||
register int i, *sp = state, *ep = ext, *protp;
|
||||
register int numdiff = 0;
|
||||
|
||||
protp = &protsave[numecs * (pr - 1)];
|
||||
|
||||
for ( i = numecs; i > 0; --i )
|
||||
{
|
||||
if ( *++protp == *++sp )
|
||||
*++ep = SAME_TRANS;
|
||||
else
|
||||
{
|
||||
*++ep = *sp;
|
||||
++numdiff;
|
||||
}
|
||||
}
|
||||
|
||||
return ( numdiff );
|
||||
}
|
216
util/flex/yylex.c
Normal file
216
util/flex/yylex.c
Normal file
|
@ -0,0 +1,216 @@
|
|||
/* yylex - scanner front-end for flex */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 1990 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* Vern Paxson.
|
||||
*
|
||||
* The United States Government has rights in this work pursuant
|
||||
* to contract no. DE-AC03-76SF00098 between the United States
|
||||
* Department of Energy and the University of California.
|
||||
*
|
||||
* Redistribution and use in source and binary forms are permitted provided
|
||||
* that: (1) source distributions retain this entire copyright notice and
|
||||
* comment, and (2) distributions including binaries display the following
|
||||
* acknowledgement: ``This product includes software developed by the
|
||||
* University of California, Berkeley and its contributors'' in the
|
||||
* documentation or other materials provided with the distribution and in
|
||||
* all advertising materials mentioning features or use of this software.
|
||||
* Neither the name of the University nor the names of its contributors may
|
||||
* be used to endorse or promote products derived from this software without
|
||||
* specific prior written permission.
|
||||
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
|
||||
* WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*/
|
||||
|
||||
#ifndef lint
/* revision identification string; compiled out when "lint" is defined */
static char rcsid[] = "@(#) $Header$ (LBL)";
#endif
|
||||
|
||||
#include <ctype.h>
|
||||
#include "flexdef.h"
|
||||
#include "parse.h"
|
||||
|
||||
|
||||
/* ANSI C does not guarantee that isascii() is defined.  This fallback is
 * true only for values in the 7-bit range 0..0177.  Negative values are
 * rejected as well, so that a caller testing isascii() before isprint()
 * never hands isprint() a negative argument.
 */
#ifndef isascii
#define isascii(c) (((c) & ~0177) == 0)
#endif
|
||||
|
||||
|
||||
/* yylex - scan for a regular expression token
|
||||
*
|
||||
* synopsis
|
||||
*
|
||||
* token = yylex();
|
||||
*
|
||||
* token - return token found
|
||||
*/
|
||||
|
||||
int yylex()
|
||||
|
||||
{
|
||||
int toktype;
|
||||
static int beglin = false;
|
||||
|
||||
if ( eofseen )
|
||||
toktype = EOF;
|
||||
else
|
||||
toktype = flexscan();
|
||||
|
||||
if ( toktype == EOF || toktype == 0 )
|
||||
{
|
||||
eofseen = 1;
|
||||
|
||||
if ( sectnum == 1 )
|
||||
{
|
||||
synerr( "premature EOF" );
|
||||
sectnum = 2;
|
||||
toktype = SECTEND;
|
||||
}
|
||||
|
||||
else if ( sectnum == 2 )
|
||||
{
|
||||
sectnum = 3;
|
||||
toktype = 0;
|
||||
}
|
||||
|
||||
else
|
||||
toktype = 0;
|
||||
}
|
||||
|
||||
if ( trace )
|
||||
{
|
||||
if ( beglin )
|
||||
{
|
||||
fprintf( stderr, "%d\t", num_rules + 1 );
|
||||
beglin = 0;
|
||||
}
|
||||
|
||||
switch ( toktype )
|
||||
{
|
||||
case '<':
|
||||
case '>':
|
||||
case '^':
|
||||
case '$':
|
||||
case '"':
|
||||
case '[':
|
||||
case ']':
|
||||
case '{':
|
||||
case '}':
|
||||
case '|':
|
||||
case '(':
|
||||
case ')':
|
||||
case '-':
|
||||
case '/':
|
||||
case '\\':
|
||||
case '?':
|
||||
case '.':
|
||||
case '*':
|
||||
case '+':
|
||||
case ',':
|
||||
(void) putc( toktype, stderr );
|
||||
break;
|
||||
|
||||
case '\n':
|
||||
(void) putc( '\n', stderr );
|
||||
|
||||
if ( sectnum == 2 )
|
||||
beglin = 1;
|
||||
|
||||
break;
|
||||
|
||||
case SCDECL:
|
||||
fputs( "%s", stderr );
|
||||
break;
|
||||
|
||||
case XSCDECL:
|
||||
fputs( "%x", stderr );
|
||||
break;
|
||||
|
||||
case WHITESPACE:
|
||||
(void) putc( ' ', stderr );
|
||||
break;
|
||||
|
||||
case SECTEND:
|
||||
fputs( "%%\n", stderr );
|
||||
|
||||
/* we set beglin to be true so we'll start
|
||||
* writing out numbers as we echo rules. flexscan() has
|
||||
* already assigned sectnum
|
||||
*/
|
||||
|
||||
if ( sectnum == 2 )
|
||||
beglin = 1;
|
||||
|
||||
break;
|
||||
|
||||
case NAME:
|
||||
fprintf( stderr, "'%s'", nmstr );
|
||||
break;
|
||||
|
||||
case CHAR:
|
||||
switch ( yylval )
|
||||
{
|
||||
case '<':
|
||||
case '>':
|
||||
case '^':
|
||||
case '$':
|
||||
case '"':
|
||||
case '[':
|
||||
case ']':
|
||||
case '{':
|
||||
case '}':
|
||||
case '|':
|
||||
case '(':
|
||||
case ')':
|
||||
case '-':
|
||||
case '/':
|
||||
case '\\':
|
||||
case '?':
|
||||
case '.':
|
||||
case '*':
|
||||
case '+':
|
||||
case ',':
|
||||
fprintf( stderr, "\\%c", yylval );
|
||||
break;
|
||||
|
||||
default:
|
||||
if ( ! isascii( yylval ) || ! isprint( yylval ) )
|
||||
fprintf( stderr, "\\%.3o", yylval );
|
||||
else
|
||||
(void) putc( yylval, stderr );
|
||||
break;
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
case NUMBER:
|
||||
fprintf( stderr, "%d", yylval );
|
||||
break;
|
||||
|
||||
case PREVCCL:
|
||||
fprintf( stderr, "[%d]", yylval );
|
||||
break;
|
||||
|
||||
case EOF_OP:
|
||||
fprintf( stderr, "<<EOF>>" );
|
||||
break;
|
||||
|
||||
case 0:
|
||||
fprintf( stderr, "End Marker" );
|
||||
break;
|
||||
|
||||
default:
|
||||
fprintf( stderr, "*Something Weird* - tok: %d val: %d\n",
|
||||
toktype, yylval );
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return ( toktype );
|
||||
}
|
Loading…
Reference in a new issue