fixup commit for tag 'dist2'

This commit is contained in:
cvs2hg 1985-02-17 00:57:20 +00:00
parent 9f45dd0650
commit 3f3bddef8f
1707 changed files with 0 additions and 178572 deletions

131
Action
View file

@ -1,131 +0,0 @@
name "System definition"
dir first
action did_first
failure "You have to run the shell script first in the directory first"
fatal
end
name "EM definition"
dir etc
end
name "C preprocessor"
dir util/cpp
end
name "EM definition library"
dir util/data
end
name "Encode/Decode"
dir util/misc
end
name "Shell files in bin"
dir util/shf
end
name "EM assembler"
dir util/ass
end
name "EM Peephole optimizer"
dir util/opt
end
name "ACK archiver"
dir util/arch
end
name "Program 'ack'"
dir util/ack
end
name "Bootstrap for backend tables"
dir util/cgg
end
name "LL(1) Parser generator"
dir util/LLgen
end
name "Bootstrap for newest form of backend tables"
dir util/ncgg
end
name "C frontend"
dir lang/cem/comp
end
name "Basic frontend"
dir lang/basic/src
end
name "Intel 8086 support"
dir mach/i86
indir
end
name "MSC6500 support"
dir mach/6500
indir
end
name "Motorola 6800 support"
dir mach/6800
indir
end
name "Motorola 6805 support"
dir mach/6805
indir
end
name "Motorola 6809 support"
dir mach/6809
indir
end
name "Intel 8080 support"
dir mach/8080
indir
end
name "2-2 Interpreter support"
dir mach/int22
indir
end
name "2-4 Interpreter support"
dir mach/int24
indir
end
name "4-4 Interpreter support"
dir mach/int44
indir
end
name "IBM PC/IX support"
dir mach/ix
indir
end
name "Motorola 68000 2-4 support"
dir mach/m68k2
indir
end
name "NS16032 support"
dir mach/ns
indir
end
name "PDP 11 support"
dir mach/pdp
indir
end
name "PMDS support"
dir mach/pmds
indir
end
name "Signetics 2650 support"
dir mach/s2650
indir
end
name "Vax 2-4 support"
dir mach/vax2
indir
end
name "Vax 4-4 support"
dir mach/vax4
indir
end
name "Z80 support"
dir mach/z80
indir
end
name "Zilog Z8000 support"
dir mach/z8000
indir
end
name "Nascom support"
dir mach/z80a
indir
end
name "Pascal frontend"
dir lang/pc/pem
end

View file

@ -1,17 +0,0 @@
/*
* (c) copyright 1983 by the Vrije Universiteit, Amsterdam, The Netherlands.
*
* This product is part of the Amsterdam Compiler Kit.
*
* Permission to use, sell, duplicate or disclose this software must be
* obtained in writing. Requests for such permissions may be sent to
*
* Dr. Andrew S. Tanenbaum
* Wiskundig Seminarium
* Vrije Universiteit
* Postbox 7161
* 1007 MC Amsterdam
* The Netherlands
*
*/

View file

@ -1 +0,0 @@
# Replace this shell with `sh TakeAction`, handing it two arguments:
# the parameter `distr` and the action file `distr/Action`.
exec sh TakeAction distr distr/Action

View file

@ -1,35 +0,0 @@
# Top-level Makefile: every target is delegated to the Makefiles of the
# subdirectories, each run in its own subshell.
#
# "cd dir && make ..." is used instead of "cd dir ; make ...": when the
# directory is missing or unreadable the step now fails, instead of running
# make in THIS directory again and recursing into the same target.

cmp: # compile everything and compare
	(cd etc && make cmp )
	(cd util && make cmp )
	(cd lang && make cmp )
	(cd mach && make cmp )

# Only lang/cem and lang/pc are installed from lang, and lang/pc is done
# after mach.
install: # compile everything to machine code
	(cd etc && make install )
	(cd util && make install )
	(cd lang/cem && make install )
	(cd mach && make install )
	(cd lang/pc && make install )

clean: # remove all non-sources, except boot-files
	(cd doc && make clean )
	(cd man && make clean )
	(cd h && make clean )
	(cd etc && make clean )
	(cd util && make clean )
	(cd lang && make clean )
	(cd mach && make clean )

opr: # print all sources
	make pr | opr

# The subshells are separated by ';' so that one directory that fails does
# not stop the remaining directories from being printed.
pr: # print all sources
	@( pr Makefile ; \
	(cd doc && make pr ) ; \
	(cd man && make pr ) ; \
	(cd h && make pr ) ; \
	(cd etc && make pr ) ; \
	(cd lang && make pr ) ; \
	(cd util && make pr ) ; \
	(cd mach && make pr ) \
	)

2
README
View file

@ -1,2 +0,0 @@
Before starting installation you should read
the file doc/install.pr

View file

@ -1,102 +0,0 @@
# TakeAction [param [file]]
#
# Reads an action file (default: Action) and, for every name ... end entry in
# it, runs an action in the entry's directory.
#
#   param   handed to the action; the default action is `make $PAR`
#           (default param: install)
#   file    the action file to read (default: Action)
#
# Keywords understood in the action file, one per line:
#   name "text"     start an entry and reset all settings to their defaults
#   dir path        directory in which the action is run (default .)
#   action cmd      command to run instead of `make $PAR`
#   indir [file]    run this script again inside dir, on that dir's action file
#   failure "text"  message printed when the action fails
#   success "text"  message printed when the action succeeds
#   system pattern  only do this entry when `ack_sys` prints a match
#   fatal           stop the whole run when this entry fails
#   end             finish the entry and execute it
case $# in
0)	PAR=install ; CMD=Action ;;
1)	PAR="$1" ; CMD=Action ;;
2)	PAR="$1" ; CMD="$2" ;;
*)	echo "Syntax: $0 [param [file]]" ; exit 1 ;;
esac
if test -r "$CMD"
then	:
else
	case "$CMD" in
	Action)	echo No Action file present ;;
	*)	echo No Action file "($CMD)" present ;;
	esac
	# Stop here; otherwise the input redirection at the end of the loop
	# fails with a second, less helpful, message.
	exit 1
fi
# Absolute path of this script, needed to re-run it from another directory
# for `indir` entries.  In such a recursive run $0 is already absolute and
# must not be prefixed with the current directory again.
case "$0" in
/*)	THISFILE=$0 ;;
*)	THISFILE=`pwd`/$0 ;;
esac
SYS=
RETC=0
{ while read LINE
do
	# Skip blank and comment lines before the eval: `eval set` on such a
	# line degenerates to a bare `set`, which lists all variables and
	# leaves $1 of the PREVIOUS line in place, so that keyword would be
	# executed a second time.
	case "$LINE" in
	''|'#'*)	continue ;;
	esac
	# The eval makes the shell honour the quotes in lines like: name "a b"
	eval set $LINE
	case x"$1" in
	x#*)	;;
	xname)	SYS="$2"
		# The templates below are single-quoted on purpose; they are
		# expanded with eval when the entry is executed.
		ACTION='make $PAR'
		DIR=.
		FAIL='Failed for $SYS, see $DIR/Out'
		SUCC='$SYS -- done'
		ATYPE=
		FATAL=no
		DOIT=yes
		;;
	xfatal)	FATAL=yes ;;
	xaction|xindir)
		# Only one of action/indir is allowed per entry.
		case x$ATYPE in
		x)	ACTION=$2 ; ATYPE=$1 ;;
		*)	echo Already specified an $ATYPE for this name
			RETC=-1 ;;
		esac ;;
	xfailure)	FAIL="$2" ;;
	xsuccess)	SUCC="$2" ;;
	xdir)	DIR="$2" ;;
	xsystem)
		case `ack_sys` in
		$2)	;;
		*)	echo "Sorry, $SYS can only be made on $2 systems"
			DOIT=no
			;;
		esac ;;
	xend)
		case $DOIT in
		no)	continue ;;
		esac
		case x$SYS in
		x)	echo Missing name line; RETC=-1 ;;
		*)	if test -d $DIR
			then (
				# Subshell: the cd and the exit stay local to
				# this entry.
				cd $DIR
				X=
				case $ATYPE in
				indir)
					# $ACTION is the optional action file
					# name for the recursive run.
					if sh $THISFILE $PAR $ACTION
					then eval echo $SUCC
					else RETC=2 ; eval echo $FAIL
					fi ;;
				*)
					if eval "$ACTION >Out 2>&1 </dev/null"
					then eval echo $SUCC
					else RETC=1 ; X=: ; eval echo $FAIL
					fi
					;;
				esac
				# Copy the captured output to file descriptor 3.
				# On failure X is ':' so the rm becomes a no-op
				# and Out is kept for inspection.
				# NOTE(review): presumably the invoker opens
				# descriptor 3 for a log -- confirm.
				(echo ------- `pwd`
				cat Out
				$X rm -f Out
				) 2>/dev/null 1>&- 1>&3
				exit $RETC
			)
			# Remember the first failure only.
			case $? in
			0)	;;
			*)	case $RETC in
				0)	RETC=$? ;;
				esac ;;
			esac
			else
				echo Directory $DIR for $SYS is inaccessible
				RETC=-2
			fi ;;
		esac
		case $FATAL$RETC in
		yes0)	;;
		yes*)	echo Fatal error, installation stopped.
			exit $RETC ;;
		esac
		SYS=
		;;
	*)	echo Unknown keyword "$1"
		RETC=-3 ;;
	esac
done
} <"$CMD"
# A non-zero status of the loop itself takes precedence over RETC.
RETX=$?
case $RETX in
0)	exit $RETC ;;
*)	exit $RETX ;;
esac

View file

@ -1,26 +0,0 @@
# todistr [-rREV] [rcs-flags] file
#
# Attach the symbolic name "distr2" to one revision of an RCS file.
#
#   -rREV   revision to tag; when absent the head revision reported by
#           `rlog -h` is used
#   -*      any other flag is handed to rcs unchanged
#   file    the one file to tag
#
# Exit status: 1 on a usage error, 2 when no revision could be determined,
# otherwise the status of rcs.
REV=
FILE=
# Start with an empty flag list, so a FLAGS variable inherited from the
# environment cannot leak into the rcs command line.
FLAGS=
while :
do
	case $# in
	0)	break ;;
	esac
	ARG="$1"
	shift
	case "$ARG" in
	-r*)	REV=`echo "$ARG"| sed s/-r//` ;;
	-*)	FLAGS="$FLAGS $ARG" ;;
	*)	case x$FILE in
		x)	FILE="$ARG" ;;
		*)	echo todistr can only be done on one file at the time
			exit 1 ;;
		esac ;;
	esac
done
# Without a file there is nothing to tag; say so instead of letting rlog or
# rcs fail on an empty argument.
case x$FILE in
x)	echo todistr: no file specified
	exit 1 ;;
esac
case x$REV in
x)	REV=`rlog -h "$FILE"|sed -n -e '/head/s/^head:[ ]*//p'` ;;
esac
case x$REV in
x)	exit 2 ;;
esac
# $FLAGS is deliberately unquoted: it holds a blank-separated list of flags.
rcs -ndistr2:$REV $FLAGS "$FILE"

File diff suppressed because it is too large Load diff

View file

@ -1,53 +0,0 @@
# $Header$
#
# Formats the documentation sources (*.doc) into result files (*.$(SUF)).
#
#   SUF    suffix of the formatted result files
#   PRINT  command used by the "pr" target to output the result files
#   NROFF  formatter to run
#   MS     macro package flag given to $(NROFF) for most documents
SUF=pr
PRINT=cat
RESFILES=cref.$(SUF) pcref.$(SUF) val.$(SUF) v7bugs.$(SUF) install.$(SUF)\
	ack.$(SUF) cg.$(SUF) regadd.$(SUF) peep.$(SUF) toolkit.$(SUF) LLgen.$(SUF)\
	basic.$(SUF) 6500.$(SUF) ncg.$(SUF)
NROFF=nroff
MS=-ms

# cref is piped through tbl and LLgen through eqn; cref, pcref and val are
# formatted without $(MS).
cref.$(SUF): cref.doc
	tbl $? | $(NROFF) >$@
v7bugs.$(SUF): v7bugs.doc
	$(NROFF) $(MS) $? >$@
ack.$(SUF): ack.doc
	$(NROFF) $(MS) $? >$@
cg.$(SUF): cg.doc
	$(NROFF) $(MS) $? >$@
ncg.$(SUF): ncg.doc
	$(NROFF) $(MS) $? >$@
regadd.$(SUF): regadd.doc
	$(NROFF) $(MS) $? >$@
install.$(SUF): install.doc
	$(NROFF) $(MS) $? >$@
pcref.$(SUF): pcref.doc
	$(NROFF) $? >$@
basic.$(SUF): basic.doc
	$(NROFF) $(MS) $? >$@
peep.$(SUF): peep.doc
	$(NROFF) $(MS) $? >$@
val.$(SUF): val.doc
	$(NROFF) $? >$@
toolkit.$(SUF): toolkit.doc
	$(NROFF) $(MS) $? >$@
LLgen.$(SUF): LLgen.doc
	eqn $? | $(NROFF) $(MS) >$@
6500.$(SUF): 6500.doc
	$(NROFF) $(MS) $? >$@

# Nothing is done for install and cmp in this directory.
install cmp:

distr: install.doc
	nroff -Tlp install.doc >install.pr

# Bring all result files up to date (make's own output goes to make.pr.out),
# then output them with $(PRINT).
# The macro references must be parenthesized: make reads "$SUF" as the
# one-letter macro $(S) followed by the text "UF", so the sub-make would
# otherwise be given SUF=UF, NROFF=ROFF and PRINT=RINT.
pr:
	@make "SUF=$(SUF)" "NROFF=$(NROFF)" "PRINT=$(PRINT)" $(RESFILES) \
		>make.pr.out 2>&1
	@$(PRINT) $(RESFILES)

opr:
	make pr | opr

clean:
	-rm -f *.old $(RESFILES) *.t

View file

@ -1,420 +0,0 @@
.\" $Header$
.nr LL 7.5i
.tr ~
.nr PD 1v
.TL
Ack Description File
.br
Reference Manual
.AU
Ed Keizer
.AI
Wiskundig Seminarium
Vrije Universiteit
Amsterdam
.NH
Introduction
.PP
The program \fIack\fP(I) internally maintains a table of
possible transformations and a table of string variables.
The transformation table contains one entry for each possible
transformation of a file.
Which transformations are used depends on the suffix of the
source file.
Each transformation table entry tells which input suffixes are
allowed and what suffix/name the output file has.
When the output file does not already satisfy the request of the
user, with the flag \fB-c.suffix\fP, the table is scanned
starting with the next transformation in the table for another
transformation that has as input suffix the output suffix of
the previous transformation.
A few special transformations are recognized, among them is the
combiner.
A program combining several files into one.
When no stop suffix was specified (flag \fB-c.suffix\fP) \fIack\fP
stops after executing the combiner with as arguments the -
possibly transformed - input files and libraries.
\fIAck\fP will only perform the transformations in the order in
which they are presented in the table.
.LP
The string variables are used while creating the argument list
and program call name for
a particular transformation.
.NH
Which descriptions are used
.PP
\fIAck\fP always uses two description files: one to define the
front-end transformations and one for the machine dependent
back-end transformations.
Each description has a name.
First the way of determining
the name of the descriptions needed is described.
.PP
When the shell environment variable ACKFE is set \fIack\fP uses
that to determine the front-end table name, otherwise it uses
\fBfe\fP.
.PP
The way the backend table name is determined is more
convoluted.
.br
First, when the last filename in the program call name is not
one of \fIack\fP, \fIcc\fP, \fIacc\fP, \fIpc\fP or \fIapc\fP,
this filename is used as the backend description name.
Second, when the \fB-m\fP flag is present the \fB-m\fP is chopped off this
flag and the rest is used as the backend description name.
Third, when both failed the shell environment variable ACKM is
used.
Last, when also ACKM was not present the default backend is
used, determined by the definition of ACKM in h/local.h.
The presence and value of the definition of ACKM is
determined at compile time of \fIack\fP.
.PP
Now, we have the names, but that is only the first step.
\fIAck\fP stores a few descriptions at compile time.
These descriptions are simply files read in at compile time.
At the moment of writing this document, the descriptions
included are: pdp, fe, i86, m68k2, vax2 and int.
The name of a description is first searched for internally,
then in the directory lib/ack and finally in the current
directory of the user.
.NH
Using the description file
.PP
Before starting on a narrative of the description file,
the introduction of a few terms is necessary.
All these terms are used to describe the scanning of zero
terminated strings, thereby producing another string or
sequence of strings.
.IP Backslashing 5
.br
All characters preceded by \e are modified to prevent
recognition at further scanning.
This modification is undone before a string is passed to the
outside world as argument or message.
When reading the description files the
sequences \e\e, \e# and \e<newline> have a special meaning.
\e\e translates to a single \e, \e# translates to a single #
that is not
recognized as the start of comment, but can be used in
recognition and finally, \e<newline> translates to nothing at
all, thereby allowing continuation lines.
.nr PD 0
.IP "Variable replacement"
.br
The scan recognizes the sequences {{, {NAME} and {NAME?text},
where NAME can be any combination of characters excluding ? and
} and text may be anything excluding }.
(~\e} is allowed of course~)
The first sequence produces an unescaped single {.
The second produces the contents of the NAME, definitions are
done by \fIack\fP and in description files.
When the NAME is not defined an error message is produced on
the diagnostic output.
The last sequence produces the contents of NAME if it is
defined and text otherwise.
.PP
.IP "Expression replacement"
.br
Syntax: (\fIsuffix sequence\fP:\fIsuffix sequence\fP=\fItext\fP)
.br
Example: (.c.p.e:.e=tail_em)
.br
If the two suffix sequences have a common member -~\&.e in this
case~- the text is produced.
When no common member is present the empty string is produced.
Thus the example given is a constant expression.
Normally, one of the suffix sequences is produced by variable
replacement.
\fIAck\fP sets three variables while performing the diverse
transformations: HEAD, TAIL and RTS.
All three variables depend on the properties \fIrts\fP and
\fIneed\fP from the transformations used.
Whenever a transformation is used for the first time,
the text following the \fIneed\fP is appended to both the HEAD and
TAIL variable.
The value of the variable RTS is determined by the first
transformation used with a \fIrts\fP property.
.LP
Two runtime flags have effect on the value of one or more of
these variables.
The flag \fB-.suffix\fP has the same effect on these three variables
as if a file with that \fBsuffix\fP was included in the argument list
and had to be translated.
The flag \fB-r.suffix\fP only has that effect on the TAIL
variable.
The program call names \fIacc\fP and \fIcc\fP have the effect
of an automatic \fB-.c\fP flag.
\fIApc\fP and \fIpc\fP have the effect of an automatic \fB-.p\fP flag.
.IP "Line splitting"
.br
The string is transformed into a sequence of strings by replacing
the blank space by string separators (nulls).
.IP "IO replacement"
.br
The > in the string is replaced by the output file name.
The < in the string is replaced by the input file name.
When multiple input files are present the string is duplicated
for each input file name.
.nr PD 1v
.LP
Each description is a sequence of variable definitions followed
by a sequence of transformation definitions.
Variable definitions use a line each, transformations
definitions consist of a sequence of lines.
Empty lines are discarded, as are lines with nothing but
comment.
Comment is started by a # character, and continues to the end
of the line.
Three special two-characters sequences exist: \e#, \e\e and
\e<newline>.
Their effect is described under 'backslashing' above.
Each - nonempty - line starts with a keyword, possibly
preceded by blank space.
The keyword can be followed by a further specification.
The two are separated by blank space.
.PP
Variable definitions use the keyword \fIvar\fP and look like this:
.DS X
var NAME=text
.DE
The name can be any identifier, the text may contain any
character.
Blank space before the equal sign is not part of the NAME.
Blank space after the equal is considered as part of the text.
The text is scanned for variable replacement before it is
associated with the variable name.
.br
.sp 2
The start of a transformation definition is indicated by the
keyword \fIname\fP.
The last line of such a definition contains the keyword
\fIend\fP.
The lines in between associate properties to a transformation
and may be presented in any order.
The identifier after the \fIname\fP keyword determines the name
of the transformation.
This name is used for debugging and by the \fB-R\fP flag.
The keywords are used to specify which input suffices are
recognized by that transformation,
the program to run, the arguments to be handed to that program
and the name or suffix of the resulting output file.
Two keywords are used to indicate which run-time startoffs and
libraries are needed.
The possible keywords are:
.IP \fIfrom\fP
.br
followed by a sequence of suffices.
Each file with one of these suffices is allowed as input file.
Preprocessor transformations, those with the \fBP\fP property
after the \fIprop\fP keyword, do not need the \fIfrom\fP
keyword. All other transformations do.
.nr PD 0
.IP \fIto\fP
.br
followed by the suffix of the output file name or in the case of a
linker -~indicated by C option after the \fIprop\fP keyword~-
the output file name.
.IP \fIprogram\fP
.br
followed by name of the load file of the program, a pathname most likely
starts with either a / or {EM}.
This keyword must be
present, the remainder of the line
is subject to backslashing and variable replacement.
.IP \fImapflag\fP
.br
The mapflags are used to grab flags given to \fIack\fP and
pass them on to a specific transformation.
This feature uses a few simple pattern matching and replacement
facilities.
Multiple occurrences of this keyword are allowed.
The text following the keyword is
subjected to backslashing.
The keyword is followed by a match expression and a variable
assignment separated by blank space.
As soon as both description files are read, \fIack\fP looks
at all transformations in these files to find a match for the
flags given to \fIack\fP.
The flags \fB-m\fP, \fB-o\fP,
\fB-O\fP, \fB-r\fP, \fB-v\fP, \fB-g\fP, \fB-c\fP, \fB-t\fP,
\fB-k\fP, \fB-R\fP and \fB-.\fP are specific to \fIack\fP and
not handed down to any transformation.
The matching is performed in the order in which the entries
appear in the definition.
The scanning stops after the first match is found.
When a match is found, the variable assignment is executed.
A * in the match expression matches any sequence of characters,
a * in the right hand part of the assignment is
replaced by the characters matched by
the * in the expression.
The right hand part is also subject to variable replacement.
The variable will probably be used in the program arguments.
The \fB-l\fP flags are special,
the order in which they are presented to \fIack\fP must be
preserved.
The identifier LNAME is used in conjunction with the scanning of
\fB-l\fP flags.
The value assigned to LNAME is used to replace the flag.
The example further on shows the use of all this.
.IP \fIargs\fP
.br
The keyword is followed by the program call arguments.
It is subject to backslashing, variable replacement, expression
replacement, line splitting and IO replacement.
The variables assigned to by \fImapflags\fP will probably be
used here.
The flags not recognized by \fIack\fP or any of the transformations
are passed to the linker and inserted before all other arguments.
.IP \fIprop\fP
.br
This -~optional~- keyword is followed by a sequence of options,
each option is indicated by one character
signifying a special property of the transformation.
The possible options are:
.DS X
< the input file will be read from standard input
> the output file will be written on standard output
p the input files must be preprocessed
m the input files must be preprocessed when starting with #
O this transformation is an optimizer and may be skipped
P this transformation is the preprocessor
C this transformation is the linker
.DE
.IP \fIrts\fP
.br
This -~optional~- keyword indicates that the rest of the line must be
used to set the variable RTS, if it was not already set.
Thus the variable RTS is set by the first transformation
executed with such a property or as a result from \fIack\fP's program
call name (acc, cc, apc or pc) or by the \fB-.suffix\fP flag.
.IP \fIneed\fP
.br
This -~optional~- keyword indicates that the rest of the line must be
concatenated to the NEEDS variable.
This is done once for every transformation used or indicated
by one of the program call names mentioned above or indicated
by the \fB-.suffix\fP flag.
.br
.nr PD 1v
.NH
Conventions used in description files
.PP
\fIAck\fP reads two description files.
A few of the variables defined in the machine specific file
are used by the descriptions of the front-ends.
Other variables, set by \fIack\fP, are of use to all
transformations.
.PP
\fIAck\fP sets the variable EM to the home directory of the
Amsterdam Compiler Kit.
The variable SOURCE is set to the name of the argument that is currently
being massaged, this is useful for debugging.
.br
The variable M indicates the
directory in mach/{M}/lib/tail_..... and NAME is the string to
be defined by the preprocessor with -D{NAME}.
The definitions of {w}, {s}, {l}, {d}, {f} and {p} indicate
EM_WSIZE, EM_SSIZE, EM_LSIZE, EM_DSIZE, EM_FSIZE and EM_PSIZE
respectively.
.br
The variable INCLUDES is used as the last argument to \fIcpp\fP,
it is currently used to add the directory {EM}/include to
the list of directories containing #include files.
{EM}/include contains a few files used by the library routines
for part III from the
.UX
manual.
These routines are included in the kit.
.PP
The variables HEAD, TAIL and RTS are set by \fIack\fP and used
to compose the arguments for the linker.
.NH
Example
.sp 1
description for front-end
.DS X
name cpp # the C-preprocessor
# no from, it's governed by the P property
to .i # result files have suffix i
program {EM}/lib/cpp # pathname of loadfile
mapflag -I* CPP_F={CPP_F?} -I* # grab -I.. -U.. and
mapflag -U* CPP_F={CPP_F?} -U* # -D.. to use as arguments
mapflag -D* CPP_F={CPP_F?} -D* # in the variable CPP_F
args {CPP_F?} {INCLUDES?} -D{NAME} -DEM_WSIZE={w} -DEM_PSIZE={p} \
-DEM_SSIZE={s} -DEM_LSIZE={l} -DEM_FSIZE={f} -DEM_DSIZE={d} <
# The arguments are: first the -[IUD]...
# then the include dir's for this machine
# then the NAME and size values finally
# followed by the input file name
prop >P # Output on stdout, is preprocessor
end
name cem # the C-compiler proper
from .c # used for files with suffix .c
to .k # produces compact code files
program {EM}/lib/em_cem # pathname of loadfile
mapflag -p CEM_F={CEM_F?} -Xp # pass -p as -Xp to cem
mapflag -L CEM_F={CEM_F?} -l # pass -L as -l to cem
args -Vw{w}i{w}p{p}f{f}s{s}l{l}d{d} {CEM_F?}
# the arguments are the object sizes in
# the -V... flag and possibly -l and -Xp
prop <>p # input on stdin, output on stdout, use cpp
rts .c # use the C run-time system
need .c # use the C libraries
end
name decode # make human readable files from compact code
from .k.m # accept files with suffix .k or .m
to .e # produce .e files
program {EM}/lib/em_decode # pathname of loadfile
args < # the input file name is the only argument
prop > # the output comes on stdout
end
.DE
.DS X
Example of a backend, in this case the EM assembler/loader.
var w=2 # wordsize 2
var p=2 # pointersize 2
var s=2 # short size 2
var l=4 # long size 4
var f=4 # float size 4
var d=8 # double size 8
var M=int # Unused in this example
var NAME=int22 # for cpp (NAME=int results in #define int 1)
var LIB=mach/int/lib/tail_ # part of file name for libraries
var RT=mach/int/lib/head_ # part of file name for run-time startoff
var SIZE_FLAG=-sm # default internal table size flag
var INCLUDES=-I{EM}/include # use {EM}/include for #include files
name asld # Assembler/loader
from .k.m.a # accepts compact code and archives
to e.out # output file name
program {EM}/lib/em_ass # load file pathname
mapflag -l* LNAME={EM}/{LIB}* # e.g. -ly becomes
# {EM}/mach/int/lib/tail_y
mapflag -+* ASS_F={ASS_F?} -+* # recognize -+ and --
mapflag --* ASS_F={ASS_F?} --*
mapflag -s* SIZE_FLAG=-s* # overwrite old value of SIZE_FLAG
args {SIZE_FLAG} \
({RTS}:.c={EM}/{RT}cc) ({RTS}:.p={EM}/{RT}pc) -o > < \
(.p:{TAIL}={EM}/{LIB}pc) \
(.c:{TAIL}={EM}/{LIB}cc.1s {EM}/{LIB}cc.2g) \
(.c.p:{TAIL}={EM}/{LIB}mon)
# -s[sml] must be first argument
# the next line contains the choice for head_cc or head_pc
# and the specification of in- and output.
# the last three args lines choose libraries
prop C # This is the final stage
end
.DE
The command "ack -mint -v -v -I../h -L -ly prog.c"
would result in the following
calls (with exec(II)):
.DS X
1) /usr/em/lib/cpp -I../h -I/usr/em/include -Dint22 -DEM_WSIZE=2 -DEM_PSIZE=2
-DEM_SSIZE=2 -DEM_LSIZE=4 -DEM_FSIZE=4 -DEM_DSIZE=8 prog.c
2) /usr/em/lib/em_cem -Vw2i2p2f4s2l4d8 -l
3) /usr/em/lib/em_ass -sm /usr/em/mach/int/lib/head_cc -o e.out prog.k
/usr/em/mach/int/lib/tail_y /usr/em/mach/int/lib/tail_cc.1s
/usr/em/mach/int/lib/tail_cc.2g /usr/em/mach/int/lib/tail_mon
.DE

View file

@ -1,854 +0,0 @@
.\" $Header$
.TL
.de Sy
.LP
.IP \fBsyntax\fR 10
..
.de PU
.IP \fBpurpose\fR 10
..
.de RM
.IP \fBremarks\fR 10
..
The ABC compiler
.AU
Martin L. Kersten
.AI
Department of Mathematics and Computer Science.
.br
Vrije Universiteit
.AB
This manual describes the
programming language BASIC and its compiler
included in the Amsterdam Compiler Kit.
.AE
.SH
INTRODUCTION.
.LP
The BASIC-EM compiler is an extensive implementation of the
programming language BASIC.
The language structure and semantics are modelled after the
BASIC interpreter/compiler of Microsoft (tr), a detailed comparison
is provided in appendix A.
.LP
The compiler generates code for a virtual machine, the EM machine
[[ACM, etc]]
Using EM as an intermediate machine results in a highly portable
compiler and BASIC code.
The drawback of EM is that it does not directly reflect one particular
hardware design, which means that many of
the low level operations available within
BASIC are ill-defined or even inapplicable.
To mention a few, the peek and poke instructions are likely
to behave erroneously, while line printer and tapedeck
primitives are unknown.
.LP
This manual is divided into three chapters.
The first chapter discusses the general language syntax and semantics.
Chapter two describes the statements available in BASIC-EM.
Chapter 3 describes the predefined functions,
ordered alphabetically.
Appendix A discusses the differences with
Microsoft BASIC. Appendix B describes all reserved symbols.
Appendix C lists the error messages in use.
.sp
Additional information about EM and the Amsterdam Compiler Kit
can be obtained from .... and found in ......
.SH
SYNTAX NOTATION
.LP
The conventions for syntax presentation are as follows:
.IP CAPS 10
Items are reserved words, must be input as shown
.IP <> 10
Items in lowercase letters enclosed in angular brackets
are to be supplied by the user.
.IP [] 10
Items are optional.
.IP \.\.\. 10
Items may be repeated any number of times
.IP {} 10
A choice between two or more alternatives. At least one of the entries
must be chosen.
.IP | 10
Vertical bars separate the choices within braces.
.LP
All punctuation must be included where shown.
.NH 1
GENERAL INFORMATION
.LP
The BASIC-EM compiler is designed for a UNIX based environment.
It accepts a text file with your BASIC program (suffix .b) and generates
an executable file, called a.out.
.LP
Should we call the preprocessor first?
.NH 2
LINE FORMAT
.LP
A BASIC program consists of a series of lines, starting with a
positive line number in the range 0 to 65529.
A line may consist of more than one physical line on your terminal, but
is limited to 1024 characters.
Multiple BASIC statements may be placed on a single line, provided
they are separated by a colon (:).
.NH 2
CONSTANTS
.LP
The BASIC compiler character set is comprised of alphabetic
characters, numeric characters, and special characters shown below.
.DS
= + - * / ^ ( ) % # $ \\ _
! [ ] , . ; : & ' ? > < \\ (blank)
.DE
.LP
BASIC uses two different types of constants during processing:
numeric and string constants.
.br
A string constant is a sequence of characters taken from the ASCII
character set enclosed by double quotation marks.
.br
Numeric constants are positive or negative numbers, grouped into
five different classes.
.IP "a) integer constants" 25
Whole numbers in the range of -32768 and 32767. Integer constants do
not contain decimal points.
.IP "b) fixed point constants" 25
Positive or negative real numbers, i.e. numbers with a decimal point.
.IP "c) floating point constants" 25
Real numbers in scientific notation. A floating point constant
consists of an optional signed integer or fixed point number
followed by the letter E (or D) and an optional signed integer
(the exponent).
The allowable range of floating point constants is 10^-38 to 10^+38.
.IP "d) Hex constants" 25
Hexadecimal numbers, denoted by the prefix &H.
.IP "e) Octal constants" 25
Octal numbers, denoted by the prefix &O.
.NH 2
VARIABLES
.LP
Variables are names used to represent values in a BASIC program.
A variable is assigned a value by assignment specified in the program.
Before a variable is assigned its value is assumed to be zero.
.br
Variable names are composed of letters, digits or the decimal point,
starting with a letter. Up to 40 characters are significant.
A variable name may be followed by any of the following type
declaration characters:
.IP % 5
Defines an integer variable
.IP ! 5
Defines a single precision variable (see below)
.IP # 5
Defines a double precision variable
.IP $ 5
Defines a string variable.
.LP
NOTE: Two variables with the same name but different types are
considered illegal (DONE?).
.LP
Beside single valued variables, values may be grouped
into tables or arrays.
Each element in an array is referenced by the array name and an index,
such a variable is called a subscripted variable.
An array has as many subscripts as there are dimensions in the array,
the maximum of which is 11.
.br
If a variable starts with FN it is assumed to be a call to a user defined
function.
.br
A variable name may not be a reserved word nor the name
of a predefined function.
A list of all reserved identifiers is included as Appendix B.
.NH 2
EXPRESSIONS
.LP
BASIC-EM differs from Microsoft BASIC in supporting floats in one precision
only (due to EM).
All floating point constants have the same precision, i.e. 16 digits.
.LP
When necessary the compiler will convert a numeric value from
one type to another.
A value is always converted to the precision of the variable it is assigned
to.
When a floating point value is converted to an integer the fractional
portion is rounded.
In an expression all values are converted to the same degree of precision,
i.e. that of the most precise operand.
.br
Division by zero results in the message "Division by zero".
If overflow (or underflow) occurs, the "Overflow (underflow)" message is
displayed and execution is terminated (contrary to Microsoft).
.SH
Arithmetic
.LP
The arithmetic operators in order of precedence, are:
.DS L
\^ Exponentiation
- Negation
*,/,\\,MOD Multiplication, Division, Remainder
+,- Addition, Subtraction
.DE
The operator \\\\ denotes integer division, its operands are rounded to
integers before the operator is applied.
Modulus arithmetic is denoted by the operator MOD, which yields the
integer value that is the remainder of an integer division.
.br
The order in which operators are performed can be changed with parentheses.
.SH
Relational
.LP
The relational operators in order of precedence, are:
.DS
= Equality
<> Inequality
< Less than
> Greater than
<= Less than or equal to
>= Greater than or equal to
.DE
The relational operators are used to compare two values and return
either "true" (-1) or "false" (0) (See IF statement).
The precedence of the relational operators is lower
than that of the arithmetic operators.
.SH
Logical
.LP
The logical operators perform tests on multiple relations, bit manipulations,
or Boolean operations.
The logical operators return a bitwise result ("true" or "false").
In an expression, logical operators are performed after the relational and
arithmetic operators.
The logical operators work by converting their operands to signed
two's-complement integers in the range -32768 to 32767.
.DS
NOT Bitwise negation
AND Bitwise and
OR Bitwise or
XOR Bitwise exclusive or
EQV Bitwise equivalence
IMP Bitwise implies
.DE
.SH
Functional
.LP
A function is used in an expression to call a system or user defined
function.
A list of predefined functions is presented in chapter 3.
.SH
String operations
.LP
Strings can be concatenated by using +. Strings can be compared with
the relational operators. String comparison is performed in lexicographic
order.
.NH 2
ERROR MESSAGES
.LP
The occurrence of an error results in termination of the program
unless an ON....ERROR statement has been encountered.
.NH 1
B-EM STATEMENTS
.LP
This chapter describes the statements available within the BASIC-EM
compiler. Each description is formatted as follows:
.Sy
Shows the correct syntax for the statement. See introduction of
syntax notation above.
.PU
Describes the purpose and details of the instructions.
.RM
Describes special cases, deviation from Microsoft BASIC etc.
.LP
.NH 2
CALL
.Sy
CALL <variable name>[(<argument list>)]
.PU
The CALL statement provides the means to execute procedures
and functions written in another language included in the
Amsterdam Compiler Kit.
The argument list consists of (subscripted) variables.
The BASIC compiler pushes the address of the arguments on the stack in order
of encounter.
.RM
Not yet available
.NH 2
CLOSE
.Sy
CLOSE [[#]<file number>[,[#]<file number...>]]
.PU
To terminate I/O on a disk file.
<file number> is the number associated with the file
when it was OPENed (See OPEN). Omission of parameters results in closing
all files.
.sp
The END statement and STOP statement always issue a CLOSE of
all files.
.NH 2
DATA
.Sy
DATA <list of constants>
.PU
DATA statements are used to construct a data bank of values that are
accessed by the program's READ statement.
DATA statements are non-executable,
the data items are assembled in a data file by the BASIC compiler.
This file can be replaced, provided the layout remains
the same (otherwise the RESTORE won't function properly).
.sp
The list of data items consists of numeric and string constants
as discussed in section 1.
Moreover, string constants starting with a letter and not
containing blanks, newlines, commas or colons need not be enclosed with
the string quotes.
.sp
DATA statements can be reread using the RESTORE statement.
.NH 2
DEF FN
.Sy
DEF FN<name> [(<parameterlist>)]=<expression>
.PU
To define and name a function that is written by the user.
<name> must be an identifier and should be preceded by FN,
which is considered integral part of the function name.
<expression> defines the expression to be evaluated upon function call.
.sp
The parameter list is comprised of a comma separated
list of variable names, used within the function definition,
that are to be replaced by values upon function call.
The variable names defined in the parameterlist, called formal
parameters, do not affect the definition and use of variables
defined with the same name in the rest of the BASIC program.
.sp
A type declaration character may be suffixed to the function name to
designate the data type of the function result.
.NH 2
DEFINT/SNG/DBL/STR
.Sy
DEF<type> <range of letters>
.PU
Any undefined variable starting with the letter included in the range of
letters is declared of type <type> unless a type declaration character
is appended.
The range of letters is a comma separated list of characters and
character ranges (<letter>-<letter>).
.NH 2
DIM
.Sy
DIM <list of subscripted variable>
.PU
The DIM statement allocates storage for subscripted variables.
If an undefined subscripted variable is used
the maximum value of the array subscript(s) is assumed to be 10.
A subscript out of range is signalled by the program (when RCK works).
The minimum subscript value is 0, unless the OPTION BASE statement has been
encountered.
.sp
All variables in a subscripted variable are initially zero.
.sp
BUG. Multi-dimensional arrays MUST be defined.
.NH 2
END
.Sy
END
.PU
END terminates a BASIC program and returns to the UNIX shell.
An END statement at the end of the BASIC program is optional.
.NH 2
ERR and ERL
.PU
Whenever an error occurs the variable ERR contains the
error number and ERL the BASIC line where the error occurred.
The variables are usually used in error handling routines
provided by the user.
.NH 2
ERROR
.Sy
ERROR <integer expression>
.PU
To simulate the occurrence of a BASIC error.
To define your own error code use a value not already in
use by the BASIC runtime system.
The list of error messages currently in use
can be found in appendix B.
.NH 2
FIELD
.PU
To be implemented.
.NH 2
FOR...NEXT
.Sy
FOR <variable>= <low>TO<high>[STEP<size>]
.br
......
.br
NEXT [<variable>][,<variable>...]
.PU
The FOR statement allows a series of statements to be performed
repeatedly. <variable> is used as a counter. During the first
execution pass it is assigned the value <low>,
an arithmetic expression. After each pass the counter
is incremented with the step size <size>, an expression.
Omission of the step size is interpreted as an increment of 1.
Execution of the program lines specified between the FOR and the NEXT
statement is terminated as soon as the counter is greater than <high>.
.sp
The NEXT statement is labeled with the name(s) of the counter to be
incremented.
.sp
The body of the FOR statement is skipped when the initial value of the
loop times the sign of the step exceeds the value of the highest value
times the sign of the step.
.sp
The variables mentioned in the NEXT statement may be omitted, in which case
the counter of the most recent FOR statement is incremented.
If a NEXT statement is encountered before its corresponding FOR statement,
the error message "NEXT without FOR" is generated.
.NH 2
GET
.Sy
GET [#]<file number>[, <record number>]
.PU
To be implemented.
.NH 2
GOSUB...RETURN
.Sy
GOSUB <line number>
...
.br
RETURN
.PU
The GOSUB statement branches to the first statement of a subroutine.
The RETURN statement causes a branch back to the statement following the
most recent GOSUB statement.
A subroutine may contain more than one RETURN statement.
.sp
Subroutines may be called recursively.
Nesting of subroutine calls is limited, upon exceeding the maximum depth
the error message "XXXXX" is displayed.
.NH 2
GOTO
.Sy
GOTO <line number>
.PU
To branch unconditionally to a specified line in the program.
If <line number> does not exist, the compilation error message
"Line not defined" is displayed.
.RM
Microsoft BASIC continues at the first line
equal to or greater than the line specified.
.NH 2
IF...THEN
.Sy
.br
IF <expression> THEN {<statements>|<line number>}
[ELSE {<statements>|<line number>}]
.br
.Sy
IF <expression> GOTO <line number>
[ELSE {<statements>|<line number>}]
.PU
The IF statement is used
to make a decision regarding the program flow based on the
result of the expression.
If the expression is not zero, the THEN or GOTO clause is
executed. If the result of <expression> is zero, the THEN or
GOTO clause is ignored and the ELSE clause, if present, is
executed.
.br
IF..THEN..ELSE statements may be nested.
Nesting is limited by the length of the line.
The ELSE clause matches with the closest unmatched THEN.
.sp
When using IF to test equality for a value that is the
result of a floating point expression, remember that the
internal representation of the value may not be exact.
Therefore, the test should be against a range to
handle the relative error.
.RM
Microsoft BASIC allows a comma before THEN.
.NH 2
INPUT
.Sy
INPUT [;][<"prompt string">;]<list of variables>
.PU
An INPUT statement can be used to obtain values from the user at the
terminal.
When an INPUT statement is encountered a question mark is printed
to indicate the program is awaiting data.
If <"prompt string"> is included, the string is printed before
the question mark. The question mark is suppressed when the prompt
string is followed by a comma, rather than a semicolon.
.sp
For each variable in the variable list a value should be supplied.
Data items presented should be separated by a comma.
.sp
The type of the variable in the variable list must agree with the
type of the data item entered. Responding with too few or too many
data items causes the message "?Redo". No assignment of input values
is made until an acceptable response is given.
.RM
The option to discard the carriage return with the semicolon after the
input symbol is not yet implemented.
.NH 2
INPUT [#]
.Sy
INPUT #<file number>,<list of variables>
.PU
The purpose of the INPUT# statement is to read data items from a sequential
file and assign them to program variables.
<file number> is the number used to open the file for input.
The variables mentioned are (subscripted) variables.
The type of the data items read should agree with the type of the variables.
A type mismatch results in the error message "XXXXX".
.sp
The data items on the sequential file are separated by commas and newlines.
In scanning the file, leading spaces, new lines, tabs, and
carriage returns are ignored. The first character encountered
is assumed to be the start of a new item.
String items need not be enclosed with double quotes, provided
they do not contain spaces, tabs, newlines or commas.
.RM
Microsoft BASIC won't assign values until the end of input statement.
This means that the user has to supply all the information.
.NH 2
LET
.Sy
[LET]<variable>=<expression>
.PU
To assign the value of an expression to a (subscripted) variable.
The type conversions as dictated in section 1.X apply.
.NH 2
LINE INPUT
.Sy
LINE INPUT [;][<"prompt string">;]<string variable>
.PU
An entire line of input is assigned to the string variable.
See INPUT for the meaning of the <"prompt string"> option.
.NH 2
LINE INPUT [#]
.Sy
LINE INPUT #<file number>,<string variable>
.PU
Read an entire line of text from a sequential file <file number>
and assign it to a string variable.
.NH 2
LSET and RSET
.PU
To be implemented
.NH 2
MID$
.Sy
MID$(<string expr1>,n[,m])=<string expr2>
.PU
To replace a portion of a string with another string value.
The characters of <string expr2> replace characters in <string expr1>
starting at position n. If m is present, at most m characters are copied,
otherwise all characters are copied.
However, the string obtained never exceeds the length of string expr1.
.NH 2
ON ERROR GOTO
.Sy
ON ERROR GOTO <line number>
.PU
To enable error handling within the BASIC program.
An error may result from arithmetic errors, disk problems, interrupts, or
as a result of the ERROR statement.
After printing an error message the program is continued at the
statements associated with <line number>.
.sp
Error handling is disabled using ON ERROR GOTO 0.
Subsequent errors result in an error message and program termination.
.NH 2
ON...GOSUB and ON ...GOTO
.Sy
ON <expression> GOSUB <list of line numbers>
ON <expression> GOTO <list of line numbers>
.PU
To branch to one of several specified line numbers or subroutines, based
on the result of the <expression>. The list of line numbers are considered
the first, second, etc alternative. Branching to the first occurs when
the expression evaluates to one, to the second alternative on two, etc.
If the value of the expression is zero or greater than the number of alternatives, processing continues at the first statement following the ON..GOTO
(ON GOSUB) statement.
When the expression results in a negative number
an "Illegal function call" error occurs.
.NH 2
OPEN
.NH 2
OPTION BASE
.Sy
OPTION BASE n
.PU
To declare the lower bound of subsequent array subscripts as either
0 or 1. The default lower bound is zero.
.NH 2
POKE
.Sy
POKE <expr1>,<expr2>
.PU
To poke around in memory. The use of this statement is not recommended,
because it requires full understanding of both
the implementation of the Amsterdam
Compiler Kit and the hardware characteristics.
.NH 2
PRINT [USING]
.NH 2
PUT
.PU
To be implemented
.NH 2
RANDOMIZE
.Sy
RANDOMIZE [<expression>]
.PU
To reset the random seed. When the expression is omitted, the system
will ask for a value between -32768 and 32767.
The random number generator returns the same sequence of values provided
the same seed is used.
.NH 2
READ
.Sy
READ <list of variables>
.PU
To read values from the DATA statements and assign them to variables.
The type of the variables should match the type of the items being read,
otherwise a "Syntax error" occurs.
.NH 2
REM
.Sy
REM <remark>
.PU
To include explanatory information in a program.
The REM statements are not executed.
A single quote has the same effect as : REM, which
allows for the inclusion of comment at the end of the line.
.RM
Microsoft BASIC does not allow REM statements as part of
DATA lines.
.NH 2
RESTORE
.Sy
RESTORE [<line number>]
.PU
To allow DATA statements to be re-read from a specific line.
After a RESTORE statement is executed, the next READ accesses
the first item of the DATA statements.
If <line number> is specified, the next READ accesses the first
item in the specified line.
.sp
Note that data statements result in a sequential datafile generated
by the compiler, being read by the read statements.
This data file may be replaced using the operating system functions
with a modified version, provided the same layout of items
(same number of lines and items per line) is used.
.NH 2
STOP
.Sy
STOP
.PU
To terminate the execution of a program and return to the operating system
command interpreter. A STOP statement results in the message "Break in line
???"
.NH 2
SWAP
.Sy
SWAP <variable>,<variable>
.PU
To exchange the values of two variables.
.NH 2
TRON/TROFF
.Sy
TRON
.Sy
TROFF
.PU
As an aid in debugging the TRON statement results in a program
listing each line being interpreted. TROFF disables generation of
this code.
.NH 2
WHILE...WEND
.Sy
WHILE <expression>
.....
WEND
.PU
To execute a series of BASIC statements as long as a conditional expression
is true. WHILE...WEND loops may be nested.
.NH 2
WRITE
.Sy
WRITE [<list of expressions>]
.PU
To write data at the terminal in DATA statement layout conventions.
The expressions should be separated by commas.
.NH 2
WRITE #
.Sy
WRITE #<file number> ,<list of expressions>
.PU
To write a sequential data file, being opened with the "O" mode.
The values are written using the DATA statements layout conventions.
.NH
FUNCTIONS
.LP
.IP ABS(X) 12
Returns the absolute value of expression X
.IP ASC(X$) 12
Returns the numeric value of the first character of the string.
If X$ is not initialized an "Illegal function call" error
is returned.
.IP ATN(X) 12
Returns the arctangent of X in radians. Result is in the range
of -pi/2 to pi/2.
.IP CDBL(X) 12
Converts X to a double precision number.
.IP CHR$(X) 12
Converts the integer value X to its ASCII character.
X must be in the range of 0 to 127.
It is used for cursor addressing and generating bell signals.
.IP CINT(X) 12
Converts X to an integer by rounding the fractional portion.
If X is not in the range -32768 to 32767 an "Overflow"
error occurs.
.IP COS(X) 12
Returns the cosine of X in radians.
.IP CSNG(X) 12
Converts X to a single precision number.
.IP CVI(<2-bytes>) 12
Convert two byte string value to integer number.
.IP CVS(<4-bytes>) 12
Convert four byte string value to single precision number.
.IP CVD(<8-bytes>) 12
Convert eight byte string value to double precision number.
.IP EOF[(<file-number>)] 12
Returns -1 (true) if the end of a sequential file has been reached.
.IP EXP(X) 12
Returns e(base of natural logarithm) to the power of X.
X should be less than 10000.0.
.IP FIX(X) 12
Returns the truncated integer part of X. FIX(X) is
equivalent to SGN(X)*INT(ABS(X)).
The major difference between FIX and INT is that FIX does not
return the next lower number for negative X.
.IP HEX$(X) 12
Returns the string which represents the hexadecimal value of
the decimal argument. X is rounded to an integer using CINT
before HEX$ is evaluated.
.IP INT(X) 12
Returns the largest integer <= X.
.IP INPUT$(X[,[#]Y]) 12
Returns the string of X characters read from the terminal or
the designated file.
.IP LEN(X$) 12
Returns the number of characters in the string X$.
Non-printable characters and blanks are counted too.
.IP LOC(<file\ number>) 12
For sequential files LOC returns
position of the read/write head, counted in number of bytes.
For random files the function returns the record number just
read or written from a GET or PUT statement.
If nothing was read or written 0 is returned.
.IP LOG(X) 12
Returns the natural logarithm of X. X must be greater than zero.
.IP MID$(X,I,[J]) 12
To be implemented.
.IP MKI$(X) 12
Converts an integer expression to a two-byte string.
.IP MKS$(X) 12
Converts a single precision expression to a four-byte string.
.IP MKD$(X) 12
Converts a double precision expression to a eight-byte string.
.IP OCT$(X) 12
Returns the string which represents the octal value of the decimal
argument. X is rounded to an integer using CINT before OCT$ is evaluated.
.IP PEEK(I) 12
Returns the byte read from the indicated memory. (Of limited use
in the context of ACK)
.IP POS(I) 12
Returns the current cursor position. To be implemented.
.IP RIGHT$(X$,I)
Returns the right most I characters of string X$.
If I=0 then the empty string is returned.
.IP RND(X) 12
Returns a random number between 0 and 1. X is a dummy argument.
.IP SGN(X) 12
If X>0 , SGN(X) returns 1.
.br
if X=0, SGN(X) returns 0.
.br
if X<0, SGN(X) returns -1.
.IP SIN(X) 12
Returns the sine of X in radians.
.IP SPACE$(X) 12
Returns a string of spaces length X. The expression
X is rounded to an integer using CINT.
.IP STR$(X)
Returns the string representation value of X.
.IP STRING$(I,J) 12
Returns the string of length I whose characters all
have ASCII code J. (or first character when J is a string)
.IP TAB(I) 12
Spaces to position I on the terminal. If the current
print position is already beyond space I, TAB
goes to that position on the next line.
Space 1 is leftmost position, and the rightmost position
is width minus 1. To be used within PRINT statements only.
.IP TAN(X) 12
Returns the tangent of X in radians. If TAN overflows
the "Overflow" message is displayed.
.IP VAL(X$) 12
Returns the numerical value of string X$.
The VAL function strips leading blanks and tabs from the
argument string.
.SH
APPENDIX A DIFFERENCES WITH MICROSOFT BASIC
.LP
The following list of Microsoft commands and statements are
not recognized by the compiler.
.DS
SPC
USR
VARPTR
AUTO
CHAIN
CLEAR
CLOAD
COMMON
CONT
CSAVE
DELETE
EDIT
ERASE
FRE
KILL
LIST
LLIST
LOAD
LPRINT
MERGE
NAME
NEW
NULL
RENUM
RESUME
RUN
SAVE
WAIT
WIDTH LPRINT
.DE
Some statements are in the current implementation not available,
but will be soon. These include:
.DS
CALL
DEFUSR
FIELD
GET
INKEY
INPUT$
INSTR$
LEFT$
LSET
RSET
PUT
.DE

1857
doc/cg.doc

File diff suppressed because it is too large Load diff

View file

@ -1,324 +0,0 @@
.\" $Header$
.ll 72
.nr ID 4
.de hd
'sp 2
'tl ''-%-''
'sp 3
..
.de fo
'bp
..
.tr ~
. TITLE
.de TL
.sp 15
.ce
\\fB\\$1\\fR
..
. AUTHOR
.de AU
.sp 15
.ce
by
.sp 2
.ce
\\$1
..
. DATE
.de DA
.sp 3
.ce
( Dated \\$1 )
..
. INSTITUTE
.de VU
.sp 3
.ce 4
Wiskundig Seminarium
Vrije Universiteit
De Boelelaan 1081
Amsterdam
..
. PARAGRAPH
.de PP
.sp
.ti +\n(ID
..
.nr CH 0 1
. CHAPTER
.de CH
.nr SH 0 1
.bp
.in 0
\\fB\\n+(CH.~\\$1\\fR
.PP
..
. SUBCHAPTER
.de SH
.sp 3
.in 0
\\fB\\n(CH.\\n+(SH.~\\$1\\fR
.PP
..
. INDENT START
.de IS
.sp
.in +\n(ID
..
. INDENT END
.de IE
.in -\n(ID
.sp
..
.de PT
.ti -\n(ID
.ta \n(ID
.fc " @
"\\$1@"\c
.fc
..
. DOUBLE INDENT START
.de DS
.sp
.in +\n(ID
.ll -\n(ID
..
. DOUBLE INDENT END
.de DE
.ll +\n(ID
.in -\n(ID
.sp
..
. EQUATION START
.de EQ
.sp
.nf
..
. EQUATION END
.de EN
.fi
.sp
..
. ITEM
.de IT
.sp
.in 0
\\fB~\\$1\\fR
.ti +5
..
.de CS
.br
~-~\\
..
.br
.fi
.TL "Ack-C reference manual"
.AU "Ed Keizer"
.DA "September 12, 1983"
.VU
.wh 0 hd
.wh 60 fo
.CH "Introduction"
The C frontend included in the Amsterdam Compiler Kit
translates UNIX-V7 C into compact EM code [1].
The language accepted is described in [2] and [3].
This document describes which implementation dependent choices were
made in the Ack-C frontend and
some restrictions and additions.
.CH "The language"
.PP
Under the same heading as used in [2] we describe the
properties of the Ack-C frontend.
.IT "2.2 Identifiers"
External identifiers are unique up to 7 characters and allow
both upper and lower case.
.IT "2.3 Keywords"
The word \fBvoid\fP is also reserved as a keyword.
.IT "2.4.3 Character constants"
The ASCII-mapping is used when a character is converted to an
integer.
.IT "2.4.4 Floating constants"
To prevent loss of precision the compiler does not perform
floating point constant folding.
.IT "2.6 Hardware characteristics"
The size of objects of the several arithmetic types and
pointers depend on the EM-implementation used.
The ranges of the arithmetic types depend on the size used,
the C-frontend assumes two's complement representation for the
integral types.
All sizes are multiples of bytes.
The calling program \fIack\fP[4] passes information about the
size of the types to the compiler proper.
.br
However, a few general remarks must be made:
.sp 1
.IS
.PT (a)
The size of pointers is a multiple of
(or equal to) the size of an \fIint\fP.
.PT (b)
The following relations exist for the sizes of the types
mentioned:
.br
.ti +5
\fIchar<=short<=int<=long\fP
.PT (c)
Objects of type \fIchar\fP use one 8-bit byte of storage,
although several bytes are allocated sometimes.
.PT (d)
All sizes are in multiples of bytes.
.PT (e)
Most EM implementations use 4 bytes for floats and 8 bytes
for doubles, but exceptions to this rule occur.
.IE
.IT "4 What's in a name"
The type \fIvoid\fP is added.
Objects of type void do not exist.
Functions declared as returning void, do not return a value at all.
.IT "6.1 Characters and integers"
Objects of type \fIchar\fP are unsigned and do not cause
sign-extension when converted to \fIint\fP.
The range of character values is from 0 to 255.
.IT "6.3 Floating and integral"
Floating point numbers are truncated towards zero when
converted to the integral types.
.IT "6.4 Pointers and integers"
When a \fIlong\fP is added to or subtracted from a pointer and
longs are larger than pointers the \fIlong\fP is converted to an
\fIint\fP before the operation is performed.
.IT "7.2 Unary operators"
It is allowed to cast any expression to the type \fIvoid\fP.
.IT "8.2 Type specifiers"
One type is added to the type-specifiers:
.br
.IS
void
.IE
.IT "8.5 Structure and union declarations"
The only type allowed for fields is \fIint\fP.
Fields with exactly the size of \fIint\fP are signed,
all other fields are unsigned.
.br
The size of any single structure must be less than 4096 bytes.
.IT "8.6 Initialization"
Initialization of structures containing bit fields is not
allowed.
There is one restriction when using an 'address expression' to initialize
an integral variable.
The integral variable must have the same size as a pointer.
Conversions altering the size of the address expression are not allowed.
.IT "9.10 Return statement"
Return statements of the form:
.IS
return ;
.IE
are the only form of return statement allowed in a function of type
function returning void.
.IT "10.1 External function definitions"
The total amount of storage used for parameters
in any function must be less than 4096 bytes.
The same holds for the total amount of storage occupied by the
automatic variables declared inside any function.
.sp
Using formal parameters whose size is smaller than the size of an int
is less efficient on several machines.
At procedure entry these parameters are converted from integer to the
declared type, because the compiler doesn't know where the least
significant bytes are stored in the int.
.IT "11.2 Scope of externals"
Most C compilers are rather lax in enforcing the restriction
that only one external definition without the keyword
\fIextern\fP is allowed in a program.
The Ack-C frontend is very strict in this.
The only exception is that declarations of arrays with a
missing first array bounds expression are regarded to have an
explicit keyword \fIextern\fP.
.IT "14.4 Explicit pointer conversions"
Pointers may be larger than ints, thus assigning a pointer to an
int and back will not always result in the same pointer.
The process mentioned above works with integrals
of the same size as or larger than pointers in all EM implementations
having such integrals.
When converting pointers to an integral type or vice-versa,
the pointer is seen as an unsigned int.
.br
EM guarantees that any object can be placed at a word boundary,
this allows the C-programs to use \fIint\fP pointers
as pointers to objects of any type not smaller than an \fIint\fP.
.CH "Frontend options"
The C-frontend has a few options, these are controlled
by flags:
.IS
.PT -V
This flag is followed by a sequence of letters each followed by
positive integers. Each letter indicates a
certain type, the integer following it specifies the size of
objects of that type. One letter indicates the wordsize used.
.IS
.sp 1
.TS
center tab(:);
l l16 l l.
letter:type:letter:type
w:wordsize:i:int
s:short:l:long
f:float:d:double
p:pointer::
.TE
.sp 1
All existing implementations use an integer size equal to the
wordsize.
.IE
The calling program \fIack\fP[4] provides the frontend with
this flag, with values depending on the machine used.
.sp 1
.PT -l
The frontend normally generates code to keep track of the line
number and source file name at runtime for debugging purposes.
Currently a pointer to a
string containing the filename is stored at a fixed place in
memory at each function
entry and the line number at the start of every expression.
At the return from a function these memory locations are not reset to
the values they had before the call.
Most library routines do not use this feature and thus do not
ruin the current line number and filename when called.
However, you are really unlucky when your program crashes due
to a bug in such a library function, because the line number
and filename do not indicate that something went wrong inside
the library function.
.br
Providing the flag -l to the frontend tells it not to generate
the code updating line number and file name.
This is, for example, used when translating the stdio library.
.br
When the \fIack\fP[4] is called with the -L flag it provides
the frontend with this flag.
.sp 1
.PT -Xp
When this flag is present the frontend generates a call to
the function \fBprocentry\fP at each function entry and a
call to \fBprocexit\fP at each function exit.
Both functions are provided with one parameter,
a pointer to a string containing the function name.
.br
When \fIack\fP is called with the -p flag it provides the
frontend with this flag.
.IE
.CH References
.IS
.PT [1]
A.S. Tanenbaum, Hans van Staveren, Ed Keizer and Johan
Stevenson \fIDescription of a machine architecture for use with
block structured languages\fP Informatica report IR-81.
.sp 1
.PT [2]
B.W. Kernighan and D.M. Ritchie, \fIThe C Programming
language\fP, Prentice-Hall, 1978
.PT [3]
D.M. Ritchie, \fIC Reference Manual\fP
.sp
.PT [4]
UNIX manual ack(I).

View file

@ -1,31 +0,0 @@
head: doc.pr
NROFF=nroff
FILES = macr.nr title.nr intro.nr mem.nr ispace.nr dspace.nr mapping.nr types.nr descr.nr iotrap.nr mach.nr assem.nr app.nr
IOP=../../util/ass/ip_spec.t
doc.pr: $(FILES) itables em.i
tbl $(FILES) | $(NROFF) >doc.pr
distr: $(FILES) itables em.i
tbl $(FILES) | nroff -Tlp >doc.pr
opr: doc.pr
make pr | opr
pr:
@make "NROFF="$NROFF doc.pr >makepr.out 2>&1
@cat doc.pr
app.t: itables em.i
em.i: int/em.p
@echo Sorry, this copy was edited by hand from int/em.p
itables: $(IOP)
awk -f ip.awk $(IOP) | tbl >itables
.SUFFIXES : .pr .nr
.nr.pr: ; tbl macr.nr $*.nr | $(NROFF) >$@
cont.t intro.t mem.t ispace.t dspace.t mapping.t succ.t descr.t iotrap.t mach.t assem.t kern.t app.t: macr.nr

View file

@ -1 +0,0 @@
Sorry, the kun macro package is not ours to distribute.

File diff suppressed because it is too large Load diff

View file

@ -1,488 +0,0 @@
.BP
.AP "EM INTERPRETER"
.nf
.ta 8 16 24 32 40 48 56 64 72 80
.so em.i
.fi
.BP
.AP "EM CODE TABLES"
The following table is used by the assembler for EM machine
language.
It specifies the opcodes used for each instruction and
how arguments are mapped to machine language arguments.
The table is presented in three columns,
each line in each column contains three or four fields.
Each line describes a range of interpreter opcodes by
specifying for which instruction the range is used, the type of the
opcodes (mini, shortie, etc..) and range for the instruction
argument.
.A
The first field on each line gives the EM instruction mnemonic,
the second field gives some flags.
If the opcodes are minis or shorties the third field specifies
how many minis/shorties are used.
The last field gives the number of the (first) interpreter
opcode.
.N 1
Flags :
.IS 3
.N 1
Opcode type, only one of the following may be specified.
.PS - 5 " "
.PT -
opcode without argument
.PT m
mini
.PT s
shortie
.PT 2
opcode with 2-byte signed argument
.PT 4
opcode with 4-byte signed argument
.PT 8
opcode with 8-byte signed argument
.PE
Secondary (escaped) opcodes.
.PS - 5 " "
.PT e
The opcode thus marked is in the secondary opcode group instead
of the primary
.PE
restrictions on arguments
.PS - 5 " "
.PT N
Negative arguments only
.PT P
Positive and zero arguments only
.PE
mapping of arguments
.PS - 5 " "
.PT w
argument must be divisible by the wordsize and is divided by the
wordsize before use as opcode argument.
.PT o
argument ( possibly after division ) must be >= 1 and is
decremented before use as opcode argument
.PE
.IE
If the opcode type is 2,4 or 8 the resulting argument is used as
opcode argument (least significant byte first).
.N
If the opcode type is mini, the argument is added
to the first opcode - if in range - .
If the argument is negative, the absolute value minus one is
used in the algorithm above.
.N
For shorties with positive arguments the first opcode is used
for arguments in the range 0..255, the second for the range
256..511, etc..
For shorties with negative arguments the first opcode is used
for arguments in the range -1..-256, the second for the range
-257..-512, etc..
The byte following the opcode contains the least significant
byte of the argument.
First some examples of these specifications.
.PS - 5
.PT "aar mwPo 1 34"
Indicates that opcode 34 is used as a mini for Positive
instruction arguments only.
The w and o indicate division and decrementing of the
instruction argument.
Because the resulting argument must be zero ( only opcode 34 may be used
), this mini can only be used for instruction argument 2.
Conclusion: opcode 34 is for "AAR 2".
.PT "adp sP 1 41"
Opcode 41 is used as shortie for ADP with arguments in the range
0..255.
.PT "bra sN 2 60"
Opcode 60 is used as shortie for BRA with arguments -1..-256,
61 is used for arguments -257..-512.
.PT "zer e- 145"
Escaped opcode 145 is used for ZER.
.PE
The interpreter opcode table:
.N 1
.IS 3
.DS B
.so itables
.DE 0
.IE
.P
The table above results in the following dispatch tables.
Dispatch tables are used by interpreters to jump to the
routines implementing the EM instructions, indexed by the next opcode.
Each line of the dispatch tables gives the routine names
of eight consecutive opcodes, preceded by the first opcode number
on that line.
Routine names consist of an EM mnemonic followed by a suffix.
The suffixes show the encoding used for each opcode.
.N
The following suffixes exist:
.N 1
.VS 1 0
.IS 4
.PS - 11
.PT .z
no arguments
.PT .l
16-bit argument
.PT .lw
16-bit argument divided by the wordsize
.PT .p
positive 16-bit argument
.PT .pw
positive 16-bit argument divided by the wordsize
.PT .n
negative 16-bit argument
.PT .nw
negative 16-bit argument divided by the wordsize
.PT .s<num>
shortie with <num> as high order argument byte
.PT .sw<num>
shortie with argument divided by the wordsize
.PT .<num>
mini with <num> as argument
.PT .<num>W
mini with <num>*wordsize as argument
.PE 3
<num> is a possibly negative integer.
.VS 1 1
.IE
The dispatch table for the 256 primary opcodes:
.DS B
0 loc.0 loc.1 loc.2 loc.3 loc.4 loc.5 loc.6 loc.7
8 loc.8 loc.9 loc.10 loc.11 loc.12 loc.13 loc.14 loc.15
16 loc.16 loc.17 loc.18 loc.19 loc.20 loc.21 loc.22 loc.23
24 loc.24 loc.25 loc.26 loc.27 loc.28 loc.29 loc.30 loc.31
32 loc.32 loc.33 aar.1W adf.s0 adi.1W adi.2W adp.l adp.1
40 adp.2 adp.s0 adp.s-1 ads.1W and.1W asp.1W asp.2W asp.3W
48 asp.4W asp.5W asp.w0 beq.l beq.s0 bge.s0 bgt.s0 ble.s0
56 blm.s0 blt.s0 bne.s0 bra.l bra.s-1 bra.s-2 bra.s0 bra.s1
64 cal.1 cal.2 cal.3 cal.4 cal.5 cal.6 cal.7 cal.8
72 cal.9 cal.10 cal.11 cal.12 cal.13 cal.14 cal.15 cal.16
80 cal.17 cal.18 cal.19 cal.20 cal.21 cal.22 cal.23 cal.24
88 cal.25 cal.26 cal.27 cal.28 cal.s0 cff.z cif.z cii.z
96 cmf.s0 cmi.1W cmi.2W cmp.z cms.s0 csa.1W csb.1W dec.z
104 dee.w0 del.w-1 dup.1W dvf.s0 dvi.1W fil.l inc.z ine.lw
112 ine.w0 inl.-1W inl.-2W inl.-3W inl.w-1 inn.s0 ior.1W ior.s0
120 lae.l lae.w0 lae.w1 lae.w2 lae.w3 lae.w4 lae.w5 lae.w6
128 lal.p lal.n lal.0 lal.-1 lal.w0 lal.w-1 lal.w-2 lar.W
136 ldc.0 lde.lw lde.w0 ldl.0 ldl.w-1 lfr.1W lfr.2W lfr.s0
144 lil.w-1 lil.w0 lil.0 lil.1W lin.l lin.s0 lni.z loc.l
152 loc.-1 loc.s0 loc.s-1 loe.lw loe.w0 loe.w1 loe.w2 loe.w3
160 loe.w4 lof.l lof.1W lof.2W lof.3W lof.4W lof.s0 loi.l
168 loi.1 loi.1W loi.2W loi.3W loi.4W loi.s0 lol.pw lol.nw
176 lol.0 lol.1W lol.2W lol.3W lol.-1W lol.-2W lol.-3W lol.-4W
184 lol.-5W lol.-6W lol.-7W lol.-8W lol.w0 lol.w-1 lxa.1 lxl.1
192 lxl.2 mlf.s0 mli.1W mli.2W rck.1W ret.0 ret.1W ret.s0
200 rmi.1W sar.1W sbf.s0 sbi.1W sbi.2W sdl.w-1 set.s0 sil.w-1
208 sil.w0 sli.1W ste.lw ste.w0 ste.w1 ste.w2 stf.l stf.W
216 stf.2W stf.s0 sti.1 sti.1W sti.2W sti.3W sti.4W sti.s0
224 stl.pw stl.nw stl.0 stl.1W stl.-1W stl.-2W stl.-3W stl.-4W
232 stl.-5W stl.w-1 teq.z tgt.z tlt.z tne.z zeq.l zeq.s0
240 zeq.s1 zer.s0 zge.s0 zgt.s0 zle.s0 zlt.s0 zne.s0 zne.s-1
248 zre.lw zre.w0 zrl.-1W zrl.-2W zrl.w-1 zrl.nw escape1 escape2
.DE 2
The list of secondary opcodes (escape1):
.N 1
.DS B
0 aar.l aar.z adf.l adf.z adi.l adi.z ads.l ads.z
8 adu.l adu.z and.l and.z asp.lw ass.l ass.z bge.l
16 bgt.l ble.l blm.l bls.l bls.z blt.l bne.l cai.z
24 cal.l cfi.z cfu.z ciu.z cmf.l cmf.z cmi.l cmi.z
32 cms.l cms.z cmu.l cmu.z com.l com.z csa.l csa.z
40 csb.l csb.z cuf.z cui.z cuu.z dee.lw del.pw del.nw
48 dup.l dus.l dus.z dvf.l dvf.z dvi.l dvi.z dvu.l
56 dvu.z fef.l fef.z fif.l fif.z inl.pw inl.nw inn.l
64 inn.z ior.l ior.z lar.l lar.z ldc.l ldf.l ldl.pw
72 ldl.nw lfr.l lil.pw lil.nw lim.z los.l los.z lor.s0
80 lpi.l lxa.l lxl.l mlf.l mlf.z mli.l mli.z mlu.l
88 mlu.z mon.z ngf.l ngf.z ngi.l ngi.z nop.z rck.l
96 rck.z ret.l rmi.l rmi.z rmu.l rmu.z rol.l rol.z
104 ror.l ror.z rtt.z sar.l sar.z sbf.l sbf.z sbi.l
112 sbi.z sbs.l sbs.z sbu.l sbu.z sde.l sdf.l sdl.pw
120 sdl.nw set.l set.z sig.z sil.pw sil.nw sim.z sli.l
128 sli.z slu.l slu.z sri.l sri.z sru.l sru.z sti.l
136 sts.l sts.z str.s0 tge.z tle.z trp.z xor.l xor.z
144 zer.l zer.z zge.l zgt.l zle.l zlt.l zne.l zrf.l
152 zrf.z zrl.pw dch.z exg.s0 exg.l exg.z lpb.z gto.l
.DE 2
Finally, the list of opcodes with four byte arguments (escape2).
.DS
0 loc
.DE 0
.BP
.AP "AN EXAMPLE PROGRAM"
.DS B
1 program example(output);
2 {This program just demonstrates typical EM code.}
3 type rec = record r1: integer; r2:real; r3: boolean end;
4 var mi: integer; mx:real; r:rec;
5
6 function sum(a,b:integer):integer;
7 begin
8 sum := a + b
9 end;
10
11 procedure test(var r: rec);
12 label 1;
13 var i,j: integer;
14 x,y: real;
15 b: boolean;
16 c: char;
17 a: array[1..100] of integer;
18
19 begin
20 j := 1;
21 i := 3 * j + 6;
22 x := 4.8;
23 y := x/0.5;
24 b := true;
25 c := 'z';
26 for i:= 1 to 100 do a[i] := i * i;
27 r.r1 := j+27;
28 r.r3 := b;
29 r.r2 := x+y;
30 i := sum(r.r1, a[j]);
31 while i > 0 do begin j := j + r.r1; i := i - 1 end;
32 with r do begin r3 := b; r2 := x+y; r1 := 0 end;
33 goto 1;
34 1: writeln(j, i:6, x:9:3, b)
35 end; {test}
36 begin {main program}
37 mx := 15.96;
38 mi := 99;
39 test(r)
40 end.
.DE 0
.BP
The EM code as produced by the Pascal-VU compiler is given below. Comments
have been added manually. Note that this code has already been optimized.
.DS B
mes 2,2,2 ; wordsize 2, pointersize 2
.1
rom 't.p\e000' ; the name of the source file
hol 552,-32768,0 ; externals and buf occupy 552 bytes
exp $sum ; sum can be called from other modules
pro $sum,2 ; procedure sum; 2 bytes local storage
lin 8 ; code from source line 8
ldl 0 ; load two locals ( a and b )
adi 2 ; add them
ret 2 ; return the result
end 2 ; end of procedure ( still two bytes local storage )
.2
rom 1,99,2 ; descriptor of array a[]
exp $test ; the compiler exports all level 0 procedures
pro $test,226 ; procedure test, 226 bytes local storage
.3
rom 4.8F8 ; assemble Floating point 4.8 (8 bytes) in
.4 ; global storage
rom 0.5F8 ; same for 0.5
mes 3,-226,2,2 ; compiler temporary not referenced by address
mes 3,-24,2,0 ; the same is true for i, j, b and c in test
mes 3,-22,2,0
mes 3,-4,2,0
mes 3,-2,2,0
mes 3,-20,8,0 ; and for x and y
mes 3,-12,8,0
lin 20 ; maintain source line number
loc 1
stl -4 ; j := 1
lni ; lin 21 prior to optimization
lol -4
loc 3
mli 2
loc 6
adi 2
stl -2 ; i := 3 * j + 6
lni ; lin 22 prior to optimization
lae .3
loi 8
lal -12
sti 8 ; x := 4.8
lni ; lin 23 prior to optimization
lal -12
loi 8
lae .4
loi 8
dvf 8
lal -20
sti 8 ; y := x / 0.5
lni ; lin 24 prior to optimization
loc 1
stl -22 ; b := true
lni ; lin 25 prior to optimization
loc 122
stl -24 ; c := 'z'
lni ; lin 26 prior to optimization
loc 1
stl -2 ; for i:= 1
2
lol -2
dup 2
mli 2 ; i*i
lal -224
lol -2
lae .2
sar 2 ; a[i] :=
lol -2
loc 100
beq *3 ; to 100 do
inl -2 ; increment i and loop
bra *2
3
lin 27
lol -4
loc 27
adi 2 ; j + 27
sil 0 ; r.r1 :=
lni ; lin 28 prior to optimization
lol -22 ; b
lol 0
stf 10 ; r.r3 :=
lni ; lin 29 prior to optimization
lal -20
loi 16
adf 8 ; x + y
lol 0
adp 2
sti 8 ; r.r2 :=
lni ; lin 30 prior to optimization
lal -224
lol -4
lae .2
lar 2 ; a[j]
lil 0 ; r.r1
cal $sum ; call now
asp 4 ; remove parameters from stack
lfr 2 ; get function result
stl -2 ; i :=
4
lin 31
lol -2
zle *5 ; while i > 0 do
lol -4
lil 0
adi 2
stl -4 ; j := j + r.r1
del -2 ; i := i - 1
bra *4 ; loop
5
lin 32
lol 0
stl -226 ; make copy of address of r
lol -22
lol -226
stf 10 ; r3 := b
lal -20
loi 16
adf 8
lol -226
adp 2
sti 8 ; r2 := x + y
loc 0
sil -226 ; r1 := 0
lin 34 ; note the absence of the unnecessary jump
lae 22 ; address of output structure
lol -4
cal $_wri ; write integer with default width
asp 4 ; pop parameters
lae 22
lol -2
loc 6
cal $_wsi ; write integer width 6
asp 6
lae 22
lal -12
loi 8
loc 9
loc 3
cal $_wrf ; write fixed format real, width 9, precision 3
asp 14
lae 22
lol -22
cal $_wrb ; write boolean, default width
asp 4
lae 22
cal $_wln ; writeln
asp 2
ret 0 ; return, no result
end 226
exp $_main
pro $_main,0 ; main program
.6
con 2,-1,22 ; description of external files
.5
rom 15.96F8
fil .1 ; maintain source file name
lae .6 ; description of external files
lae 0 ; base of hol area to relocate buffer addresses
cal $_ini ; initialize files, etc...
asp 4
lin 37
lae .5
loi 8
lae 2
sti 8 ; mx := 15.96
lni ; lin 38 prior to optimization
loc 99
ste 0 ; mi := 99
lni ; lin 39 prior to optimization
lae 10 ; address of r
cal $test
asp 2
loc 0 ; normal exit
cal $_hlt ; cleanup and finish
asp 2
end 0
mes 5 ; reals were used
.DE 0
The compact code corresponding to the above program is listed below.
Read it horizontally, line by line, not column by column.
Each number represents a byte of compact code, printed in decimal.
The first two bytes form the magic word.
.N 1
.IS 3
.DS B
173 0 159 122 122 122 255 242 1 161 250 124 116 46 112 0
255 156 245 40 2 245 0 128 120 155 249 123 115 117 109 160
249 123 115 117 109 122 67 128 63 120 3 122 88 122 152 122
242 2 161 121 219 122 255 155 249 124 116 101 115 116 160 249
124 116 101 115 116 245 226 0 242 3 161 253 128 123 52 46
56 255 242 4 161 253 128 123 48 46 53 255 159 123 245 30
255 122 122 255 159 123 96 122 120 255 159 123 98 122 120 255
159 123 116 122 120 255 159 123 118 122 120 255 159 123 100 128
120 255 159 123 108 128 120 255 67 140 69 121 113 116 68 73
116 69 123 81 122 69 126 3 122 113 118 68 57 242 3 72
128 58 108 112 128 68 58 108 72 128 57 242 4 72 128 44
128 58 100 112 128 68 69 121 113 98 68 69 245 122 0 113
96 68 69 121 113 118 182 73 118 42 122 81 122 58 245 32
255 73 118 57 242 2 94 122 73 118 69 220 10 123 54 118
18 122 183 67 147 73 116 69 147 3 122 104 120 68 73 98
73 120 111 130 68 58 100 72 136 2 128 73 120 4 122 112
128 68 58 245 32 255 73 116 57 242 2 59 122 65 120 20
249 123 115 117 109 8 124 64 122 113 118 184 67 151 73 118
128 125 73 116 65 120 3 122 113 116 41 118 18 124 185 67
152 73 120 113 245 30 255 73 98 73 245 30 255 111 130 58
100 72 136 2 128 73 245 30 255 4 122 112 128 69 120 104
245 30 255 67 154 57 142 73 116 20 249 124 95 119 114 105
8 124 57 142 73 118 69 126 20 249 124 95 119 115 105 8
126 57 142 58 108 72 128 69 129 69 123 20 249 124 95 119
114 102 8 134 57 142 73 98 20 249 124 95 119 114 98 8
124 57 142 20 249 124 95 119 108 110 8 122 88 120 152 245
226 0 155 249 125 95 109 97 105 110 160 249 125 95 109 97
105 110 120 242 6 151 122 119 142 255 242 5 161 253 128 125
49 53 46 57 54 255 50 242 1 57 242 6 57 120 20 249
124 95 105 110 105 8 124 67 157 57 242 5 72 128 57 122
112 128 68 69 219 110 120 68 57 130 20 249 124 116 101 115
116 8 122 69 120 20 249 124 95 104 108 116 8 122 152 120
159 124 160 255 159 125 255
.DE 0
.IE
.MS T A 0
.ME
.BP
.MS B A 0
.ME
.CT

View file

@ -1,773 +0,0 @@
.BP
.SN 11
.S1 "EM ASSEMBLY LANGUAGE"
We use two representations for assembly language programs,
one is in ASCII and the other is the compact assembly language.
The latter needs less space than the first for the same program
and therefore allows faster processing.
Our only program accepting ASCII assembly
language converts it to the compact form.
All other programs expect compact assembly input.
The first part of the chapter describes the ASCII assembly
language and its semantics.
The second part describes the syntax of the compact assembly
language.
The last part lists the EM instructions with the type of
arguments allowed and an indication of the function.
Appendix A gives a detailed description of the effect of all
instructions in the form of a Pascal program.
.S2 "ASCII assembly language"
An assembly language program consists of a series of lines, each
line may be blank, contain one (pseudo)instruction or contain one
label.
Input to the assembler is in lower case.
Upper case is used in this
document merely to distinguish keywords from the surrounding prose.
Comment is allowed at the end of each line and starts with a semicolon ";".
This kind of comment does not exist in the compact form.
.A
Labels must be placed all by themselves on a line and start in
column 1.
There are two kinds of labels, instruction and data labels.
Instruction labels are unsigned positive integers.
The scope of an instruction label is its procedure.
.A
The pseudoinstructions CON, ROM and BSS may be preceded by a
line containing a
1-8 character data label, the first character of which is a
letter, period or underscore.
The period may only be followed by
digits, the others may be followed by letters, digits and underscores.
The use of the character "." followed by a constant,
which must be in the range 1 to 32767 (e.g. ".40") is recommended
for compiler
generated programs.
These labels are considered as a special case and handled
more efficiently in compact assembly language (see below).
Note that a data label on its own or two consecutive labels are not
allowed.
.P
Each statement may contain an instruction mnemonic or pseudoinstruction.
These must begin in column 2 or later (not column 1) and must be followed
by a space, tab, semicolon or LF.
Everything on the line following a semicolon is
taken as a comment.
.P
Each input file contains one module.
A module may contain many procedures,
which may be nested.
A procedure consists of
a PRO statement, a (possibly empty)
collection of instructions and pseudoinstructions and finally an END
statement.
Pseudoinstructions are also allowed between procedures.
They do not belong to a specific procedure.
.P
All constants in EM are interpreted in the decimal base.
The ASCII assembly language accepts constant expressions
wherever constants are allowed.
The operators recognized are: +, -, *, % and / with the usual
precedence order.
Use of the parentheses ( and ) to alter the precedence order is allowed.
.S3 "Instruction arguments"
Unlike many other assembly languages, the EM assembly
language requires all arguments of normal and pseudoinstructions
to be either a constant or an identifier, but not a combination
of these two.
There is one exception to this rule: when a data label is used
for initialization or as an instruction argument,
expressions of the form 'label+constant' and 'label-constant'
are allowed.
This makes it possible to address, for example, the
third word of a ten word BSS block
directly.
Thus LOE LABEL+4 is permitted and so is CON LABEL+3.
The resulting address must be in the same fragment as the label.
It is not allowed to add or subtract from instruction labels or procedure
identifiers,
which certainly is not a severe restriction and greatly aids
optimization.
.P
Instruction arguments can be constants,
data labels, data labels offsetted by a constant, instruction
labels and procedure identifiers.
The range of integers allowed depends on the instruction.
Most instructions allow only integers
(signed or unsigned)
that fit in a word.
Arguments used as offsets to pointers should fit in a
pointer-sized integer.
Finally, arguments to LDC should fit in a double-word integer.
.P
Several instructions have two possible forms:
with an explicit argument and with an implicit argument on top of the stack.
The size of the implicit argument is the wordsize.
The implicit argument is always popped before all other operands.
For example: 'CMI 4' specifies that two four-byte signed
integers on top of the stack are to be compared.
\&'CMI' without an argument expects a wordsized integer
on top of the stack that specifies the size of the integers to
be compared.
Thus the following two sequences are equivalent:
.N 2
.TS
center, tab(:) ;
l r 30 l r.
LDL:-10:LDL:-10
LDL:-14:LDL:-14
::LOC:4
CMI:4:CMI:
ZEQ:*1:ZEQ:*1
.TE 2
Section 11.1.6 shows the arguments allowed for each instruction.
.S3 "Pseudoinstruction arguments"
Pseudoinstruction arguments can be divided in two classes:
Initializers and others.
The following initializers are allowed: signed integer constants,
unsigned integer constants, floating-point constants, strings,
data labels, data labels offsetted by a constant, instruction
labels and procedure identifiers.
.P
Constant initializers in BSS, HOL, CON and ROM pseudoinstructions
can be followed by a letter I, U or F.
This indicator
specifies the type of the initializer: Integer, Unsigned or Float.
If no indicator is present I is assumed.
The size of the initializer is the wordsize unless
the indicator is followed by an integer specifying the
initializer's size.
This integer is governed by the same restrictions as for
transfer of objects to/from memory.
As in instruction arguments, initializers include expressions of the form:
\&"LABEL+offset" and "LABEL-offset".
The offset must be an unsigned decimal constant.
The 'IUF' indicators cannot be used in the offsets.
.P
Data labels are referred to by their name.
.P
Strings are surrounded by double quotes (").
Semicolons in strings do not indicate the start of a comment.
In the ASCII representation the escape character \e (backslash)
alters the meaning of subsequent character(s).
This feature allows inclusion of zeroes, graphic characters and
the double quote in the string.
The following escape sequences exist:
.DS
.TS
center, tab(:);
l l l.
newline:NL\|(LF):\en
horizontal tab:HT:\et
backspace:BS:\eb
carriage return:CR:\er
form feed:FF:\ef
backslash:\e:\e\e
double quote:":\e"
bit pattern:\fBddd\fP:\e\fBddd\fP
.TE
.DE
The escape \fBddd\fP consists of the backslash followed by 1,
2, or 3 octal digits specifying the value of
the desired character.
If the character following a backslash is not one of those
specified,
the backslash is ignored.
Example: CON "hello\e012\e0".
Each string element initializes a single byte.
The ASCII character set is used to map characters onto values.
.P
Instruction labels are referred to as *1, *2, etc. in both branch
instructions and as initializers.
.P
The notation $procname means the identifier for the procedure
with the specified name.
This identifier has the size of a pointer.
.S3 Notation
First, the notation used for the arguments, classes of
instructions and pseudoinstructions.
.IS 2
.TS
tab(:);
l l l.
<cst>:\&=:integer constant (current range -2**31..2**31-1)
<dlb>:\&=:data label
<arg>:\&=:<cst> or <dlb> or <dlb>+<cst> or <dlb>-<cst>
<con>:\&=:integer constant, unsigned constant, floating-point constant
<str>:\&=:string constant (surrounded by double quotes),
<ilb>:\&=:instruction label
::'*' followed by an integer in the range 0..32767.
<pro>:\&=:procedure number ('$' followed by a procedure name)
<val>:\&=:<arg>, <con>, <pro> or <ilb>.
<par>:\&=:<val> or <str>
<...>*:\&=:zero or more of <...>
<...>+:\&=:one or more of <...>
[...]:\&=:optional ...
.TE
.IE
.S3 "Pseudoinstructions"
.S4 Storage declaration
Initialized global data is allocated by the pseudoinstruction CON,
which needs at least one argument.
Each argument is used to allocate and initialize a number of
consecutive bytes in data memory.
The number of bytes to be allocated and the alignment depend on the type
of the argument.
For each argument, an integral number of words,
determined by the argument type, is allocated and initialized.
.P
The pseudoinstruction ROM is the same as CON,
except that it guarantees that the initialized words
will not change during the execution of the program.
This information allows optimizers to do
certain calculations such as array indexing and
subrange checking at compile time instead
of at run time.
.P
The pseudoinstruction BSS allocates
uninitialized global data or large blocks of data initialized
by the same value.
The first argument to this pseudo is the number
of bytes required, which must be a multiple of the wordsize.
The other arguments specify the value used for initialization and
whether the initialization is only for convenience or a strict necessity.
The pseudoinstruction HOL is similar to BSS in that it requests an
(un)initialized global data block.
Addressing of a HOL block, however, is quasi absolute.
The first byte is addressed by 0,
the second byte by 1 etc. in assembly language.
The assembler/loader adds the base address of
the HOL block to these numbers to obtain the
absolute address in the machine language.
.P
The scope of a HOL block starts at the HOL pseudo and
ends at the next HOL pseudo or at the end of a module
whichever comes first.
Each instruction falls in the scope of at most one
HOL block, the current HOL block.
It is not allowed to have more than one HOL block per procedure.
.P
The alignment restrictions are enforced by the
pseudoinstructions.
All initializers are aligned on a multiple of their size or the wordsize
whichever is smaller.
Strings form an exception, they are to be seen as a sequence of initializers
each for one byte, i.e. strings are not padded with zero bytes.
Switching to another type of fragment or placing a label forces
word-alignment.
There are three types of fragments in global data space: CON, ROM and
BSS/HOL.
.N 2
.IS 2
.PS - 4
.PT "BSS <cst1>,<val>,<cst2>"
Reserve <cst1> bytes.
<val> is the value used to initialize the area.
<cst1> must be a multiple of the size of <val>.
<cst2> is 0 if the initialization is not strictly necessary,
1 if it is.
.PT "HOL <cst1>,<val>,<cst2>"
Idem, but all following absolute global data references will
refer to this block.
Only one HOL is allowed per procedure,
it has to be placed before the first instruction.
.PT "CON <val>+"
Assemble global data words initialized with the <val> constants.
.PT "ROM <val>+"
Idem, but the initialized data will never be changed by the program.
.PE
.IE
.S4 Partitioning
Two pseudoinstructions partition the input into procedures:
.IS 2
.PS - 4
.PT "PRO <pro>[,<cst>]"
Start of procedure.
<pro> is the procedure name.
<cst> is the number of bytes for locals.
The number of bytes for locals must be specified in the PRO or
END pseudoinstruction.
When specified in both, they must be identical.
.PT "END [<cst>]"
End of Procedure.
<cst> is the number of bytes for locals.
The number of bytes for locals must be specified in either the PRO or
END pseudoinstruction or both.
.PE
.IE
.S4 Visibility
Names of data and procedures in an EM module can either be
internal or external.
External names are known outside the module and are used to link
several pieces of a program.
Internal names are not known outside the modules they are used in.
Other modules will not 'see' an internal name.
.A
To reduce the number of passes needed,
it must be known at the first occurrence whether
a name is internal or external.
If the first occurrence of a name is in a definition,
the name is considered to be internal.
If the first occurrence of a name is a reference,
the name is considered to be external.
If the first occurrence is in one of the following pseudoinstructions,
the effect of the pseudo has precedence.
.IS 2
.PS - 4
.PT "EXA <dlb>"
External name.
<dlb> is known, possibly defined, outside this module.
Note that <dlb> may be defined in the same module.
.PT "EXP <pro>"
External procedure identifier.
Note that <pro> may be defined in the same module.
.PT "INA <dlb>"
Internal name.
<dlb> is internal to this module and must be defined in this module.
.PT "INP <pro>"
Internal procedure.
<pro> is internal to this module and must be defined in this module.
.PE
.IE
.S4 Miscellaneous
Two other pseudoinstructions provide miscellaneous features:
.IS 2
.PS - 4
.PT "EXC <cst1>,<cst2>"
Two blocks of instructions preceding this one are
interchanged before being processed.
<cst1> gives the number of lines of the first block.
<cst2> gives the number of lines of the second one.
Blank and pure comment lines do not count.
.PT "MES <cst>[,<par>]*"
A special type of comment.
Used by compilers to communicate with the
optimizer, assembler, etc. as follows:
.VS 1 0
.PS - 4
.PT "MES 0"
An error has occurred, stop further processing.
.PT "MES 1"
Suppress optimization.
.PT "MES 2,<cst1>,<cst2>"
Use wordsize <cst1> and pointer size <cst2>.
.PT "MES 3,<cst1>,<cst2>,<cst3>,<cst4>"
Indicates that a local variable is never referenced indirectly.
Used to indicate that a register may be used for a specific
variable.
<cst1> is offset in bytes from AB if positive
and offset from LB if negative.
<cst2> gives the size of the variable.
<cst3> indicates the class of the variable.
The following values are currently recognized:
.PS
.PT 0
The variable can be used for anything.
.PT 1
The variable is used as a loop index.
.PT 2
The variable is used as a pointer.
.PT 3
The variable is used as a floating point number.
.PE 0
<cst4> gives the priority of the variable,
higher numbers indicate better candidates.
.PT "MES 4,<cst>,<str>"
Number of source lines in file <str> (for profiler).
.PT "MES 5"
Floating point used.
.PT "MES 6,<val>*"
Comment. Used to provide comments in compact assembly language.
.PT "MES 7,....."
Reserved.
.PT "MES 8,<pro>[,<dlb>]..."
Library module. Indicates that the module may only be loaded
if it is useful, that is, if it can satisfy any unresolved
references during the loading process.
May not be preceded by any other pseudo, except MES's.
.PT "MES 9,<cst>"
Guarantees that no more than <cst> bytes of parameters are
accessed, either directly or indirectly.
.PT "MES 10,<cst>[,<par>]*"
This message number is reserved for the global optimizer.
It inserts these messages in its output as hints to backends.
<cst> indicates the type of hint.
.PT "MES 11"
Procedures containing this message are possible destinations of
non-local goto's with the GTO instruction.
Some backends keep locals in registers,
the locals in this procedure should not be kept in registers and
all registers containing locals of other procedures should be
saved upon entry to this procedure.
.PE 1
.VS 1 1
Each backend is free to skip irrelevant MES pseudos.
.PE
.IE
.S2 "The Compact Assembly Language"
The assembler accepts input in a highly encoded form.
This
form is intended to reduce the amount of file transport between the
front ends, optimizers
and back ends, and also reduces the amount of storage required for storing
libraries.
Libraries are stored as archived compact assembly language, not machine
language.
.P
When beginning to read the input, the assembler is in neutral state, and
expects either a label or an instruction (including the pseudoinstructions).
The meaning of the next byte(s) when in neutral state is as follows, where
b1, b2
etc. represent the succeeding bytes.
.N 1
.DS
.TS
tab(:) ;
rw17 4 l.
0:Reserved for future use
1-129:Machine instructions, see Appendix A, alphabetical list
130-149:Reserved for future use
150-161:BSS,CON,END,EXA,EXC,EXP,HOL,INA,INP,MES,PRO,ROM
162-179:Reserved for future pseudoinstructions
180-239:Instruction labels 0 - 59 (180 is local label 0 etc.)
240-244:See the Common Table below
245-255:Not used
.TE 1
.DE 0
After a label, the assembler is back in neutral state; it can immediately
accept another label or an instruction in the next byte.
No linefeeds are used to separate lines.
.P
If an opcode expects no arguments,
the assembler is back in neutral state after
reading the one byte containing the instruction number.
If it has one or
more arguments (only pseudos have more than 1), the arguments follow directly,
encoded as follows:
.N 1
.IS 2
.TS
tab(:);
r l.
0-239:Offsets from -120 to 119
240-255:See the Common Table below
.TE 1
Absence of an optional argument is indicated by a special
byte.
.IE 2
.CS
Common Table for Neutral State and Arguments
.CE
.TS
tab(:);
c c s c
l8 l l8 l.
class:bytes:description
<ilb>:240:b1:Instruction label b1 (Not used for branches)
<ilb>:241:b1 b2:16 bit instruction label (256*b2 + b1)
<dlb>:242:b1:Global label .0-.255, with b1 being the label
<dlb>:243:b1 b2:Global label .0-.32767
:::with 256*b2+b1 being the label
<dlb>:244:<string>:Global symbol not of the form .nnn
<cst>:245:b1 b2:16 bit constant
<cst>:246:b1 b2 b3 b4:32 bit constant
<cst>:247:b1 .. b8:64 bit constant
<arg>:248:<dlb><cst>:Global label + (possibly negative) constant
<pro>:249:<string>:Procedure name (not including $)
<str>:250:<string>:String used in CON or ROM (no quotes-no escapes)
<con>:251:<cst><string>:Integer constant, size <cst> bytes
<con>:252:<cst><string>:Unsigned constant, size <cst> bytes
<con>:253:<cst><string>:Floating constant, size <cst> bytes
:254::unused
<end>:255::Delimiter for argument lists or
:::indicates absence of optional argument
.TE 1
.P
The bytes specifying the value of a 16, 32 or 64 bit constant
are presented in two's complement notation, with the least
significant byte first. For example: the value of a 32 bit
constant is ((s4*256+b3)*256+b2)*256+b1, where s4 is b4-256 if
b4 is greater than 127 else s4 takes the value of b4.
A <string> consists of a <cst> immediately followed by
a sequence of bytes with length <cst>.
.P
.ne 8
The pseudoinstructions fall into several categories, depending on their
arguments:
.N 1
.DS
Group 1 -- EXC, BSS, HOL have a known number of arguments
Group 2 -- EXA, EXP, INA, INP have a string as argument
Group 3 -- CON, MES, ROM have a variable number of various things
Group 4 -- END, PRO have a trailing optional argument.
.DE 1
Groups 1 and 2
use the encoding described above.
Group 3 also uses the encoding listed above, with an <end> byte after the
last argument to indicate the end of the list.
Group 4 uses
an <end> byte if the trailing argument is not present.
.N 2
.IS 2
.TS
tab(|);
l s l
l s s
l 2 lw(46) l.
Example ASCII|Example compact
(LOC = 69, BRA = 18 here):
2||182
1||181
LOC|10|69 130
LOC|-10|69 110
LOC|300|69 245 44 1
BRA|*19|18 139
300||241 44 1
.3||242 3
CON|4,9,*2,$foo|151 124 129 240 2 249 123 102 111 111 255
CON|.35|151 242 35 255
.TE 0
.IE 0
.BP
.S2 "Assembly language instruction list"
.P
For each instruction in the list the range of argument values
in the assembly language is given.
The column headed \fIassem\fP contains the mnemonics defined
in 11.1.3.
The following column specifies restrictions of the argument
value.
Addresses have to obey the restrictions mentioned in chapter 2.
The classes of arguments
are indicated by letters:
.ds b \fBb\fP
.ds c \fBc\fP
.ds d \fBd\fP
.ds g \fBg\fP
.ds f \fBf\fP
.ds l \fBl\fP
.ds n \fBn\fP
.ds w \fBw\fP
.ds p \fBp\fP
.ds r \fBr\fP
.ds s \fBs\fP
.ds z \fBz\fP
.ds o \fBo\fP
.ds - \fB-\fP
.N 1
.TS
tab(:);
c s l l
l l 15 l l.
\fIassem\fP:constraints:rationale
\&\*c:cst:fits word:constant
\&\*d:cst:fits double word:constant
\&\*l:cst::local offset
\&\*g:arg:>= 0:global offset
\&\*f:cst::fragment offset
\&\*n:cst:>= 0:counter
\&\*s:cst:>0 , word multiple:object size
\&\*z:cst:>= 0 , zero or word multiple:object size
\&\*o:cst:> 0 , word multiple or fraction:object size
\&\*w:cst:> 0 , word multiple:object size *
\&\*p:pro::pro identifier
\&\*b:ilb:>= 0:label number
\&\*r:cst:0,1,2:register number
\&\*-:::no argument
.TE 1
.P
The * at the rationale for \*w indicates that the argument
can either be given as argument or on top of the stack.
If the argument is omitted, the argument is fetched from the
stack;
it is assumed to be a wordsized unsigned integer.
Instructions that check for undefined integer or floating-point
values and underflow or overflow
are indicated below by (*).
.N 1
.DS B
GROUP 1 - LOAD
LOC \*c : Load constant (i.e. push one word onto the stack)
LDC \*d : Load double constant ( push two words )
LOL \*l : Load word at \*l-th local (\*l<0) or parameter (\*l>=0)
LOE \*g : Load external word \*g
LIL \*l : Load word pointed to by \*l-th local or parameter
LOF \*f : Load offsetted (top of stack + \*f yield address)
LAL \*l : Load address of local or parameter
LAE \*g : Load address of external
LXL \*n : Load lexical (address of LB \*n static levels back)
LXA \*n : Load lexical (address of AB \*n static levels back)
LOI \*o : Load indirect \*o bytes (address is popped from the stack)
LOS \*w : Load indirect, \*w-byte integer on top of stack gives object size
LDL \*l : Load double local or parameter (two consecutive words are stacked)
LDE \*g : Load double external (two consecutive externals are stacked)
LDF \*f : Load double offsetted (top of stack + \*f yield address)
LPI \*p : Load procedure identifier
GROUP 2 - STORE
STL \*l : Store local or parameter
STE \*g : Store external
SIL \*l : Store into word pointed to by \*l-th local or parameter
STF \*f : Store offsetted
STI \*o : Store indirect \*o bytes (pop address, then data)
STS \*w : Store indirect, \*w-byte integer on top of stack gives object size
SDL \*l : Store double local or parameter
SDE \*g : Store double external
SDF \*f : Store double offsetted
GROUP 3 - INTEGER ARITHMETIC
ADI \*w : Addition (*)
SBI \*w : Subtraction (*)
MLI \*w : Multiplication (*)
DVI \*w : Division (*)
RMI \*w : Remainder (*)
NGI \*w : Negate (two's complement) (*)
SLI \*w : Shift left (*)
SRI \*w : Shift right (*)
GROUP 4 - UNSIGNED ARITHMETIC
ADU \*w : Addition
SBU \*w : Subtraction
MLU \*w : Multiplication
DVU \*w : Division
RMU \*w : Remainder
SLU \*w : Shift left
SRU \*w : Shift right
GROUP 5 - FLOATING POINT ARITHMETIC
ADF \*w : Floating add (*)
SBF \*w : Floating subtract (*)
MLF \*w : Floating multiply (*)
DVF \*w : Floating divide (*)
NGF \*w : Floating negate (*)
FIF \*w : Floating multiply and split integer and fraction part (*)
FEF \*w : Split floating number in exponent and fraction part (*)
GROUP 6 - POINTER ARITHMETIC
ADP \*f : Add \*f to pointer on top of stack
ADS \*w : Add \*w-byte value and pointer
SBS \*w : Subtract pointers in same fragment and push diff as size \*w integer
GROUP 7 - INCREMENT/DECREMENT/ZERO
INC \*- : Increment word on top of stack by 1 (*)
INL \*l : Increment local or parameter (*)
INE \*g : Increment external (*)
DEC \*- : Decrement word on top of stack by 1 (*)
DEL \*l : Decrement local or parameter (*)
DEE \*g : Decrement external (*)
ZRL \*l : Zero local or parameter
ZRE \*g : Zero external
ZRF \*w : Load a floating zero of size \*w
ZER \*w : Load \*w zero bytes
GROUP 8 - CONVERT (stack: source, source size, dest. size (top))
CII \*- : Convert integer to integer (*)
CUI \*- : Convert unsigned to integer (*)
CFI \*- : Convert floating to integer (*)
CIF \*- : Convert integer to floating (*)
CUF \*- : Convert unsigned to floating (*)
CFF \*- : Convert floating to floating (*)
CIU \*- : Convert integer to unsigned
CUU \*- : Convert unsigned to unsigned
CFU \*- : Convert floating to unsigned
GROUP 9 - LOGICAL
AND \*w : Boolean and on two groups of \*w bytes
IOR \*w : Boolean inclusive or on two groups of \*w bytes
XOR \*w : Boolean exclusive or on two groups of \*w bytes
COM \*w : Complement (one's complement of top \*w bytes)
ROL \*w : Rotate left a group of \*w bytes
ROR \*w : Rotate right a group of \*w bytes
GROUP 10 - SETS
INN \*w : Bit test on \*w byte set (bit number on top of stack)
SET \*w : Create singleton \*w byte set with bit n on (n is top of stack)
GROUP 11 - ARRAY
LAR \*w : Load array element, descriptor contains integers of size \*w
SAR \*w : Store array element
AAR \*w : Load address of array element
GROUP 12 - COMPARE
CMI \*w : Compare \*w byte integers, Push negative, zero, positive for <, = or >
CMF \*w : Compare \*w byte reals
CMU \*w : Compare \*w byte unsigneds
CMS \*w : Compare \*w byte values, can only be used for bit for bit equality test
CMP \*- : Compare pointers
TLT \*- : True if less, i.e. iff top of stack < 0
TLE \*- : True if less or equal, i.e. iff top of stack <= 0
TEQ \*- : True if equal, i.e. iff top of stack = 0
TNE \*- : True if not equal, i.e. iff top of stack non zero
TGE \*- : True if greater or equal, i.e. iff top of stack >= 0
TGT \*- : True if greater, i.e. iff top of stack > 0
GROUP 13 - BRANCH
BRA \*b : Branch unconditionally to label \*b
BLT \*b : Branch less (pop 2 words, branch if top > second)
BLE \*b : Branch less or equal
BEQ \*b : Branch equal
BNE \*b : Branch not equal
BGE \*b : Branch greater or equal
BGT \*b : Branch greater
ZLT \*b : Branch less than zero (pop 1 word, branch negative)
ZLE \*b : Branch less or equal to zero
ZEQ \*b : Branch equal zero
ZNE \*b : Branch not zero
ZGE \*b : Branch greater or equal zero
ZGT \*b : Branch greater than zero
GROUP 14 - PROCEDURE CALL
CAI \*- : Call procedure (procedure identifier on stack)
CAL \*p : Call procedure (with identifier \*p)
LFR \*s : Load function result
RET \*z : Return (function result consists of top \*z bytes)
GROUP 15 - MISCELLANEOUS
ASP \*f : Adjust the stack pointer by \*f
ASS \*w : Adjust the stack pointer by \*w-byte integer
BLM \*z : Block move \*z bytes; first pop destination addr, then source addr
BLS \*w : Block move, size is in \*w-byte integer on top of stack
CSA \*w : Case jump; address of jump table at top of stack
CSB \*w : Table lookup jump; address of jump table at top of stack
DCH \*- : Follow dynamic chain, convert LB to LB of caller
DUP \*s : Duplicate top \*s bytes
DUS \*w : Duplicate top \*w bytes
EXG \*w : Exchange top \*w bytes
FIL \*g : File name (external 4 := \*g)
GTO \*g : Non-local goto, descriptor at \*g
LIM \*- : Load 16 bit ignore mask
LIN \*n : Line number (external 0 := \*n)
LNI \*- : Line number increment
LOR \*r : Load register (0=LB, 1=SP, 2=HP)
LPB \*- : Convert local base to argument base
MON \*- : Monitor call
NOP \*- : No operation
RCK \*w : Range check; trap on error
RTT \*- : Return from trap
SIG \*- : Trap errors to proc identifier on top of stack, -2 resets default
SIM \*- : Store 16 bit ignore mask
STR \*r : Store register (0=LB, 1=SP, 2=HP)
TRP \*- : Cause trap to occur (Error number on stack)
.DE 0

View file

@ -1,163 +0,0 @@
.SN 7
.BP
.S1 "DESCRIPTORS"
Several instructions use descriptors, notably the range check instruction,
the array instructions, the goto instruction and the case jump instructions.
Descriptors reside in data space.
They may be constructed at run time, but
more often they are fixed and allocated in ROM data.
.P
All instructions using descriptors, except GTO, have as argument
the size of the integers in the descriptor.
All implementations have to allow integers of the size of a
word in descriptors.
All integers popped from the stack and used for indexing or comparing
must have the same size as the integers in the descriptor.
.S2 "Range check descriptors"
Range check descriptors consist of two integers:
.IS 2
.PS 1 4 "" .
.PT
lower bound~~~~~~~signed
.PT
upper bound~~~~~~~signed
.PE
.IE
The range check instruction checks an integer on the stack against
these bounds and causes a trap if the value is outside the interval.
The value itself is neither changed nor removed from the stack.
.S2 "Array descriptors"
Each array descriptor describes a single dimension.
For multi-dimensional arrays, several array instructions are
needed to access a single element.
Array descriptors contain the following three integers:
.IS 2
.PS 1 4 "" .
.PT
lower bound~~~~~~~~~~~~~~~~~~~~~signed
.PT
upper bound - lower bound~~~~~~~unsigned
.PT
number of bytes per element~~~~~unsigned
.PE
.IE
The array instructions LAR, SAR and AAR have the pointer to the start
of the descriptor as operand on the stack.
.sp
The element A[I] is fetched as follows:
.IS 2
.PS 1 4 "" .
.PT
Stack the address of A (e.g., using LAE or LAL)
.PT
Stack the value of I (n-byte integer)
.PT
Stack the pointer to the descriptor (e.g., using LAE)
.PT
LAR n (n is the size of the integers in the descriptor and I)
.PE
.IE
All array instructions first pop the address of the descriptor
and the index.
If the index is not within the bounds specified, a trap occurs.
If ok, (I~-~lower bound) is multiplied
by the number of bytes per element (the third word). The result is added
to the address of A and replaces A on the stack.
.A
At this point LAR, SAR and AAR diverge.
AAR is finished. LAR pops the address and fetches the data
item,
the size being specified by the descriptor.
The usual restrictions for memory access must be obeyed.
SAR pops the address and stores the
data item now exposed.
.S2 "Non-local goto descriptors"
The GTO instruction provides a way of returning directly to any
active procedure invocation.
The argument of the instruction is the address of a descriptor
containing three pointers:
.IS 2
.PS 1 4 "" .
.PT
value of PC after the jump
.PT
value of SP after the jump
.PT
value of LB after the jump
.PE
.IE
GTO loads PC, SP and LB from the descriptor,
thereby jumping to a procedure
and removing zero or more frames from the stack.
The LB, SP and PC in the descriptor must belong to a
dynamically enclosing procedure,
because some EM implementations will need to backtrack through
the dynamic chain and use the implementation dependent data
in frames to restore registers etc.
.S2 "Case descriptors"
The case jump instructions CSA and CSB both
provide multiway branches selected by a case index.
Both fetch two operands from the stack:
first a pointer to the low address of the case descriptor
and then the case index.
CSA uses the case index as index in the descriptor table, but CSB searches
the table for an occurrence of the case index.
Therefore, the descriptors for CSA and CSB,
as shown in figure 4, are different.
All pointers in the table must be addresses of instructions in the
procedure executing the case instruction.
.P
CSA selects the new PC by indexing.
If the index, a signed integer, is greater than or equal to
the lower bound and less than or equal to the upper bound,
then fetch the new PC from the list of instruction pointers by indexing with
index-lower.
The table does not contain the value of the upper bound,
but the value of upper-lower as an unsigned integer.
The default instruction pointer is used when the index is out of bounds.
If the resulting PC is 0, then trap.
.P
CSB selects the new PC by searching.
The table is searched for an entry with index value equal to the case index.
That entry or, if none is found, the default entry contains the
new PC.
When the resulting PC is 0, a trap is performed.
.P
The choice of which case instruction to use for
each source language case statement
is up to the front end.
If the range of the index value is dense, i.e
.DS
(highest value - lowest value) / number of cases
.DE 1
is less than some threshold, then CSA is the obvious choice.
If the range is sparse, CSB is better.
.N 2
.DS
|--------------------| |--------------------| high address
| pointer for upb | | pointer n-1 |
|--------------------| |- - - - - - - |
| . | | index n-1 |
| . | |--------------------|
| . | | . |
| . | | . |
| . | | . |
| . | |--------------------|
| . | | pointer 1 |
|--------------------| |- - - - - - - |
| pointer for lwb+1 | | index 1 |
|--------------------| |--------------------|
| pointer for lwb | | pointer 0 |
|--------------------| |- - - - - - - |
| upper - lower | | index 0 |
|--------------------| |--------------------|
| lower bound | | number of entries |
|--------------------| |--------------------|
| default pointer | | default pointer | low address
|--------------------| |--------------------|
CSA descriptor CSB descriptor
Figure 4. Descriptor layout for CSA and CSB
.DE

View file

@ -1,377 +0,0 @@
.BP
.SN 4
.S1 "DATA ADDRESS SPACE"
The data address space is divided into three parts, called 'areas',
each with its own addressing method:
global data area,
local data area (including the stack),
and heap data area.
These data areas must be part of the same
address space because all data is accessed by
the same type of pointers.
.P
Space for global data is reserved using several pseudoinstructions in the
assembly language, as described in
the next paragraph and chapter 11.
The size of the global data area is fixed per program.
.A
Global data is addressed absolutely in the machine language.
Many instructions are available to address global data.
They all have an absolute address as argument.
Examples are LOE, LAE and STE.
.P
Part of the global data area is initialized by the
compiler, the
rest is not initialized at all or is initialized
with a value, typically -32768 or 0.
Part of the initialized global data may be made read-only
if the implementation supports protection.
.P
The local data area is used as a stack,
which grows from high to low addresses
and contains some data for each active procedure
invocation, called a 'frame'.
The size of the local data area varies dynamically during
execution.
Below the current procedure frame resides the operand stack.
The stack pointer SP always points to the bottom of
the local data area.
Local data is addressed by offsetting from the local base pointer LB.
LB always points to the frame of the current procedure.
Only the words of the current frame and the parameters
can be addressed directly.
Variables in other active procedures are addressed by following
the chain of statically enclosing procedures using the LXL or LXA instruction.
The variables in dynamically enclosing procedures can be
addressed with the use of the DCH instruction.
.A
Many instructions have offsets to LB as argument,
for instance LOL, LAL and STL.
The arguments of these instructions range from -1 to some
(negative) minimum
for the access of local storage and from 0 to some (positive)
maximum for parameter access.
.P
The procedure call instructions CAL and CAI each create a new frame
on the stack.
Each procedure has an assembly-time parameter specifying
the number of bytes needed for local storage.
This storage is allocated each time the procedure is called and
must be a multiple of the wordsize.
Each procedure, therefore, starts with a stack with the local variables
already allocated.
The return instructions RET and RTT remove a frame.
The actual parameters must be removed by the calling procedure.
.P
RET may copy some words from the stack of
the returning procedure to an unnamed 'function return area'.
This area is available for 'READ-ONCE' access using the LFR instruction.
The result of a LFR is only defined if the size used to fetch
is identical to the size used in the last return.
The instruction ASP, used to remove the parameters from the
stack, the branch instruction BRA and the non-local goto
instruction GTO are the only ones that leave the contents of
the 'function return area' intact.
All other instructions are allowed to destroy the function
return area.
Thus parameters can be popped before fetching the function result.
The maximum size of all function return areas is
implementation dependent,
but should allow procedure instance identifiers and all
implemented objects of type integer, unsigned, float
and pointer to be returned.
In most implementations
the maximum size of the function return
area is twice the pointer size,
because we want to be able to handle 'procedure instance
identifiers' which consist of a procedure identifier and the LB
of a frame belonging to that procedure.
.P
The heap data area grows upwards, to higher numbered
addresses.
It is initially empty.
The initial value of the heap pointer HP
marks the low end.
The heap pointer may be manipulated
by the LOR and STR instructions.
The heap can only be addressed indirectly,
by pointers derived from previous values of HP.
.S2 "Global data area"
The initial size of the global data area is determined at assembly time.
Global data is allocated by several
pseudoinstructions in the EM assembly
language.
Each pseudoinstruction allocates one or more bytes.
The bytes allocated for a single pseudo form
a 'block'.
A block differs from a fragment, because,
under certain conditions, several blocks are allocated
in a single fragment.
This guarantees that the bytes of these blocks
are consecutive.
.P
Global data is addressed absolutely in binary
machine language.
Most compilers, however,
cannot assign absolute addresses to their global variables,
especially not if the language
allows programs to be composed of several separately compiled modules.
The assembly language therefore allows the compiler to name
the first address of a global data block with an alphanumeric label.
Moreover, the only way to address such a named global data block
in the assembly language is by using its name.
It is the task of the assembler/loader to
translate these labels into absolute addresses.
These labels may also be used
in CON and ROM pseudoinstructions to initialize pointers.
.P
The pseudoinstruction CON allocates initialized data.
ROM acts like CON but indicates that the initialized data will
not change during execution of the program.
The pseudoinstruction BSS allocates a block of uninitialized
or identically initialized
data.
The pseudoinstruction HOL is similar to BSS,
but it alters the meaning of subsequent absolute addressing in
the assembly language.
.P
Another type of global data is a small block,
called the ABS block, with an implementation defined size.
Storage in this type of block can only be addressed
absolutely in assembly language.
The first word has address 0 and is used to maintain the
source line number.
Special instructions LIN and LNI are provided to
update this counter.
A pointer at location 4 points to a string containing the
current source file name.
The instruction FIL can be used to update the pointer.
.P
All numeric arguments of the instructions that address
the global data area refer to locations in the
ABS block unless
they are preceded by at least one HOL pseudo in the same
module,
in which case they refer to the storage area allocated by the
last HOL pseudoinstruction.
Thus LOE 0 loads the zeroth word of the most recent HOL, unless no HOL has
appeared in the current file so
far, in which case it loads the zeroth word of the
ABS fragment.
.P
The global data area is highly fragmented.
The ABS block and each HOL and BSS block are separate fragments.
The way fragments are formed from CON and ROM blocks is more complex.
The assemblers group several blocks into a single fragment.
A fragment only contains blocks of the same type: CON or ROM.
It is guaranteed that the bytes allocated for two consecutive CON pseudos are
allocated consecutively in a single fragment, unless
these CON pseudos are separated in the assembly language program
by a data label definition or one or more of the following pseudos:
.DS
ROM, BSS, HOL and END
.DE
An analogous rule holds for ROM pseudos.
.S2 "Local data area"
The local data area consists of a sequence of frames, one for
each active procedure.
Below the frame of the current procedure resides the
expression stack.
Frames are generated by procedure calls and are
removed by procedure returns.
A procedure frame consists of six 'zones':
.DS
1. The return status block
2. The local variables and compiler temporaries
3. The register save block
4. The dynamic local generators
5. The operand stack.
6. The parameters of a procedure one level deeper
.DE
A sample frame is shown in Figure 1.
.P
Before a procedure call is performed the actual
parameters are pushed onto the stack of the calling procedure.
The exact details are compiler dependent.
EM allows procedures to be called with a variable number of
parameters.
The implementation of the C-language almost forces its runtime
system to push the parameters in reverse order, that is,
the first positional parameter last.
Most compilers use the C calling convention to be compatible.
The parameters of a procedure belong to the frame of the
calling procedure.
Note that the evaluation of the actual parameters may imply
the calling of procedures.
The parameters can be accessed with certain instructions using
offsets of 0 and greater.
The first byte of the last parameter pushed has offset 0.
Note that the parameter at offset 0 has a special use in the
instructions following the static chain (LXL and LXA).
These instructions assume that this parameter contains the LB of
the statically enclosing procedure.
Procedures that do not have a dynamically enclosing procedure
do not need a static link at offset 0.
.P
Two instructions are available to perform procedure calls, CAL
and CAI.
Several tasks are performed by these call instructions.
.A
First, a part of the status of the calling procedure is
saved on the stack in the return status block.
This block should contain the return address of the calling
procedure, its LB and other implementation dependent data.
The size of this block is fixed for any given implementation
because the lexical instructions LPB, LXL and LXA must be able to
obtain the base addresses of the procedure parameters \fBand\fP local
variables.
An alternative solution can be used on machines with a highly
segmented address space.
The stack frames need not be contiguous then and the first
status save area can contain the parameter base AB,
which has the value of SP just after the last parameter has
been pushed.
.A
Second, the LB is changed to point to the
first word above the local variables.
The new LB is a copy of the SP after the return status
block has been pushed.
.A
Third, the amount of local storage needed by the procedure is
reserved.
The parameters and local storage are accessed by the same instructions.
Negative offsets are used for access to local variables.
The highest byte, that is the byte nearest
to LB, has to be accessed with offset -1.
The pseudoinstruction specifying the entry point of a
procedure, has an argument that specifies the amount of local
storage needed.
The local variables allocated by the CAI or CAL instructions
are the only ones that can be accessed with a fixed negative offset.
The initial value of the allocated words is
not defined, but implementations that check for undefined
values will probably initialize them with a
special 'undefined' pattern, typically -32768.
.A
Fourth, any EM implementation is allowed to reserve a variable size
block beneath the local variables.
This block could, for example, be used to save a variable number
of registers.
.A
Finally, the address of the entry point of the called procedure
is loaded into the Program Counter.
.P
The ASP instruction can be used to allocate further (dynamic)
local storage.
The base address of such storage must be obtained with a LOR~SP
instruction.
This same instruction ASP may also be used
to remove some words from the stack.
.P
There is a version of ASP, called ASS, which fetches the number
of bytes to allocate from the stack.
It can be used to allocate space for local
objects whose size is unknown at compile time,
so called 'dynamic local generators'.
.P
Control is returned to the calling procedure with a RET instruction.
Any return value is then copied to the 'function return area'.
The frame created by the call is deallocated and the status of
the calling procedure is restored.
The value of SP just after the return value has been popped must
be the same as the
value of SP just before executing the first instruction of this
invocation.
This means that when a RET is executed the operand stack can
only contain the return value and all dynamically generated locals must be
deallocated.
Violating this restriction might result in hard to detect
errors.
The calling procedure has to remove the parameters from the stack.
This can be done with the aforementioned ASP instruction.
.P
Each procedure frame is a separate fragment.
Because any fragment may be placed anywhere in memory,
procedure frames need not be contiguous.
.DS
|===============================|
| actual parameter n-1 |
|-------------------------------|
| . |
| . |
| . |
|-------------------------------|
| actual parameter 0 | ( <- AB )
|===============================|
|===============================|
|///////////////////////////////|
|///// return status block /////|
|///////////////////////////////| <- LB
|===============================|
| |
| local variables |
| |
|-------------------------------|
| |
| compiler temporaries |
| |
|===============================|
|///////////////////////////////|
|///// register save block /////|
|///////////////////////////////|
|===============================|
| |
| dynamic local generators |
| |
|===============================|
| operand |
|-------------------------------|
| operand |
|===============================|
| parameter m-1 |
|-------------------------------|
| . |
| . |
| . |
|-------------------------------|
| parameter 0 | <- SP
|===============================|
Figure 1. A sample procedure frame and parameters.
.DE
.S2 "Heap data area"
The heap area starts empty, with HP
pointing to the low end of it.
HP always contains a word address.
A copy of HP can always be obtained with the LOR instruction.
A new value may be stored in the heap pointer using the STR instruction.
If the new value is greater than the old one,
then the heap grows.
If it is smaller, then the heap shrinks.
HP may never point below its original value.
All words between the current HP and the original HP
are allocated to the heap.
The heap may not grow into a part of memory that is already allocated
for the stack.
When this is attempted, the STR instruction will cause a trap to occur.
.P
The only way to address the heap is indirectly.
Whenever an object is allocated by increasing HP,
then the old HP value must be saved and can be used later to address
the allocated object.
If, in the meantime, HP is decreased so that the object
is no longer part of the heap, then an attempt to access
the object is not allowed.
Furthermore, if the heap pointer is increased again to above
the object address, then access to the old object gives undefined results.
.P
The heap is a single fragment.
All bytes have consecutive addresses.
No limits are imposed on the size of the heap as long as it fits
in the available data address space.

File diff suppressed because it is too large Load diff

View file

@ -1,9 +0,0 @@
/*
 * Copy standard input to standard output as decimal byte values,
 * each printed in a field of width 3, sixteen values per output line.
 * A final newline is always written after the last value.
 */
main() {
	register int byte, count ;

	count = 0 ;
	while ( (byte=getchar()) != -1 ) {
		/* the 16th value on a line is followed by a newline, all others by a space */
		printf( count%16 == 15 ? "%3d\n" : "%3d ", byte&0377 ) ;
		count++ ;
	}
	printf("\n") ;
}

View file

@ -1,178 +0,0 @@
mes 2,2,2 ; wordsize 2, pointersize 2
.1
rom 't.p\000' ; the name of the source file
hol 552,-32768,0 ; externals and buf occupy 552 bytes
exp $sum ; sum can be called from other modules
pro $sum,2 ; procedure sum; 2 bytes local storage
lin 8 ; code from source line 8
ldl 0 ; load two locals ( a and b )
adi 2 ; add them
ret 2 ; return the result
end 2 ; end of procedure ( still two bytes local storage )
.2
rom 1,99,2 ; descriptor of array a[]
exp $test ; the compiler exports all level 0 procedures
pro $test,226 ; procedure test, 226 bytes local storage
.3
rom 4.8F8 ; assemble Floating point 4.8 (8 bytes) in
.4 ; global storage
rom 0.5F8 ; same for 0.5
mes 3,-226,2,2 ; compiler temporary not referenced indirect
mes 3,-24,2,0 ; the same is true for i, j, b and c in test
mes 3,-22,2,0
mes 3,-4,2,0
mes 3,-2,2,0
mes 3,-20,8,0 ; and for x and y
mes 3,-12,8,0
lin 20 ; maintain source line number
loc 1
stl -4 ; j := 1
lni ; was lin 21 prior to optimization
lol -4
loc 3
mli 2
loc 6
adi 2
stl -2 ; i := 3 * j + 6
lni ; was lin 22 prior to optimization
lae .3
loi 8
lal -12
sti 8 ; x := 4.8
lni ; was lin 23 prior to optimization
lal -12
loi 8
lae .4
loi 8
dvf 8
lal -20
sti 8 ; y := x / 0.5
lni ; was lin 24 prior to optimization
loc 1
stl -22 ; b := true
lni ; was lin 25 prior to optimization
loc 122
stl -24 ; c := 'z'
lni ; was lin 26 prior to optimization
loc 1
stl -2 ; for i:= 1
2
lol -2
dup 2
mli 2 ; i*i
lal -224
lol -2
lae .2
sar 2 ; a[i] :=
lol -2
loc 100
beq *3 ; to 100 do
inl -2 ; increment i and loop
bra *2
3
lin 27
lol -4
loc 27
adi 2 ; j + 27
sil 0 ; r.r1 :=
lni ; was lin 28 prior to optimization
lol -22 ; b
lol 0
stf 10 ; r.r3 :=
lni ; was lin 29 prior to optimization
lal -20
loi 16
adf 8 ; x + y
lol 0
adp 2
sti 8 ; r.r2 :=
lni ; was lin 30 prior to optimization
lal -224
lol -4
lae .2
lar 2 ; a[j]
lil 0 ; r.r1
cal $sum ; call now
asp 4 ; remove parameters from stack
lfr 2 ; get function result
stl -2 ; i :=
4
lin 31
lol -2
zle *5 ; while i > 0 do
lol -4
lil 0
adi 2
stl -4 ; j := j + r.r1
del -2 ; i := i - 1
bra *4 ; loop
5
lin 32
lol 0
stl -226 ; make copy of address of r
lol -22
lol -226
stf 10 ; r3 := b
lal -20
loi 16
adf 8
lol -226
adp 2
sti 8 ; r2 := x + y
loc 0
sil -226 ; r1 := 0
lin 34 ; note the absence of the unnecessary jump
lae 22 ; address of output structure
lol -4
cal $_wri ; write integer with default width
asp 4 ; pop parameters
lae 22
lol -2
loc 6
cal $_wsi ; write integer width 6
asp 6
lae 22
lal -12
loi 8
loc 9
loc 3
cal $_wrf ; write fixed format real, width 9, precision 3
asp 14
lae 22
lol -22
cal $_wrb ; write boolean, default width
asp 4
lae 22
cal $_wln ; writeln
asp 2
ret 0 ; return, no result
end 226
exp $_main
pro $_main,0 ; main program
.6
con 2,-1,22 ; description of external files
.5
rom 15.96F8
fil .1 ; maintain source file name
lae .6 ; description of external files
lae 0 ; base of hol area to relocate buffer addresses
cal $_ini ; initialize files, etc...
asp 4
lin 37
lae .5
loi 8
lae 2
sti 8 ; mx := 15.96
lni ; was lin 38 prior to optimization
loc 99
ste 0 ; mi := 99
lni ; was lin 39 prior to optimization
lae 10 ; address of r
cal $test
asp 2
loc 0 ; normal exit
cal $_hlt ; cleanup and finish
asp 2
end 0
mes 4,40 ; length of source file is 40 lines
mes 5 ; reals were used

View file

@ -1,40 +0,0 @@
program example(output);
{This program just demonstrates typical EM code.}
type rec = record r1: integer; r2:real; r3: boolean end;
var mi: integer; mx:real; r:rec;
function sum(a,b:integer):integer; {returns a + b}
begin
sum := a + b
end;
procedure test(var r: rec); {r is passed by reference; its fields are assigned below}
label 1;
var i,j: integer;
x,y: real;
b: boolean;
c: char;
a: array[1..100] of integer;
begin
j := 1;
i := 3 * j + 6;
x := 4.8;
y := x/0.5;
b := true;
c := 'z';
for i:= 1 to 100 do a[i] := i * i; {fill a with the squares 1..10000}
r.r1 := j+27;
r.r3 := b;
r.r2 := x+y;
i := sum(r.r1, a[j]);
while i > 0 do begin j := j + r.r1; i := i - 1 end;
with r do begin r3 := b; r2 := x+y; r1 := 0 end;
goto 1; {jump to the label on the immediately following statement}
1: writeln(j, i:6, x:9:3, b)
end; {test}
begin {main program}
mx := 15.96;
mi := 99;
test(r)
end.

View file

@ -1,32 +0,0 @@
# Build rules for the EM interpreter (em), the dump printer (emdmp)
# and the interpreter tables generated by mktables.
CFLAGS=-O
# Root of the source tree, relative to this directory.
HOME=../../..
# "install" and "all" share one rule: build em, emdmp and tables.
install \
all: em emdmp tables
# Generate the file "tables" from the interpreter specification ip_spec.t.
tables: mktables $(HOME)/util/ass/ip_spec.t
mktables $(HOME)/util/ass/ip_spec.t tables
# The table generator itself; linked against the em_data.a library.
mktables: mktables.c $(HOME)/h/em_spec.h $(HOME)/h/em_flag.h \
$(HOME)/util/data/em_data.a $(HOME)/util/ass/ip_spec.h
cc -O -o mktables mktables.c $(HOME)/util/data/em_data.a
# Compile em.p with "apc -mint"; compiler output goes to emerrs, result is renamed to em.out.
em.out: em.p
apc -mint -O em.p >emerrs ; mv e.out em.out
# Compile em.p with "apc -i"; compiler output goes to emerrs, result is renamed to em.
em: em.p
apc -O -i em.p >emerrs ; mv a.out em
# nem.p is em.p with maxadr, maxdata and the adr type rewritten by sed.
nem.p: em.p
sed -e '/maxadr = t16/s//maxadr =t15/' -e '/maxdata = 8191; /s//maxdata = 14335;/' -e '/ adr=.*long/s// adr= 0..maxadr/' <em.p >nem.p
nem: nem.p
apc -O -i nem.p >emerrs ; mv a.out nem
emdmp: emdmp.c
cc -o emdmp -O emdmp.c
# cmp has no commands.
cmp:
# pr prints the three source files.
pr:
@pr em.p mktables.c emdmp.c

View file

@ -1,5 +0,0 @@
This interpreter is meant for inclusion in the EM manual.
Although slow, it showed decent behaviour on several tests.
The only monitor calls implemented are exit, read(untested),
write and ioctl - just returns the correct code for telling it's
a terminal -

File diff suppressed because it is too large Load diff

View file

@ -1,210 +0,0 @@
/*
* (c) copyright 1983 by the Vrije Universiteit, Amsterdam, The Netherlands.
*
* This product is part of the Amsterdam Compiler Kit.
*
* Permission to use, sell, duplicate or disclose this software must be
* obtained in writing. Requests for such permissions may be sent to
*
* Dr. Andrew S. Tanenbaum
* Wiskundig Seminarium
* Vrije Universiteit
* Postbox 7161
* 1007 MC Amsterdam
* The Netherlands
*
*/
/* Author: E.G. Keizer */
/* Print a readable version of the data in the post mortem dump */
/* dmpc [-s] [-dn,m] [file] */
#include "/usr/em/h/local.h"
#include <stdio.h>
#include <ctype.h>
/* Incremented by the -l option; when non-zero main() stops with a
 * "not implemented" message after printing the summary. */
int dflag = 0 ;
/* Not referenced in the visible code; presumably the bounds of the
 * -l dump range -- TODO confirm. */
long l_low,l_high;
/* Incremented by the -s option; when non-zero main() skips the summary. */
int sflag;
/* Word size and pointer size in bytes, read from the dump header.
 * main() rejects values above 4. */
int wsize,asize;
/* Header fields printed by main() as "Text size" and "Data size". */
long tsize,dsize;
/* Header fields printed as "ignmask", "errproc" and "cause". */
long ignmask,uerrorproc,cause;
/* Register values read from the dump header, printed under these names. */
long pc,sp,lb,hp,pd,pb;
/* Messages for the trap causes 0..27; main() indexes this table with `cause`. */
char *cstr[] = {
"Array bound error",
"Range bound error",
"Set error",
"Integer overflow",
"Float overflow",
"Float underflow",
"Divide by 0",
"Divide by 0.0",
"Integer undefined",
"Float undefined",
"Conversion error",
"User error 11",
"User error 12",
"User error 13",
"User error 14",
"User error 15",
"Stack overflow",
"Heap overflow",
"Illegal instruction",
"Illegal size parameter",
"Case error",
"Memory fault",
"Illegal pointer",
"Illegal pc",
"Bad argument of LAE",
"Bad monitor call",
"Bad line number",
"GTO descriptor error"
};
/* The opened dump file. */
FILE *fcore;
/* Name of the dump file; a non-option argument replaces the default. */
char *core = "core" ;
/* Not referenced in the visible code. */
int nbyte=0;
/* argv[0], used as prefix of fatal() messages. */
char *pname;
/* Forward declarations of the readers defined below. */
int readbyte();
int read2();
long readaddr();
long readword();
unsigned getbyte();
long getword();
long getaddr();
/*
 * Print a readable summary of a post mortem dump.
 *
 * Reads and validates the dump header (magic number 010255, VERSION,
 * word size, pointer size), then the header fields and registers.
 * Unless -s was given, prints the source file name and line number
 * stored in the dump followed by the cause of the trap and the
 * register values.  Returns 0 on success; the -l dump itself is not
 * implemented and ends in fatal().
 */
main(argc,argv) char **argv;
{
	register int i ;
	long line,fileaddr;
	/* int rather than char: isprint() is only defined for values that
	   fit in an unsigned char, and getbyte() yields 0..255 */
	int tok ;

	scanargs(argc,argv); fcore=fopen(core,"r") ;
	if ( fcore==NULL ) fatal("Can't open %s",core) ;
	if ( read2()!=010255 ) fatal("not a post mortem dump");
	if ( read2()!=VERSION ) fatal("wrong version dump file");
	wsize=read2(); asize=read2();
	if ( wsize>4 ) fatal("cannot handle word size %d",wsize) ;
	if ( asize>4 ) fatal("cannot handle pointer size %d",asize) ;
	tsize=readaddr(); dsize=readaddr();
	ignmask=readaddr(); uerrorproc=readaddr(); cause=readaddr();
	pc=readaddr(); sp=readaddr(); lb=readaddr(); hp=readaddr();
	pd=readaddr(); pb=readaddr();
	if ( sflag==0 ) {
		/* word at data address 0 and pointer at data address 4 */
		line=getword(0L);
		fileaddr=getaddr(4L);
		if ( fileaddr ) {
			/* print at most 40 printable characters of the file name */
			for ( i=0 ; i<40 ; i++ ) {
				tok=getbyte(fileaddr++) ;
				if ( !isprint(tok) ) break ;
				putc(tok,stdout);
			}
			printf(" ");
		}
		if ( line ) {
			/* %ld, as in printn(); %D is not a standard conversion */
			printf("line %ld",line) ;
		}
		if ( fileaddr || line ) printf(", ");
		fseek(fcore,512L,0);
		/* only causes that have an entry in cstr[] are printed by name;
		   negative values must not be used as an index either */
		if ( cause<0 || cause>=(long)(sizeof cstr/sizeof cstr[0]) ) {
			printn("cause",cause) ;
		} else {
			prints("cause",cstr[(int)cause]);
		}
		printn("pc",pc);printn("sp",sp);printn("lb",lb);
		printn("hp",hp);
		if ( pd ) printn("pd",pd) ;
		if ( pb ) printn("pb",pb) ;
		printn("errproc",uerrorproc) ;
		printn("ignmask",ignmask) ;
		if ( tsize ) printn("Text size",tsize) ;
		if ( dsize ) printn("Data size",dsize) ;
	}
	if ( dflag==0 ) return 0;
	fatal("d-flag not implemeted (yet)");
	return 1 ;
}
/*
 * Process the command line.  argv[0] is remembered in pname.
 * "-s" increments sflag, "-l" increments dflag, any other option is
 * fatal; an argument not starting with '-' becomes the dump file name.
 */
scanargs(argc,argv) char **argv ; {
	register char *arg ;
	register int left ;

	pname=argv[0];
	for ( left=argc-1 ; left>0 ; left-- ) {
		arg = *++argv ;
		if ( arg[0]!='-' ) {
			core=arg ;
			continue ;
		}
		if ( arg[1]=='s' ) sflag++ ;
		else if ( arg[1]=='l' ) dflag++ ;
		else fatal(": [-s] [-ln.m] [file]") ;
	}
}
/* Print label s1 left-justified in 15 columns, followed by the string s2. */
prints(s1,s2) char *s1,*s2; {
	fprintf(stdout,"%-15s %s\n",s1,s2);
}
/* Print label s1 left-justified in 15 columns, followed by the number d
   right-justified in 15 columns. */
printn(s1,d) char *s1; long d; {
	fprintf(stdout,"%-15s %15ld\n",s1,d);
}
/*
 * Print "<pname>: <message>" and a newline on stderr, then exit(1).
 * s1 is a printf-style format; p1..p5 are untyped pass-through slots
 * for its arguments (the convention used before <stdarg.h>).
 */
/* VARARGS1 */
fatal(s1,p1,p2,p3,p4,p5) char *s1 ; {
fprintf(stderr,"%s: ",pname);
fprintf(stderr,s1,p1,p2,p3,p4,p5) ;
fprintf(stderr,"\n") ;
exit(1) ;
}
/* Read the next byte of the dump file; running out of input is fatal.
   The result is always in the range 0..255. */
int getb() {
	register int ch ;

	if ( (ch=getc(fcore))==EOF ) fatal("Premature EOF");
	return ch&0377 ;
}
/* Read a two-byte quantity stored low byte first. */
int read2() {
	int low, high ;

	low = getb() ;
	high = getb() ;
	return (high<<8) + low ;
}
/*
 * Read an address of asize bytes, stored low byte first.
 * Each byte is widened to unsigned long before it is shifted, so the
 * shift neither overflows an int (bytes 2 and 3 on hosts with 16-bit
 * ints, a set top bit in byte 3 on hosts with 32-bit ints) nor drags
 * a sign bit into the result.
 */
long readaddr() {
	unsigned long res ;
	register int i ;

	res=0 ;
	for (i=0 ; i<asize ; i++ ) res |= (unsigned long)getb()<<(8*i) ;
	return (long)res ;
}
/*
 * Read a word of wsize bytes, stored low byte first.
 * Each byte is widened to unsigned long before it is shifted, so the
 * shift neither overflows an int (bytes 2 and 3 on hosts with 16-bit
 * ints, a set top bit in byte 3 on hosts with 32-bit ints) nor drags
 * a sign bit into the result.
 */
long readword() {
	unsigned long res ;
	register int i ;

	res=0 ;
	for (i=0 ; i<wsize ; i++ ) res |= (unsigned long)getb()<<(8*i) ;
	return (long)res ;
}
/* Fetch the byte at address a of the dumped data.
   The data is looked up 512 bytes into the file -- presumably the size
   of the dump header; TODO confirm. */
unsigned getbyte(a) long a ; {
	long where ;

	where = a+512 ;
	fseek(fcore,where,0) ;
	return getb() ;
}
/* Fetch the word at address a of the dumped data.
   The data is looked up 512 bytes into the file -- presumably the size
   of the dump header; TODO confirm. */
long getword(a) long a ; {
	long where ;

	where = a+512 ;
	fseek(fcore,where,0) ;
	return readword() ;
}
/* Fetch the pointer at address a of the dumped data.
   The data is looked up 512 bytes into the file -- presumably the size
   of the dump header; TODO confirm. */
long getaddr(a) long a ; {
	long where ;

	where = a+512 ;
	fseek(fcore,where,0) ;
	return readaddr() ;
}

View file

@ -1,244 +0,0 @@
/*
* (c) copyright 1983 by the Vrije Universiteit, Amsterdam, The Netherlands.
*
* This product is part of the Amsterdam Compiler Kit.
*
* Permission to use, sell, duplicate or disclose this software must be
* obtained in writing. Requests for such permissions may be sent to
*
* Dr. Andrew S. Tanenbaum
* Wiskundig Seminarium
* Vrije Universiteit
* Postbox 7161
* 1007 MC Amsterdam
* The Netherlands
*
*/
/* Author: E.G. Keizer */
#include <stdio.h>
#include "/usr/em/util/ass/ip_spec.h"
#include "/usr/em/h/em_spec.h"
#include "/usr/em/h/em_flag.h"
/* This program reads the human readable interpreter specification
and produces a efficient machine representation that can be
translated by a C-compiler.
*/
#define ESCAP 256
int nerror = 0 ;
int atend = 0 ;
int line = 1 ;
int maxinsl= 0 ;
extern char em_mnem[][4] ;
char esca[] = "escape" ;
#define ename(no) ((no)==ESCAP?esca:em_mnem[(no)])
extern char em_flag[] ;
/*
 * usage: mktables [ input [ output ] ]
 *
 * Redirects stdin and stdout to the named files when given, converts
 * the specification with readin() and returns the number of errors.
 * The argument count is checked before any file is opened, so a wrong
 * invocation can no longer truncate the file named as second argument.
 */
main(argc,argv) char **argv ; {
	if ( argc>3 ) {
		fatal("%s [ file [ file ] ]",argv[0]) ;
	}
	if ( argc>1 ) {
		if ( freopen(argv[1],"r",stdin)==NULL) {
			fatal("Cannot open %s",argv[1]) ;
		}
	}
	if ( argc>2 ) {
		if ( freopen(argv[2],"w",stdout)==NULL) {
			fatal("Cannot create %s",argv[2]) ;
		}
	}
	atend=0 ;
	readin();
	atend=1 ;
	return nerror ;
}
/*
 * Translate the specification on stdin, one line at a time.
 * For every non-empty line the output is: the opcode number plus one,
 * a space, the flag characters (echoed by decflag()), an 'S' for
 * opcodes whose parameter kind is PAR_D, PAR_F, PAR_B, PAR_L or PAR_C,
 * a space, and the rest of the input line.
 */
readin() {
	char *ident();
	char *word ;
	int op,flagbits;
	int ch;

	for (;;) {
		if ( feof(stdin) ) break ;
		word=ident() ;
		/* empty line or end of input: nothing to translate */
		if ( feof(stdin) || *word=='\n' ) continue ;
		op = getmnem(word) ;
		printf("%d ",op+1) ;
		flagbits = decflag(ident(),op) ;
		switch(em_flag[op]&EM_PAR) {
		case PAR_D:
		case PAR_F:
		case PAR_B:
		case PAR_L:
		case PAR_C:
			putchar('S') ;
		}
		putchar(' ');
		/* copy the remainder of the line unchanged */
		for ( ch=readchar() ; ch!='\n' && ch!=EOF ; ch=readchar() ) {
			putchar(ch) ;
		}
		putchar('\n') ;
	}
}
char *ident() {
	/* skip spaces and tabs, anything up to space,tab or eof is
	   an identifier.
	   Anything from # to end-of-line is an end-of-line.
	   End-of-line is an identifier all by itself.
	   The result lives in a static buffer that is overwritten by
	   the next call.
	*/
	static char array[200] ;
	register int c ;
	register char *cc ;
	/* last element of the buffer, reserved for the terminating 0 */
	char *limit = &array[(sizeof array) - 1] ;

	do {
		c=readchar() ;
	} while ( c==' ' || c=='\t' ) ;
	for ( cc=array ; cc<limit ; cc++ ) {
		if ( c=='#' ) {
			do {
				c=readchar();
			} while ( c!='\n' && c!=EOF ) ;
		}
		*cc = c ;
		if ( c=='\n' && cc==array ) break ;
		c=readchar() ;
		if ( c=='\n' ) {
			/* the newline is the next identifier */
			pushback(c) ;
			break ;
		}
		if ( c==' ' || c=='\t' || c==EOF ) break ;
	}
	/* After a break cc points at the last character stored, so the
	   terminator goes one further.  When the buffer filled up cc is
	   already at the reserved last element; stepping on would write
	   past the end of the array. */
	if ( cc<limit ) cc++ ;
	*cc=0 ;
	return array ;
}
/* Return the index in em_mnem of the mnemonic str, searching entries
   0 up to and including sp_lmnem.  An unknown mnemonic is reported
   with error() and yields 0. */
int getmnem(str) char *str ; {
	register int idx ;

	for ( idx=0 ; idx<=sp_lmnem ; idx++ ) {
		if ( strcmp(em_mnem[idx],str)==0 ) return idx ;
	}
	error("Illegal mnemonic") ;
	return 0 ;
}
/*
 * Report an error on stderr and count it in nerror.
 * str is a printf-style format, a1..a6 are untyped pass-through slots
 * for its arguments.  While atend is zero the message is prefixed with
 * the current input line number.
 */
error(str,a1,a2,a3,a4,a5,a6) /* VARARGS1 */ char *str ; {
if ( !atend ) fprintf(stderr,"line %d: ",line) ;
fprintf(stderr,str,a1,a2,a3,a4,a5,a6) ;
fprintf(stderr,"\n");
nerror++ ;
}
/*
 * Print a message on stderr in the same layout as error(), but
 * without incrementing nerror.
 */
mess(str,a1,a2,a3,a4,a5,a6) /* VARARGS1 */ char *str ; {
if ( !atend ) fprintf(stderr,"line %d: ",line) ;
fprintf(stderr,str,a1,a2,a3,a4,a5,a6) ;
fprintf(stderr,"\n");
}
/* Report the error through error() and terminate with exit status 1. */
fatal(str,a1,a2,a3,a4,a5,a6) /* VARARGS1 */ char *str ; {
error(str,a1,a2,a3,a4,a5,a6) ;
exit(1) ;
}
/* Marker for a flag group that has not been given a value yet. */
#define ILLGL -1

/*
 * Verify that a flag group is still unset.
 * val is the current value of the group; anything other than ILLGL means
 * a flag of the same group was seen before, which is reported as
 * "Illegal flag combination".
 */
check(val) int val ; {
	if ( ILLGL!=val ) {
		error("Illegal flag combination") ;
	}
}
int decflag(str,opc) char *str ; {
int type ;
int escape ;
int range ;
int wordm ;
int notzero ;
char c;
type=escape=range=wordm=notzero= ILLGL ;
while ( c= *str++ ) {
switch ( c ) {
case 'm' :
check(type) ; type=OPMINI ; break ;
case 's' :
check(type) ; type=OPSHORT ; break ;
case '-' :
check(type) ; type=OPNO ;
if ( (em_flag[opc]&EM_PAR)==PAR_W ) c='i' ;
break ;
case '1' :
check(type) ; type=OP8 ; break ;
case '2' :
check(type) ; type=OP16 ; break ;
case '4' :
check(type) ; type=OP32 ; break ;
case '8' :
check(type) ; type=OP64 ; break ;
case 'e' :
check(escape) ; escape=0 ; break ;
case 'N' :
check(range) ; range= 2 ; break ;
case 'P' :
check(range) ; range= 1 ; break ;
case 'w' :
check(wordm) ; wordm=0 ; break ;
case 'o' :
check(notzero) ; notzero=0 ; break ;
default :
error("Unknown flag") ;
}
putchar(c);
}
if ( type==ILLGL ) error("Type must be specified") ;
switch ( type ) {
case OP64 :
case OP32 :
if ( escape!=ILLGL ) error("Conflicting escapes") ;
escape=ILLGL ;
case OP16 :
case OP8 :
case OPSHORT :
case OPNO :
if ( notzero!=ILLGL ) mess("Improbable OPNZ") ;
if ( type==OPNO && range!=ILLGL ) {
mess("No operand in range") ;
}
}
if ( escape!=ILLGL ) type|=OPESC ;
if ( wordm!=ILLGL ) type|=OPWORD ;
switch ( range) {
case ILLGL : type|=OP_BOTH ; break ;
case 1 : type|=OP_POS ; break ;
case 2 : type|=OP_NEG ; break ;
}
if ( notzero!=ILLGL ) type|=OPNZ ;
return type ;
}
/* One-character pushback buffer shared by readchar() and pushback(). */
static int pushchar ;	/* the character pushed back */
static int pushf ;	/* non-zero while pushchar holds a character */

/*
 * Return the next input character: the pushed-back character when there
 * is one, otherwise the next character of stdin.  EOF is returned once
 * stdin is at end of file.  The global line counter is incremented for
 * every newline returned.
 */
int readchar() {
	int c ;

	if ( !pushf ) {
		if ( feof(stdin) ) {
			return EOF ;
		}
		c = getc(stdin) ;
	} else {
		c = pushchar ;
		pushf = 0 ;
	}
	if ( c=='\n' ) {
		line++ ;
	}
	return c ;
}
/*
 * Give character c back to the input so that the next readchar()
 * returns it again.  Only one character can be pending; a second
 * pushback without an intervening readchar() is a fatal error.
 * Pushing back a newline undoes the line count made when it was read.
 */
pushback(c) {
	if ( pushf ) {
		fatal("Double pushback") ;
	}
	if ( c=='\n' ) {
		line-- ;
	}
	pushchar = c ;
	pushf++ ;
}

View file

@ -1,180 +0,0 @@
.BP
.S1 "INTRODUCTION"
EM is a family of intermediate languages designed for producing
portable compilers.
The general strategy is for a program called
.B front end
to translate the source program to EM.
Another program,
.B back
.BW end
translates EM to target assembly language.
Alternatively, the EM code can be assembled to a binary form
and interpreted.
These considerations led to the following goals:
.IS 2 10
.PS 1 4
.PT
The design should allow translation to,
or interpretation on, a wide range of existing machines.
Design decisions should be delayed as far as possible
and the implications of these decisions should
be localized as much as possible.
.N
The current microcomputer technology offers 8, 16 and 32 bit machines
with various sizes of address space.
EM should be flexible enough to be useful on most of these
machines.
The differences between the members of the EM family should only
concern the wordsize and address space size.
.PT
The architecture should ease the task of code generation for
high level languages such as Pascal, C, Ada, Algol 68, BCPL.
.PT
The instruction set used by the interpreter should be compact,
to reduce the amount of memory needed
for program storage, and to reduce the time needed to transmit
programs over communication lines.
.PT
It should be designed with microprogrammed implementations in
mind; in particular, the use of many short fields within
instruction opcodes should be avoided, because their extraction by the
microprogram or conversion to other instruction formats is inefficient.
.PE
.IE
.A
The basic architecture is based on the concept of a stack. The stack
is used for procedure return addresses, actual parameters, local variables,
and arithmetic operations.
There are several built-in object types,
for example, signed and unsigned integers,
floating point numbers, pointers and sets of bits.
There are instructions to push and pop objects
to and from the stack.
The push and pop instructions are not typed.
They only care about the size of the objects.
For each built-in type there are
reverse Polish type instructions that pop one or more
objects from the top of
the stack, perform an operation, and push the result back onto the
stack.
For all types except pointers,
these instructions have the object size
as argument.
.P
There are no visible general registers used for arithmetic operands
etc. This is in contrast to most third generation computers, which usually
have 8 or 16 general registers. The decision not to have a group of
general registers was fully intentional, and follows W.L. Van der
Poel's dictum that a machine should have 0, 1, or an infinite
number of any feature. General registers have two primary uses: to hold
intermediate results of complicated expressions, e.g.
.IS 5 0 1
((a*b + c*d)/e + f*g/h) * i
.IE 1
and to hold local variables.
.P
Various studies
have shown that the average expression has fewer than two operands,
making the former use of registers of doubtful value. The present trend
toward structured programs consisting of many small
procedures greatly reduces the value of registers to hold local variables
because the large number of procedure calls implies a large overhead in
saving and restoring the registers at every call.
.BP
.P
Although there are no general purpose registers, there are a
few internal registers with specific functions as follows:
.IS 2
.N 1
.TS
tab(:);
l 1 l l.
PC:-:Program Counter:Pointer to next instruction
LB:-:Local Base:Points to base of the local variables \
in the current procedure.
SP:-:Stack Pointer:Points to the highest occupied word on the stack.
HP:-:Heap Pointer:Points to the top of the heap area.
.TE 1
.IE
.A
Furthermore, reverse Polish code is much easier to generate than
multi-register machine code, especially if highly efficient code is
desired.
When translating to assembly language the back end can make
good use of the target machine's registers.
An EM machine can
achieve high performance by keeping part of the stack
in high speed storage (a cache or microprogram scratchpad memory) rather
than in primary memory.
.P
Again according to van der Poel's dictum,
all EM instructions have zero or one argument.
We believe that instructions needing two arguments
can be split into two simpler ones.
The simpler ones can probably be used in other
circumstances as well.
Moreover, these two instructions together often
have a shorter encoding than the single
instruction before.
.P
This document describes EM at three different levels:
the abstract level, the assembly language level and
the machine language level.
.A
The most important level is that of the abstract EM architecture.
This level deals with the basic design issues.
Only the functional capabilities of instructions are relevant, not their
format or encoding.
Most chapters of this document refer to the abstract level
and it is explicitly stated whenever
another level is described.
.A
The assembly language is intended for the compiler writer.
It presents a more or less orthogonal instruction
set and provides symbolic names for data.
Moreover, it facilitates the linking of
separately compiled 'modules' into a single program
by providing several pseudoinstructions.
.A
The machine language is designed for interpretation with a compact
program text and easy decoding.
The binary representation of the machine language instruction set is
far from orthogonal.
Frequent instructions have a short opcode.
The encoding is fully byte oriented.
These bytes do not contain small bit fields, because
bit fields would slow down decoding considerably.
.P
A common use for EM is for producing portable (cross) compilers.
When used this way, the compilers produce
EM assembly language as their output.
To run the compiled program on the target machine,
the back end translates the EM assembly language to
the target machine's assembly language.
When this approach is used, the format of the EM
machine language instructions is irrelevant.
On the other hand, when writing an interpreter for EM machine language
programs, the interpreter must deal with the machine language
and not with the symbolic assembly language.
.P
As mentioned above, the
current microcomputer technology offers 8, 16 and 32 bit
machines with address spaces ranging from 2\v'-0.5m'16\v'0.5m'
to 2\v'-0.5m'32\v'0.5m' bytes.
Having one size of pointers and integers restricts
the usefulness of the language.
We decided to have a different language for each combination of
word and pointer size.
All languages offer the same instruction set and differ only in
memory alignment restrictions and the implicit size assumed in
several instructions.
The languages
differ slightly for the
different size combinations.
For example: the
size of any object on the stack and alignment restrictions.
The wordsize is restricted to powers of 2 and
the pointer size must be a multiple of the wordsize.
Almost all programs handling EM will be parametrized with word
and pointer size.

View file

@ -1,376 +0,0 @@
.SN 8
.VS 1 0
.BP
.S1 "ENVIRONMENT INTERACTIONS"
EM programs can interact with their environment in three ways.
Two, starting/stopping and monitor calls, are dealt with in this chapter.
The remaining way to interact, interrupts, will be treated
together with traps in chapter 9.
.S2 "Program starting and stopping"
EM user programs start with a call to a procedure called
m_a_i_n.
The assembler and backends look for the definition of a procedure
with this name in their input.
The call passes three parameters to the procedure.
The parameters are similar to the parameters supplied by the
UNIX
.FS
UNIX is a Trademark of Bell Laboratories.
.FE
operating system to C programs.
These parameters are often called
.BW argc ,
.B argv
and
.BW envp .
Argc is the parameter nearest to LB and is a wordsized integer.
The other two are pointers to the first element of an array of
string pointers.
.N
The
.B argv
array contains
.B argc
strings, the first of which contains the program call name.
The other strings in the
.B argv
array are the program parameters.
.P
The
.B envp
array contains strings in the form "name=string", where 'name'
is the name of an environment variable and string its value.
The
.B envp
is terminated by a zero pointer.
.P
An EM user program stops if the program returns from the first
invocation of m_a_i_n.
The contents of the function return area are used to procure a
wordsized program return code.
EM programs also stop when traps and interrupts occur that are
not caught and when the exit monitor call is executed.
.S2 "Input/Output and other monitor calls"
EM differs from most conventional machines in that it has high level i/o
instructions.
Typical instructions are OPEN FILE and READ FROM FILE instead
of low level instructions such as setting and clearing
bits in device registers.
By providing such high level i/o primitives, the task of implementing
EM on various non EM machines is made considerably easier.
.P
I/O is initiated by the MON instruction, which expects an iocode on top
of the stack.
Often there are also parameters which are pushed on the
stack in reverse order, that is: last
parameter first.
Some i/o functions also provide results, which are returned on the stack.
In the list of monitor calls we use several types of parameters and results,
these types consist of integers and unsigneds of varying sizes, but never
smaller than the wordsize, and the two pointer types.
.N 1
The names of the types used are:
.IS 4
.PS - 10
.PT int
an integer of wordsize
.PT int2
an integer whose size is the maximum of the wordsize and 2
bytes
.PT int4
an integer whose size is the maximum of the wordsize and 4
bytes
.PT intp
an integer with the size of a pointer
.PT uns2
an unsigned integer whose size is the maximum of the wordsize and 2
bytes
.PT unsp
an unsigned integer with the size of a pointer
.PT ptr
a pointer into data space
.PE 1
.IE 0
The table below lists the i/o codes with their results and
parameters.
This list is similar to the system calls of the UNIX Version 7
operating system.
.BP
.A
To execute a monitor call, proceed as follows:
.IS 2
.N 1
.PS a 4 "" )
.PT
Stack the parameters, in reverse order, last parameter first.
.PT
Push the monitor call number (iocode) onto the stack.
.PT
Execute the MON instruction.
.PE 1
.IE
An error code is present on the top of the stack after
execution of most monitor calls.
If this error code is zero, the call performed the action
requested and the results are available on top of the stack.
Non-zero error codes indicate a failure, in this case no
results are available and the error code has been pushed twice.
This construction enables programs to test for failure with a
single instruction (~TEQ or TNE~) and still find out the cause of
the failure.
The result name 'e' is reserved for the error code.
.N 1
List of monitor calls.
.DS B
number name parameters results function
1 Exit status:int Terminate this process
2 Fork e,flag,pid:int Spawn new process
3 Read fildes:int;buf:ptr;nbytes:unsp
e:int;rbytes:unsp Read from file
4 Write fildes:int;buf:ptr;nbytes:unsp
e:int;wbytes:unsp Write on a file
5 Open string:ptr;flag:int
e,fildes:int Open file for read and/or write
6 Close fildes:int e:int Close a file
7 Wait e:int;status,pid:int2
Wait for child
8 Creat string:ptr;mode:int
e,fildes:int Create a new file
9 Link string1,string2:ptr
e:int Link to a file
10 Unlink string:ptr e:int Remove directory entry
12 Chdir string:ptr e:int Change default directory
14 Mknod string:ptr;mode,addr:int2
e:int Make a special file
15 Chmod string:ptr;mode:int2
e:int Change mode of file
16 Chown string:ptr;owner,group:int2
e:int Change owner/group of a file
18 Stat string,statbuf:ptr
e:int Get file status
19 Lseek fildes:int;off:int4;whence:int
e:int;oldoff:int4 Move read/write pointer
20 Getpid pid:int2 Get process identification
21 Mount special,string:ptr;rwflag:int
e:int Mount file system
22 Umount special:ptr e:int Unmount file system
23 Setuid userid:int2 e:int Set user ID
24 Getuid e_uid,r_uid:int2 Get user ID
25 Stime time:int4 e:int Set time and date
26 Ptrace request:int;pid:int2;addr:ptr;data:int
e,value:int Process trace
27 Alarm seconds:uns2 previous:uns2 Schedule signal
28 Fstat fildes:int;statbuf:ptr
e:int Get file status
29 Pause Stop until signal
30 Utime string,timep:ptr
e:int Set file times
33 Access string:ptr;mode:int e:int Determine file accessibility
34 Nice incr:int Set program priority
35 Ftime bufp:ptr e:int Get date and time
36 Sync Update filesystem
37 Kill pid:int2;sig:int
e:int Send signal to a process
41 Dup fildes,newfildes:int
e,fildes:int Duplicate a file descriptor
42 Pipe e,w_des,r_des:int Create a pipe
43 Times buffer:ptr Get process times
44 Profil buff:ptr;bufsiz,offset,scale:intp Execution time profile
46 Setgid gid:int2 e:int Set group ID
47 Getgid e_gid,r_gid:int Get group ID
48 Sigtrp trapno,signo:int
e,prevtrap:int See below
51 Acct file:ptr e:int Turn accounting on or off
53 Lock flag:int e:int Lock a process
54 Ioctl fildes,request:int;argp:ptr
e:int Control device
56 Mpxcall cmd:int;vec:ptr e:int Multiplexed file handling
59 Exece name,argv,envp:ptr
e:int Execute a file
60 Umask complmode:int2 oldmask:int2 Set file creation mode mask
61 Chroot string:ptr e:int Change root directory
.DE 1
Codes 0, 11, 13, 17, 31, 32, 38, 39, 40, 45, 49, 50, 52,
55, 57, 58, 62, and 63 are
not used.
.P
All monitor calls, except fork and sigtrp
are the same as the UNIX version 7 system calls.
.P
The sigtrp entry maps UNIX signals onto EM interrupts.
Normally, trapno is in the range 0 to 252.
In that case it requests that signal signo
will cause trap trapno to occur.
When given trap number -2, default signal handling is reset, and when given
trap number -3, the signal is ignored.
.P
The flag returned by fork is 1 in the child process and 0 in
the parent.
The pid returned is the process-id of the other process.
.BP
.S1 "TRAPS AND INTERRUPTS"
EM provides a means for the user program to catch all traps
generated by the program itself, the hardware, or external conditions.
This mechanism uses five instructions: LIM, SIM, SIG, TRP and RTT.
This section of the manual may be omitted on the first reading since it
presupposes knowledge of the EM instruction set.
.P
The action taken when a trap occurs is determined by the value
of an internal EM trap register.
This register contains a pointer to a procedure.
Initially the pointer used is zero and all traps halt the
program with, hopefully, a useful message to the outside world.
The SIG instruction can be used to alter the trap register,
it pops a procedure pointer from the
stack into the trap register.
When a trap occurs after storing a nonzero value in the trap
register, the procedure pointed to by the trap register
is called with the trap number
as the only parameter (see below).
SIG returns the previous value of the trap register on the
stack.
Two consecutive SIGs are a no-op.
When a trap occurs, the trap register is reset to its initial
condition, to prevent recursive traps from hanging the machine up,
e.g. stack overflow in the stack overflow handling procedure.
.P
The runtime systems for some languages need to ignore some EM
traps.
EM offers a feature called the ignore mask.
It contains one bit for each of the lowest 16 trap numbers.
The bits are numbered 0 to 15, with the least significant bit
having number 0.
If a certain bit is 1 the corresponding trap never
occurs and processing simply continues.
The actions performed by the offending instruction are
described by the Pascal program in appendix A.
.N
If the bit is 0, traps are not ignored.
The instructions LIM and SIM allow copying and replacement of
the ignore mask.~
.P
The TRP instruction generates a trap, the trap number being found on the
stack.
This is, among other things,
useful for library procedures and runtime systems.
It can also be used by a low level trap procedure to pass the trap to a
higher level one (see example below).
.P
The RTT instruction returns from the trap procedure and continues after the
trap.
In the list below all traps marked with an asterisk ('*') are
considered to be fatal and it is explicitly undefined what happens if
you try to restart after the trap.
.P
The way a trap procedure is called is completely compatible
with normal calling conventions. The only way a trap procedure
differs from normal procedures is the return. It has to use RTT instead
of RET. This is necessary because the complete runtime status is saved on the
stack before calling the procedure and all this status has to be reloaded.
Error numbers are in the range 0 to 252.
The trap numbers are divided into three categories:
.IS 4
.N 1
.PS - 10
.PT ~~0-~63
EM machine errors, e.g. illegal instruction.
.PS - 8
.PT ~0-15
maskable
.PT 16-63
not maskable
.PE
.PT ~64-127
Reserved for use by compilers, run time systems, etc.
.PT 128-252
Available for user programs.
.PE 1
.IE
EM machine errors are numbered as follows:
.DS I 5
.TS
tab(@);
n l l.
0@EARRAY@Array bound error
1@ERANGE@Range bound error
2@ESET@Set bound error
3@EIOVFL@Integer overflow
4@EFOVFL@Floating overflow
5@EFUNFL@Floating underflow
6@EIDIVZ@Divide by 0
7@EFDIVZ@Divide by 0.0
8@EIUND@Undefined integer
9@EFUND@Undefined float
10@ECONV@Conversion error
16*@ESTACK@Stack overflow
17*@EHEAP@Heap overflow
18*@EILLINS@Illegal instruction
19*@EODDZ@Illegal size argument
20*@ECASE@Case error
21*@EMEMFLT@Addressing non existent memory
22*@EBADPTR@Bad pointer used
23*@EBADPC@Program counter out of range
24@EBADLAE@Bad argument of LAE
25@EBADMON@Bad monitor call
26@EBADLIN@Argument of LIN too high
27@EBADGTO@GTO descriptor error
.TE
.DE 0
.P
As an example,
suppose a subprocedure has to be written to do a numeric
calculation.
When an overflow occurs the computation has to be stopped and
the higher level procedure must be resumed.
This can be programmed as follows using the mechanism described above:
.DS B
mes 2,2,2 ; set sizes
ersave
bss 2,0,0 ; Room to save previous value of trap procedure
msave
bss 2,0,0 ; Room to save previous value of trap mask
pro calcule,0 ; entry point
lxl 0 ; fill in non-local goto descriptor with LB
ste jmpbuf+4
lor 1 ; and SP
ste jmpbuf+2
lim ; get current ignore mask
ste msave ; save it
lim
loc 4 ; bit for EFOVFL
ior 2 ; set in mask
sim ; ignore EFOVFL from now on
lpi $catch ; load procedure identifier
sig ; catch will get all traps now
ste ersave ; save previous trap procedure identifier
; perform calculation now, possibly generating overflow
1 ; label jumped to by catch procedure
loe ersave ; get old trap procedure
sig ; refer all following trap to old procedure
asp 2 ; remove result of sig
loe msave ; restore previous mask
sim ; done now
; load result of calculation
ret 2 ; return result
jmpbuf
con *1,0,0
end
.DE 0
.VS 1 1
.DS
Example of catch procedure
pro catch,0 ; Local procedure that must catch the overflow trap
lol 2 ; Load trap number
loc 4 ; check for overflow
bne *1 ; if other trap, call higher trap procedure
gto jmpbuf ; return to procedure calcule
1 ; other trap has occurred
loe ersave ; previous trap procedure
sig ; other procedure will get the traps now
asp 2 ; remove the result of sig
lol 2 ; stack trap number
trp ; call other trap procedure
rtt ; if other procedure returns, do the same
end
.DE

View file

@ -1,6 +0,0 @@
# Format the input records as a tbl(1) table with three entries per row.
# A record with four fields is printed as two strings and two integers;
# a record with three fields as two strings, an empty column and one
# integer.  Records with any other field count print nothing, but still
# take up a slot in the row.
BEGIN {
	printf(".TS\nlw(6) lw(8) rw(3) rw(6) 14 lw(6) lw(8) rw(3) rw(6) 14 lw(6) lw(8) rw(3) rw(6).\n")
}
NF == 4 {
	printf("%s\t%s\t%d\t%d", $1, $2, $3, $4)
}
NF == 3 {
	printf("%s\t%s\t\t%d", $1, $2, $3)
}
# every third record ends the row, the others are followed by a tab
{
	if ( NR%3 != 0 ) printf("\t") ; else printf("\n")
}
END {
	# finish a partially filled last row
	if ( NR%3 != 0 ) printf("\n")
	printf(".TE\n")
}

View file

@ -1,62 +0,0 @@
.SN 3
.BP
.S1 "INSTRUCTION ADDRESS SPACE"
The instruction space of the EM machine contains
the code for procedures.
Tables necessary for the execution of this code, for example, procedure
descriptor tables, may also be present.
The instruction space does not change during
the execution of a program, so that it may be
protected.
No further restrictions to the instruction address space are
necessary for the abstract and assembly language level.
.P
Each procedure has a single entry point: the first instruction.
A special type of pointer identifies a procedure.
Pointers into the instruction
address space have the same size as pointers into data space and
can, for example, contain the address of the first instruction
or an index in a procedure descriptor table.
.A
There is a single EM program counter, PC, pointing
to the next instruction to be executed.
The procedure pointed to by PC is
called the 'current' procedure.
A procedure may call another procedure using the CAL or CAI
instruction.
The calling procedure remains 'active' and is resumed whenever the called
procedure returns.
Note that a procedure has several 'active' invocations when
called recursively.
.P
Each procedure must return properly.
It is not allowed to fall through to the
code of the next procedure.
There are several ways to exit from a procedure:
.IS 3
.PS
.PT
the RET instruction, which returns to the
calling procedure.
.PT
the RTT instruction, which exits a trap handling routine and resumes
the trapping instruction (see next chapter).
.PT
the GTO instruction, which is used for non-local goto's.
It can remove several frames from the stack and transfer
control to an active procedure.
(see also MES~11 in paragraph 11.1.4.4)
.PE
.IE
.P
All branch instructions can transfer control
to any label within the same procedure.
Branch instructions can never jump out of a procedure.
.P
Several language implementations use a so called procedure
instance identifier, a combination of a procedure identifier and
the LB of a stack frame, also called static link.
.P
The program text for each procedure, as well as any tables,
are fragments and can be allocated anywhere
in the instruction address space.

File diff suppressed because it is too large Load diff

View file

@ -1,390 +0,0 @@
.BP
.SN 10
.S1 "EM MACHINE LANGUAGE"
The EM machine language is designed to make program text compact
and to make decoding easy.
Compact program text has many advantages: programs execute faster,
programs occupy less primary and secondary storage and loading
programs into satellite processors is faster.
The decoding of EM machine language is so simple,
that it is feasible to use interpreters as long as EM hardware
machines are not available.
This chapter is irrelevant when back ends are used to
produce executable target machine code.
.S2 "Instruction encoding"
A design goal of EM is to make the
program text as compact as possible.
Decoding must be easy, however.
The encoding is fully byte oriented, without any small bit fields.
There are 256 primary opcodes, two of which are an escape to
two groups of 256 secondary opcodes each.
.A
EM instructions without arguments have a single opcode assigned,
possibly escaped:
.DS
|--------------|
| opcode |
|--------------|
or
|--------------|--------------|
| escape | opcode |
|--------------|--------------|
.DE
The encoding for instructions with an argument is more complex.
Several instructions have an address from the global data area
as argument.
Other instructions have different opcodes for positive
and negative arguments.
.N 1
There is always an opcode that takes the next two bytes as argument,
high byte first:
.DS
|--------------|--------------|--------------|
| opcode | hibyte | lobyte |
|--------------|--------------|--------------|
or
|--------------|--------------|--------------|--------------|
| escape | opcode | hibyte | lobyte |
|--------------|--------------|--------------|--------------|
.DE
.DS
An extra escape is provided for instructions with four or eight byte arguments.
|--------------|--------------|--------------| |--------------|
| ESCAPE | opcode | hibyte |...| lobyte |
|--------------|--------------|--------------| |--------------|
.DE
For most instructions some argument values predominate.
The most frequent combinations of instruction and argument
will be encoded in a single byte, called a mini:
.DS
|---------------|
|opcode+argument| (mini)
|---------------|
.DE
The number of minis is restricted, because only
254 primary opcodes are available.
Many instructions have the bulk of their arguments
fall in the range 0 to 255.
Instructions that address global data have their arguments
distributed over a wider range,
but small values of the high byte are common.
For all these cases there is another encoding
that combines the instruction and the high byte of the argument
into a single opcode.
These opcodes are called shorties.
Shorties may be escaped.
.DS
|--------------|--------------|
| opcode+high | lobyte | (shortie)
|--------------|--------------|
or
|--------------|--------------|--------------|
| escape | opcode+high | lobyte |
|--------------|--------------|--------------|
.DE
Escaped shorties are useless if the normal encoding has a primary opcode.
Note that for some instruction-argument combinations
several different encodings are available.
It is the task of the assembler to select the shortest of these.
The savings by these mini and shortie
opcodes are considerable, about 55%.
.P
Further improvements are possible:
the arguments of
many instructions are a multiple of the wordsize.
Some also do not allow zero as an argument.
If these arguments are divided by the wordsize and,
when zero is not allowed, then decremented by 1, more of them can
be encoded as shortie or mini.
The arguments of some other instructions
rarely or never assume the value 0, but start at 1.
The value 1 is then encoded as 0,
2 as 1 and so on.
.P
Assigning opcodes to instructions by the assembler is completely
table driven.
For details see appendix B.
.S2 "Procedure descriptors"
The procedure identifiers used in the interpreter are indices
into a table of procedure descriptors.
Each descriptor contains:
.IS 6
.PS - 4
.PT 1.
the number of bytes to be reserved for locals at each
invocation.
.N
This is a pointer-sized integer.
.PT 2.
the start address of the procedure
.PE
.IE
.S2 "Load format"
The EM machine language load format defines the interface between
the EM assembler/loader and the EM machine itself.
A load file consists of a header, the program text to be executed,
a description of the global data area and the procedure descriptor table,
in this order.
All integers in the load file are presented with the
least significant byte first.
.P
The header has two parts: the first half (eight 16-bit integers)
aids in selecting
the correct EM machine or interpreter.
Some EM machines, for instance, may have hardware floating point
instructions.
.N
The header entries are as follows (bit 0 is rightmost):
.IS 2
.VS 1 0
.PS 1 4 "" :
.PT
magic number (07255)
.PT
flag bits with the following meaning:
.PS - 7 "" :
.PT bit 0
TEST; test for integer overflow etc.
.PT bit 1
PROFILE; for each source line: count the number of memory
cycles executed.
.PT bit 2
FLOW; for each source line: set a bit in a bit map table if
instructions on that line are executed.
.PT bit 3
COUNT; for each source line: increment a counter if that line
is entered.
.PT bit 4
REALS; set if a program uses floating point instructions.
.PT bit 5
EXTRA; more tests during compiler debugging.
.PE
.PT
number of unresolved references.
.PT
version number; used to detect obsolete EM load files.
.PT
wordsize ; the number of bytes in each machine word.
.PT
pointer size ; the number of bytes available for addressing.
.PT
unused
.PT
unused
.PE
.IE
The second part of the header (eight entries, of pointer size bytes each)
describes the load file itself:
.IS 2
.PS 1 4 "" :
.PT
NTEXT; the program text size in bytes.
.PT
NDATA; the number of load-file descriptors (see below).
.PT
NPROC; the number of entries in the procedure descriptor table.
.PT
ENTRY; procedure number of the procedure to start with.
.PT
NLINE; the maximum source line number.
.PT
SZDATA; the address of the lowest uninitialized data byte.
.PT
unused
.PT
unused
.PE
.IE
.P
The program text consists of NTEXT bytes.
NTEXT is always a multiple of the wordsize.
The first byte of the program text is the
first byte of the instruction address
space, i.e. it has address 0.
Pointers into the program text are found in the procedure descriptor
table where relocation is simple and in the global data area.
The initialization of the global data area allows easy
relocation of pointers into both address spaces.
.P
The global data area is described by the NDATA descriptors.
Each descriptor describes a number of consecutive words (of~wordsize)
and consists of a sequence of bytes.
While reading the descriptors from the load file, one can
initialize the global data area from low to high addresses.
The size of the initialized data area is given by SZDATA,
this number can be used to check the initialization.
.N
The header of each descriptor consists of a byte, describing the type,
and a count.
The number of bytes used for this (unsigned) count depends on the
type of the descriptor and
is either a pointer-sized integer
or one byte.
The meaning of the count depends on the descriptor type.
At load time an interpreter can
perform any conversion deemed necessary, such as
reordering bytes in integers
and pointers and adding base addresses to pointers.
.BP
.A
In the following pictures we show a graphical notation of the
initializers.
The leftmost rectangle represents the leading byte.
.N 1
.DS
.PS - 4 " "
Fields marked with
.N 1
.PT n
contain a pointer-sized integer used as a count
.PT m
contain a one-byte integer used as a count
.PT b
contain a one-byte integer
.PT w
contain a wordsized integer
.PT p
contain a data or instruction pointer
.PT s
contain a null terminated ASCII string
.PE 1
.DE 0
.VS 1 1
.DS
-------------------
| 0 | n | repeat last initialization n times
-------------------
.DE
.DS
---------
| 1 | m | m uninitialized words
---------
.DE
.DS
____________
/ bytes \e
----------------- -----
| 2 | m | b | b |...| b | m initialized bytes
----------------- -----
.DE
.DS
_________
/ word \e
-----------------------
| 3 | m | w |... m initialized wordsized integers
-----------------------
.DE
.DS
_________
/ pointer \e
-----------------------
| 4 | m | p |... m initialized data pointers
-----------------------
.DE
.DS
_________
/ pointer \e
-----------------------
| 5 | m | p |... m initialized instruction pointers
-----------------------
.DE
.DS
____________
/ bytes \e
-------------------------
| 6 | m | b | b |...| b | initialized integer of size m
-------------------------
.DE
.DS
____________
/ bytes \e
-------------------------
| 7 | m | b | b |...| b | initialized unsigned of size m
-------------------------
.DE
.DS
____________
/ string \e
-------------------------
| 8 | m | s | initialized float of size m
-------------------------
.DE 3
.PS - 8
.PT type~0:
If the last initialization initialized k bytes starting
at address \fIa\fP, do the same initialization again n times,
starting at \fIa\fP+k, \fIa\fP+2*k, ..., \fIa\fP+n*k.
This is the only descriptor whose starting byte
is followed by an integer with the
size of a
pointer;
in all other descriptors the first byte is followed by a one-byte count.
This descriptor must be preceded by a descriptor of
another type.
.PT type~1:
Reserve m words, not explicitly initialized (BSS and HOL).
.PT type~2:
The m bytes following the descriptor header are
initializers for the next m bytes of the
global data area.
m is divisible by the wordsize.
.PT type~3:
The m words following the header are initializers for the next m words of the
global data area.
.PT type~4:
The m data address space pointers following the header are
initializers for the next
m data pointers in the global data area.
Interpreters that represent EM pointers by
target machine addresses must relocate all data pointers.
.PT type~5:
The m instruction address space pointers following the header are
initializers for the next
m instruction pointers in the global data area.
Interpreters that represent EM instruction pointers by
target machine addresses must relocate these pointers.
.PT type~6:
The m bytes following the header form
a signed integer number with a size of m bytes,
which is an initializer for the next m bytes
of the global data area.
m is governed by the same restrictions as for
transfer of objects to/from memory.
.PT type~7:
The m bytes following the header form
an unsigned integer number with a size of m bytes,
which is an initializer for the next m bytes
of the global data area.
m is governed by the same restrictions as for
transfer of objects to/from memory.
.PT type~8:
The header is followed by an ASCII string, null terminated, to
initialize, in global data,
a floating point number with a size of m bytes.
m is governed by the same restrictions as for
transfer of objects to/from memory.
The ASCII string contains the notation of a real as used in the
Pascal language.
.PE
.P
The NPROC procedure descriptors on the load file consist of
an instruction space address (of~pointer~size) and
an integer (of~pointer~size) specifying the number of bytes for
locals.

View file

@ -1,16 +0,0 @@
.so /usr/lib/tmac/tmac.kun
.SS 6
.RP
.PL 12i 11i
.LL 89
.MS T E
\!.TL '%'''
.ME
.MS T O
\!.TL '''%'
.ME
.MS B
.sp 1
.ME
.SM S1 B
.SM S2 B

View file

@ -1,245 +0,0 @@
.SN 5
.BP
.S1 "MAPPING OF EM DATA MEMORY ONTO TARGET MACHINE MEMORY"
The EM architecture is designed to be implemented
on many existing and future machines.
EM memory is highly fragmented to make
adaptation to various memory architectures possible.
Format and encoding of pointers is explicitly undefined.
.P
This chapter gives solutions to some of the
anticipated problems.
First, we describe a possible memory layout for machines
with 64K bytes of address space.
Here we use a member of the EM family with 2-byte word and pointer
size.
The most straightforward layout is shown in figure 2.
.N 1
.DS
65534 -> |-------------------------------|
|///////////////////////////////|
|//// unimplemented memory /////|
|///////////////////////////////|
ML -> |-------------------------------|
| |
| | <- LB
| stack and local area |
| |
|-------------------------------| <- SP
|///////////////////////////////|
|//////// inaccessible /////////|
|///////////////////////////////|
|-------------------------------| <- HP
| |
| heap area |
| |
| |
HB -> |-------------------------------|
| |
| global data area |
| |
EB -> |-------------------------------|
| |
| program text | <- PC
| |
| ( and tables ) |
| |
| |
PB -> |-------------------------------|
|///////////////////////////////|
|////////// undefined //////////|
|///////////////////////////////|
0 -> |-------------------------------|
Figure 2. Memory layout showing typical register
positions during execution of an EM program.
.DE 2
The base registers for the various memory pieces can be stored
in target machine registers or memory.
.IS
.N 1
.TS
tab(;);
l 1 l l l.
PB;:;program base;points to the base of the instruction address space.
EB;:;external base;points to the base of the data address space.
HB;:;heap base;points to the base of the heap area.
ML;:;memory limit;marks the high end of the addressable data space.
.TE 1
.IE
The stack grows from high
EM addresses to low EM addresses, and the heap the
other way.
The memory between SP and HP is not accessible,
but may be allocated later to the stack or the heap if needed.
The local data area is allocated starting at the high end of
memory.
.P
Because EM address 0 is not mapped onto target
address 0, a problem arises when pointers are used.
If a program pushed a constant, say 6, onto the stack,
and then tried to indirect through it,
the wrong word would be fetched,
because EM address 6 is mapped onto target address EB+6
and not target address 6 itself.
This particular problem is solved by explicitly declaring
the format of a pointer to be undefined,
so that using a constant as a pointer is completely illegal.
However, the general problem of mapping pointers still exists.
.P
There are two possible solutions.
In the first solution, EM pointers are represented
in the target machine as true EM addresses,
for example, a pointer to EM address 6 really is
stored as a 6 in the target machine.
This solution implies that every time a pointer is fetched
EB must be added before referencing
the target machine's memory.
If the target machine has powerful indexing
facilities, EB can be kept in a target machine register,
and the relocation can indeed be done on
every reference to the data address space
at a modest cost in speed.
.P
The other solution consists of having EM pointers
refer to the true target machine address.
Thus the instruction LAE 6 (Load Address of External 6)
would push the value of EB+6 onto the stack.
When this approach is chosen, back ends must know
how to offset from EB, to translate all
instructions that manipulate EM addresses.
However, the problem is not completely solved,
because a front end may have to initialize a pointer
in CON or ROM data to point to a global address.
This pointer must also be relocated by the back end or the interpreter.
.P
Although the EM stack grows from high to low EM addresses,
some machines have hardware PUSH and POP
instructions that require the stack to grow upwards.
If reasons of efficiency urge you to use these
instructions, then EM
can be implemented with the memory layout
upside down, as shown in figure 3.
This is possible because the pointer format is explicitly undefined.
The first element of a word array will have a
lower physical address than the second element.
.N 2
.DS
| | | |
| EB=60 | | ^ |
| | | | |
|-----------------| |-----------------|
105 | 45 | 44 | 104 214 | 41 | 40 | 215
|-----------------| |-----------------|
103 | 43 | 42 | 102 212 | 43 | 42 | 213
|-----------------| |-----------------|
101 | 41 | 40 | 100 210 | 45 | 44 | 211
|-----------------| |-----------------|
| | | | |
| v | | EB=255 |
| | | |
Type A Type B
.sp 2
Figure 3. Two possible memory implementations.
Numbers within the boxes are EM addresses.
The other numbers are physical addresses.
.DE 2
.A 0 0
So, we have two different EM memory implementations:
.IS
.PS - 4
.PT A~-
stack downwards
.PT B~-
stack upwards
.PE
.IE
.P
For each of these two possibilities we give the translation of
the EM instructions to push the third byte of a global data
block starting at EM address 40 onto the stack and to load the
word at address 40.
All translations assume a word and pointer size of two bytes.
The target machine used is a PDP-11 augmented with push and pop instructions.
Registers 'r0' and 'r1' are used and suffer from sign extension for byte
transfers.
Push $40 means push the constant 40, not word 40.
.P
The translation of the EM instructions depends on the pointer representation
used.
For each of the two solutions explained above the translation is given.
.P
First, the translation for the two implementations using EM addresses as
pointer representation:
.DS
.TS
tab(:), center;
l s l s l s
_ s _ s _ s
l 2 l 6 l 2 l 6 l 2 l.
EM:type A:type B
LAE:40:push:$40:push:$40
ADP:3:pop:r0:pop:r0
::add:$3,r0:add:$3,r0
::push:r0:push:r0
LOI:1:pop:r0:pop:r0
::-::neg:r0
::clr:r1:clr:r1
::bisb:eb(r0),r1:bisb:eb(r0),r1
::push:r1:push:r1
LOE:40:push:eb+40:push:eb-41
.TE
.DE
.BP
.P
The translation for the two implementations, if the target machine address is
used as pointer representation, is:
.N 1
.DS
.TS
tab(:), center;
l s l s l s
_ s _ s _ s
l 2 l 6 l 2 l 6 l 2 l.
EM:type A:type B
LAE:40:push:$eb+40:push:$eb-40
ADP:3:pop:r0:pop:r0
::add:$3,r0:sub:$3,r0
::push:r0:push:r0
LOI:1:pop:r0:pop:r0
::clr:r1:clr:r1
::bisb:(r0),r1:bisb:(r0),r1
::push:r1:push:r1
LOE:40:push:eb+40:push:eb-41
.TE
.DE
.P
The translation presented above is not intended to be optimal.
Most machines can handle these simple cases in one or two instructions.
It demonstrates, however, the flexibility of the EM design.
.P
There are several possibilities to implement EM on machines with
address spaces larger than 64k bytes.
For EM with two byte pointers one could allocate instruction and
data space each in a separate 64k piece of memory.
EM pointers still have to fit in two bytes,
but the base registers PB and EB may be loaded in hardware registers
wider than 16 bits, if available.
EM implementations can also make efficient use of a machine
with separate instruction and data space.
.P
EM with 32 bit pointers allows one to make use of machines
with large address spaces.
In a virtual, segmented memory system one could use a separate
segment for each fragment.

View file

@ -1,80 +0,0 @@
.BP
.SN 2
.S1 MEMORY
The EM machine has two distinct address spaces,
one for instructions and one for data.
The data space is divided up into 8-bit bytes.
The smallest addressable unit is a byte.
Bytes are numbered consecutively from 0 to some maximum.
All sizes in EM are expressed in bytes.
.P
Some EM instructions can transfer objects containing several bytes
to and/or from memory.
The size of all objects larger than a word must be a multiple of
the wordsize.
The size of all objects smaller than a word must be a divisor
of the wordsize.
For example: if the wordsize is 2 bytes, objects of the sizes 1,
2, 4, 6,... are allowed.
The address of such an object is the lowest address of all bytes it contains.
For objects smaller than the wordsize, the
address must be a multiple of the object size.
For all other objects the address must be a multiple of the
wordsize.
For example, if an instruction transfers a 4-byte object to memory at
location \fIm\fP and the wordsize is 2,
\fIm\fP must be a multiple of 2 and the bytes at
locations \fIm\fP, \fIm\fP\|+\|1, \fIm\fP\|+\|2 and
\fIm\fP\|+\|3 are overwritten.
.P
The size of almost all objects in EM
is an integral number of words.
Only two operations are allowed on
objects whose size is a divisor of the wordsize:
push it onto the stack and pop it from the stack.
The addressing of these objects in memory is always indirect.
If such a small object is pushed onto the stack
it is assumed to be a small integer and stored
in the least significant part of a word.
The rest of the word is cleared to zero,
although
EM provides a way to sign-extend a small integer.
Popping a small object from the stack removes a word
from the stack, stores the least significant byte(s)
of this word in memory and discards the rest of the word.
.P
The format of pointers into both address spaces is explicitly undefined.
The size of a pointer, however, is fixed for a member of EM, so that
the compiler writer knows how much storage to allocate for a pointer.
.P
A minor problem is raised by the undefined pointer format.
Some languages, notably Pascal, require a special,
otherwise illegal, pointer value to represent the nil pointer.
The current Pascal-VU compiler uses the
integer value 0 as nil pointer.
This value is also used by many C programs as a normally impossible address.
A better solution would be to have a special
instruction loading an illegal pointer value,
but it is hard to imagine an implementation
for which the current solution is inadequate,
especially because the first word in the EM data space
is special and probably not the target of any pointer.
.P
The next two chapters describe the EM memory
in more detail.
One describes the instruction address space,
the other the data address space.
.P
A design goal of EM has been to allow
its implementation on a wide range of existing machines,
as well as allowing a new one to be built in hardware.
To this end we have tried to minimize the demands
of EM on the memory structure of the target machine.
Therefore, apart from the logical partitioning,
EM memory is divided into 'fragments'.
A fragment consists of consecutive machine
words and has a base address and a size.
Pointer arithmetic is only defined within a fragment.
The only exception to this rule is comparison with the null
pointer.
All fragments must be word aligned.

View file

@ -1,5 +0,0 @@
case $# in
# NOTE: keep these comments off the first line of the file; csh-family
# shells choose the interpreter from the first character of a script.
#
# Exactly one argument is expected: the base name of the document.
# "$1".t is built with make, then run through ntlp and the result is
# piped into lpr.  (ntlp is not defined here; presumably a local filter
# that prepares formatter output for the line printer.)
# '|' replaces the obsolete '^' pipe synonym of the old Bourne shell,
# which POSIX shells treat as an ordinary character, so that ntlp would
# be handed the literal argument "name.t^lpr" and nothing would be printed.
1) make "$1".t ; ntlp "$1".t | lpr ;;
# Any other argument count: print the (Dutch) usage message,
# "<script> needs an argument".
*) echo $0 heeft een argument nodig ;;
esac

View file

@ -1,4 +0,0 @@
case $# in
# NOTE: keep these comments off the first line of the file; csh-family
# shells choose the interpreter from the first character of a script.
#
# Exactly one argument is expected: the base name of the document.
# "$1".t is built with make and then passed to ntout (not defined here).
# "$1" is quoted so that a name containing blanks or glob characters
# reaches make and ntout as a single, unexpanded word.
1) make "$1".t ; ntout "$1".t ;;
# Any other argument count: print the (Dutch) usage message,
# "<script> needs an argument".
*) echo $0 heeft een argument nodig ;;
esac

View file

@ -1,38 +0,0 @@
.po 0
.TP 1
.ll 79
.sp 15
.ce 4
DESCRIPTION OF A MACHINE
ARCHITECTURE FOR USE WITH
BLOCK STRUCTURED LANGUAGES
.sp 6
.ce 4
Andrew S. Tanenbaum
Hans van Staveren
Ed G. Keizer
Johan W. Stevenson\v'-0.5m'*\v'0.5m'
.sp 2
.ce
August 1983
.sp 2
.ce
Informatica Rapport IR-81
.sp 13
Abstract
.sp 2
.ti +5
EM is a family of intermediate languages
designed for producing portable compilers.
A program called
.B front end
translates source programs to EM.
Another program,
.B back
.BW end ,
translates EM to the assembly language of the target machine.
Alternatively, the EM program can be assembled to a highly
efficient binary format for interpretation.
This document describes the EM languages in detail.
.sp 4
\v'-0.5m'*\v'0.5m' Present affiliation: NV Philips, Eindhoven

View file

@ -1,130 +0,0 @@
.SN 6
.BP
.S1 "TYPE REPRESENTATIONS"
The representations used for typed objects are not precisely
specified by EM.
Sometimes we only specify that a typed object occupies a
certain amount of space and state no further restrictions.
If one wants to have a different representation of the value of
an object on the stack one has to use a convert instruction
in most cases.
We do specify some relations between the representations of
types.
This allows some intermixed use of operators for different types
on the same object(s).
For example, the instruction ZER pushes signed and
unsigned integers with the value zero and empty sets.
ZER has as only argument the size of the object.
.A
The representation of floating point numbers is a good example:
it allows widely varying implementations.
The only ways to create floating point numbers are via
initialization and via conversions from integer numbers.
Only by using conversions to integers and comparing
two floating point numbers with each other, can these numbers
be converted to human readable output.
Implementations may use base 10, base 2 or any other
base for exponents, and have freedom in choosing the range of
exponent and mantissa.
.A
Other types are more precisely described.
In the following paragraphs a description will be given of the
restrictions imposed on the representation of the types used.
A number \fBn\fP used in these paragraphs indicates the size of
the object in \fIbits\fP.
.S2 "Unsigned integers"
The range of unsigned integers is 0..2\v'-0.5m'\fBn\fP\v'0.5m'-1.
A binary representation is assumed.
The order of the bits within an object is knowingly left
unspecified.
Discussing bit order within each 8-bit byte is academic,
so the only real freedom of this specification lies in the byte
order.
We really do not care whether an implementation of a 4-byte
integer has its bytes in a particular order of significance.
This of course means that some sequences of instructions have
unpredictable effects.
For example:
.DS
LOC 258 ; STL 0 ; LAL 0 ; LOI 1 ( wordsize >=2 )
.DE
The value on the stack after executing this sequence
can be anything,
but will most likely be 1 or 2.
.A
Conversions between unsigned integers of different sizes have to
be done with explicit convert instructions.
One cannot simply pad an unsigned integer with zeros at either end
and expect a correct result.
.A
We assume existence of at least single word unsigned arithmetic
in any implementation.
.S2 "Signed Integers"
The range of signed integers is -2\v'-0.5m'\fBn\fP-1\v'0.5m'~..~2\v'-0.5m'\fBn\fP-1\v'0.5m'-1,
in other words the range of signed integers of \fBn\fP bits
using two's complement arithmetic.
The representation is the same as for unsigned integers except
the range 2\v'-0.5m'\fBn\fP-1\v'0.5m'~..~2\v'-0.5m'\fBn\fP\v'0.5m'-1 is mapped on the
range -2\v'-0.5m'\fBn\fP-1\v'0.5m'~..~-1.
In other words, the most significant bit is used as sign bit.
The convert instructions between signed and unsigned integers
of the same size can be used to catch errors.
.A
The value -2\v'-0.5m'\fBn\fP-1\v'0.5m' is used for undefined
signed integers.
EM implementations should trap when this value is used in an
operation on signed integers.
The instruction mask, accessed with SIM and LIM -~see chapter 9~- ,
can be used to disable such traps.
.A
We assume existence of at least single word signed arithmetic
in any implementation.
.BP
.S2 "Floating point values"
Floating point values must have a signed mantissa and a signed
exponent.
Although no base is specified, base 2 is the normal choice,
because the FEF instruction pushes the exponent in base 2.
.A
The implementation of floating point arithmetic is optional.
The compilers currently in use have runtime parameters for the
size of the floating point values they should use.
Common choices are 4 and/or 8 bytes.
.S2 Pointers
EM has two kinds of pointers: for instruction and for data
space.
Each kind can only be used for its own space, conversion between
these two subtypes is impossible.
We assume that pointers have a range from 0 upwards.
Any implementation may have holes in the pointer range between
fragments.
One can of course not expect to be able to address two megabytes
of memory using a 2-byte pointer.
Normally, a 2-byte pointer allows up to 65536 bytes of
addressable memory.
.A
Pointer representation has one restriction.
The pointer with the same representation as the integer zero of
the same size should be invalid.
Some languages and/or runtime systems represent the nil
pointer as zero.
.S2 "Bit sets"
All bit sets of size \fBn\fP are subsets of the set
{~i~|~i>=0,~i<\fBn\fP~}.
A bit set contains a bit for each element showing its
presence or absence.
Bit sets are subdivided into words.
The word with the lowest EM address governs the subset
{~i~|~i>=0,~i<\fBm\fP~}, where \fBm\fP is the number of bits in
a word.
The next higher words each govern the next higher \fBm\fP set elements.
The relation between a set with size of
a word and an unsigned integer word is that
the value of the unsigned integer is the summation of the
2\v'-0.5m'i\v'0.5m' where i is in the set.
.A
Example: a 2-word bit set (wordsize 2) containing the
elements 1, 6, 8, 15, 18, 21, 27 and 28 is composed of two
integers, e.g. at addresses 40 and 42.
The word at 40 contains the value 33090 (or~-32446),
the word at 42 contains the value 6180.

View file

@ -1,621 +0,0 @@
.\" $Header$
.nr LL 7.5i
.nr PD 1v
.TL
Amsterdam Compiler Kit installation guide
.AU
Ed Keizer
.AI
Wiskundig Seminarium
Vrije Universiteit
Amsterdam
.NH
Introduction
.PP
This document
describes the process of installing Amsterdam Compiler Kit.
It depends on your combination of hard- and software how
hard it will be to install the kit.
This description is intended for a PDP 11/44 running
.UX
Version 7.
Installation on other PDP 11's should be easy, as long
as they have separate instruction and data space.
Installation on machines without this feature, like the PDP 11/34 and
PDP 11/60, requires extensive surgery on some programs and is
thought of as impossible.
See chapter 6 for installation on other systems.
.NH
Restoring tree
.PP
The process of installing Amsterdam Compiler Kit is quite simple.
It is important that the original Amsterdam Compiler Kit
distribution tree structure is restored.
Proceed as follows:
.IP " -" 10
Create a directory, for example /usr/em, on a device
with at least 20000 blocks left.
.IP " -"
Change to that directory (cd ...); it will be the working directory.
.IP " -"
Extract all files from the distribution medium, for instance
magtape:
\fBtar x\fP.
.IP " -"
Keep a copy of the original distribution to be able to repeat the process
of installation in case of disasters.
This copy is also useful as a reference point for diff-listings.
.LP
The directories in the tree contain the following information:
.nr PD 1v
.IP "lib" 14
.br
almost all binaries and shell files used by commands and
library em_data.a from util/data
.IP "lib/ack"
.br
The command descriptor files used by the program ack.
.nr PD 0
.IP "bin"
.br
the few utilities that tie things together
.IP "etc"
.br
The MAIN description of EM sits here.
It contains files (e.g. em_table) describing
the opcodes and pseudos in use,
the operands allowed, effect on the stack etc. etc.
Make in this directory creates most of the files in h.
.IP "include"
.br
More or less system independent include files needed by modules
in the C library from lang/cem/libcc.
Especially needed for "stdio".
.IP "h"
.br
The #include files for:
.nf
as_spec.h Used by EM assembler and interpreters.
em_abs.h Contains trap numbers and address for lin and fil
em_flag.h Definition of bits in array em_flag in lib/em_data.a
Describes parameters effect on flow of instructions
em_mes.h Definition of names for mes pseudo numbers
em_mnem.h instruction => compact mapping.
em_pseu.h pseudo instruction => compact mapping
em_ptyp.h Useful for compact code reading/writing,
defines classes of parameters
em_spec.h Definition of constants used in compact code
local.h Various definitions for local versions
pc_err.h Definitions of error numbers in Pascal
pc_file.h Macro's used in file handling in Pascal
em_path.h Pathnames used by \fIack\fP, intended
for all utilities
pc_size.h Sizes of objects used by Pascal compiler and
run-time system.
em_reg.h Definition of names for register types.
.IP "doc"
.br
Documentation
.nf
cg.doc Use and internal specification of the backend.
.br
regadd.doc Update for cg.doc concerning register variables
.br
regadd.doc Description of steps to add register variables.
.br
ack.doc Layout of description files needed for each machine.
.br
cref.doc C reference manual, addendum
.br
install.doc Ack Installation Guide
.br
pcref.doc Pascal reference manual, addendum
.br
peep.doc Description of the peephole optimizer
.br
em.doc EM reference manual
.br
toolkit.doc A general overview of the toolkit
.br
v7bugs.doc Bugs in the standard V7 system
.br
val.doc Pascal validation suite version 3 report
.nf
.IP "doc/em.doc"
.br
The EM-manual IR-81
.IP "doc/em.doc/int"
.br
The EM interpreter written in pascal
.IP "mkun"
.br
The PUBMAC macro package for nroff/troff from the Katholieke Universiteit at
Nijmegen.
It is used for the EM reference manual,
the Makefile installs the macro package in
/usr/lib/tmac/tmac.mkun*.
This package is in the public domain.
.IP "mach"
.br
just there to group the directories for all machines
these directories have sub-directories named:
.nf
as the assembler ( *.s + libraries => a.out )
cg the new backend ( *.m => *.s )
lib the libraries for all run-time systems
these libraries are used by the assembler.
libpc Used to create Pascal run-time system in 'lib'
libcc Used to create C run-time system in 'lib'
libem Sources for EM runtime system, result sits in 'lib'
test Various tests
dl Down-load programs
int Source for an interpreter
available are:
PMDS II 68000, wordsize 2, ptrsize 4
mach/m68k2
mach/m68k2/as
mach/m68k2/cg
mach/m68k2/libem
mach/m68k2/lib
mach/m68k2/dl
mach/m68k2/libpc
mach/m68k2/libcc
mach/m68k2/libsys
bare 6809
mach/6809
mach/6809/as
8080, wordsize 2, ptrsize 2
mach/8080
mach/8080/as
mach/8080/test
mach/8080/libcc
mach/8080/lib
bare 8086, wordsize 2, ptrsize 2
mach/i86
mach/i86/as
mach/i86/lib
mach/i86/libcc
mach/i86/dl
mach/i86/libem
mach/i86/libpc
mach/i86/saio (library for stand-alone EM on 86/12A )
pdp 11, UNIX/V7, wordsize 2, ptrsize 2
mach/pdp
mach/pdp/test
mach/pdp/libem
mach/pdp/lib
mach/pdp/libcc
mach/pdp/libpc
mach/pdp/cg
mach/pdp/int -PDP 11/44 EM interpreter
vax 780, UNIX V7, wordsize 4, ptrsize 4
mach/vax4
mach/vax4/cg
mach/vax4/lib
mach/vax4/libcc
mach/vax4/libem
mach/vax4/libpc
z80, CP/M, wordsize 2, ptrsize 2
mach/z80
mach/z80/as
mach/z80/libem
mach/z80/lib
mach/z80/libcc
mach/z80/libpc
mach/z80/int -Z80 EM interpreter
z80, nascom
mach/z80a
mach/z80a/dl
vax 11/780, Berkeley UNIX, wordsize 2, ptrsize 4
mach/vax2
mach/vax2/cg
mach/vax2/lib
mach/vax2/libpc
mach/vax2/libem
bare 6500, wordsize 2, ptrsize 2
mach/6500
mach/6500/as
mach/6500/dl
mach/6500/libem
mach/6500/lib
bare 6800, wordsize 2, ptrsize 2
mach/6800
mach/6800/as
EM virtual machine code, wordsize 2, ptrsize 2
mach/int
mach/int/libcc
mach/int/libpc
mach/int/lib
mach/int/test
The directory proto contains files used by most machines.
e.g. makefiles for libraries for C and Pascal
mach/proto
mach/proto/libg
.fi
.IP "emtest"
.br
Contains prototype of em test set.
.IP "man"
.br
Man files for various utilities
.IP "lang"
.br
just there to group the directories for all front-ends
.IP "lang/pc"
.br
Pascal front-end
.IP "lang/pc/libpc"
.br
Source of Pascal run-time system ( in EM or C )
.IP "lang/pc/test"
.br
Some test programs written in Pascal
.IP "lang/pc/pem"
.br
The compiler proper
.IP "lang/cem"
.br
C front-end
.IP "lang/cem/libcc"
.br
Directories with sources of C runtime system, libraries (in EM or C)
.IP "lang/cem/libcc/gen"
.br
Sources for routines in chapter III of UNIX programmers manual,
excluding STDIO
.IP "lang/cem/libcc/stdio"
.br
STDIO sources
.IP "lang/cem/libcc/mon"
.br
Sources for routines in chapter II, written in EM
.IP "lang/cem/comp"
.br
The compiler proper
.IP "lang/cem/ctest"
.br
C test set
.IP "lang/cem/ctest/cterr"
.br
Programs developed for pinpointing previous errors
.IP "lang/cem/ctest/ct*"
.br
The test programs.
.IP "util"
.br
Contains directories with various utilities
.IP "util/opt"
.br
EM peephole optimizer (*.k => *.m)
.IP "util/misc"
.br
Decode (*.[km] => *.e) + encode (*.e => *.k)
.IP "util/data"
.br
The C-code for `lib/em_data.a`
These sources are created by the Makefile in `etc`
.IP "util/ass"
.br
The EM assembler ( *.[km] + libraries => e.out )
.IP "util/arch"
.br
The archiver to be used for ALL EM utilities
.IP "util/cgg"
.br
A program needed for compiling backends.
.IP "util/cpp"
.br
The V7 C preprocessor.
.LP
All pathnames mentioned in the text of this document are relative to the
working directory, unless they start with '/'.
.PP
The person doing the installation needs permission to write in the
directories of the Amsterdam Compiler Kit distribution tree.
Preferably you should log in as sys (uid=3,gid=0).
.NH
Pathnames
.PP
Absolute pathnames are concentrated in "h/em_path.h".
Only the pascal runtime system and the utility \fIack\fP use
absolute pathnames to access files in the kit.
The tree is distributed with /usr/em as the working
directory.
The definition of EM_DIR in em_path.h should be altered to
specify the root
directory for the Compiler Kit distribution on your system.
Em_path.h also specifies which directory should be used for
temporary files.
Most programs from the kit do indeed use that directory
although some remain stubborn and use /tmp or /usr/tmp.
.LP
The shape of the tree should not be altered lightly because
most Makefiles and the
utility \fIack\fP know the shape of the ACK tree.
All pathnames in all Makefiles are relative, that is do not
have "/" as the first character.
The knowledge of the utility \fIack\fP about the shape of the tree is
concentrated in the files in the directory lib/ack.
.NH
Commands
.PP
The kit is distributed with all available commands in the bin
directory.
The commands distributed are:
.IP "\fIack\fP, \fIacc\fP, \fIapc\fP and their links"
.br
They are used to compile the Pascal, C, etc... programs.
.IP \fIarch\fP
.br
The archiver used for the EM- and universal assembler.
.IP "\fIem\fP and \fIeminform\fP"
.br
The EM interpreter for the PDP-11 and the program to unravel
its post-mortem information.
.LP
We currently make the kit available to our users by telling
them that they should include the bin directory of the kit in
their PATH shell variable.
The programs will still work when moved to a different
directory.
The copying should preferably be done with tar, since links are
heavily used.
Renaming of the programs linked to \fIack\fP will not always
produce the desired result.
This program uses its call name as an argument.
Any call name not being \fIcc\fP, \fIacc\fP, \fIpc\fP or \fIapc\fP will be
interpreted as the name of a 'machine description' and the
program will try to find a description file with that name.
All recompilations will only touch the utilities in the bin
directory, not your own copies.
.NH
Options
.PP
There is one important option in h/local.h.
The utility \fIack\fP uses a default machine name when called
as \fIacc\fP, \fIcc\fP, \fIapc\fP, \fIpc\fP or \fIack\fP.
The machine name used for default is determined by the
definition of ACKM in h/local.h.
The current definition is \fIpdp\fP.
.PP
The distribution is tailored to one specific operating system per CPU type.
For some of these CPU's it is possible to tailor the distribution to another
operating system.
The steps to be taken are described in READ_ME (or README) files in the
subdirectories of the directory in EM_DIR/mach for that particular machine.
For example: The vax2 distribution is tailored to BSD4.1, but has #define's
for BSD4.1c and BSD4.2.
For the names and places of these define's look in EM_DIR/mach/vax2/cg and
EM_DIR/mach/vax2/libem.
.NH
Recompilation
.PP
The kit comes with binaries in the directories \fBbin\fP and
\fBlib\fP.
Some directories among mach/*/lib contain archives with object files,
notably mach/pdp/lib.
The binaries and object files are for a PDP 11/44 with floating
point running UNIX V7.
.PP
Almost all directories contain a "Makefile" or a shell command file called
"make".
Apart from commands applying to that specific directory these
files all recognize a few special commands.
When called with one of these they will apply the command to
their own directory and all subdirectories.
The special commands are:
.IP "install" 20
recompile and install all binaries and libraries.
.br
Some Makefiles allow errors to occur in the programs they call.
They ignore such errors and notify the user with the message
"~....... error code n: ignored".
Whenever such a message appears in the output you can ignore it
too.
.br
The installation of the PUBMAC macro package is not done
automatically from the higher level directory.
.IP "cmp"
recompile all binaries and libraries and compare them to the
ones already installed.
.IP pr
print the sources and documentation on the standard output.
.IP opr
make pr | opr
.br
Opr should be an off-line printer daemon.
On some systems it exists under another name e.g. lpr.
The easiest way to call such a spooler is using a shell script
with the name opr that calls lpr.
This script should be placed in /usr/bin or EM_DIR/bin or
one of the directories in your PATH.
.IP clean
remove all files not needed for day-to-day use,
that is binaries not in bin or lib, object files etc.
.LP
Example:
.nf
.sp 1
make install
.sp 1
.fi
given as command in the home directory will cause
recompilation of all programs in the kit.
.LP
Recompilation of the complete kit takes about 9 hours on a PDP
11/44.
.NH 2
Recompilation on a different machine.
.PP
Installation on other systems will often require recompilation
of all programs.
The presence of a C compiler is essential for recompilation.
Except the Pascal compiler proper all programs are written in C.
Some modules are derived from \fIyacc\fP sources.
Retranslating these programs from that yacc source is not
necessary, although it might improve performance.
Some versions of \fIyacc\fP 'know' that the resulting C programs will
run on a 32-bit int machine.
C modules produced by such a \fIyacc\fP are not portable and
should not be used to (cross)compile programs for 16-bit machines.
We assume a version of UNIX which, apart from the C-compiler,
contains most normal utilities, like ed, sed, grep, make, the
Bourne shell etc.
All Makefiles use the system C-compiler.
The existence of a backend for your system is of course essential
if you wish to produce executable files for that system.
When the backend exists it is also possible to boot the Pascal
Compiler,
that is written in Pascal itself.
The kit contains the compact code files for the 2/2 and 2/4
versions of the Pascal compiler.
The current version of this compiler can only be used on machines
with a 16-bit word size and 16- or 32-bit pointers.
The Makefile automatically tries to boot the Pascal compiler
from one of these compact code files, if the compiler proves
unable to compile itself.
.PP
The native assemblers and loaders are used on PDP-11 and VAX.
The description files in lib/ack for other systems use our
universal assembler.
The load file produced by this assembler is not directly
usable in any system known to us,
but has to be converted before it can be put to use.
The \fIdl\fP programs present for some machines unravel
these load files and transmit commands to load memory
to a microprocessor over a serial line.
The PDP-11 version of our universal assembler is supplied
with a conversion program.
The file man/a.out.5 contains a description of the format of
the universal assembler load file,
it might be useful to those who wish or need to write their
own conversion programs.
.br
Berkeley UNIX for the VAX'en has (at least) three different
versions, BSD4.1a, BSD4.1c and BSD4.2. The READ_ME files in the
directories mach/vax2/cg, mach/vax2/libem, mach/vax4/cg and
mach/vax4/libem tell you how to adapt the vax2 and vax4 backend
to these versions.
.NH 2
Recompiling libraries
.PP
The kit contains sources for part II and III of the C-library, except
the math functions, they are grabbed from our V7 system and sometimes
altered in an EM dependent way or replaced altogether when the original
was in assembly.
These files can be used to make libraries for the Ack C-compiler.
The recompilation process uses a few include files.
The include directory in the EM home directory contains a few more
or less system independent include files.
The system dependent include files are fetched from /usr/include
on the system you use to recompile.
This may lead to several problems.
Sometimes the system differs so much from V7 that certain manifest constants
do not exist any more.
At other times these include files were written for a compiler without
a restriction on name length.
In that case - I've seen it happen - people tend to use differing
identifiers that are identical in the first eight characters.
All these problems you have to solve yourself,
the libraries are only included as an extra and too much system
dependent to give any guarantees.
.NH
Fixes to the UNIX V7 system
.PP
UNIX System V7 has a few bugs that prevent a part of or the whole kit
from working properly.
To be honest, we do not know which of the following changes are
essential to the functioning of our kit.
.PP
The file "doc/v7bugs.doc" gives for each of the following bugs
a small test program and a diff listing of the source files that have to be
modified.
.IP 1
Bug in the C optimizer for unsigned comparison
.nr PD 0
.IP 2
The loader 'ld' fails for large data and text portions
.IP 3
Floating point registers are not saved if more memory is needed.
.IP 4
Floating point registers are not copied to child in fork().
.nr PD 1v
.LP
Use the test programs to see if the errors are present in your system
and to check if the modifications are effective.
.NH
Testing
.PP
Test sets are available in Pascal, C and EM assembly.
.IP em 8
.br
The directory emtest contains a few EM test programs.
The EM assembly files in these tests must be transformed into
load files, thereby avoiding use of the EM optimizer.
These tests use the LIN and NOP instructions to mark the passing of each
test.
The NOP instruction prints the current line number during the
test phase.
Each test notifies its correctness by calling LIN with a unique
number followed by a NOP which prints this line number.
The test finishes normally with 0 as the last number printed.
In all other cases a bug showed its
existence.
.IP Pascal
.br
The directory lang/pc/test contains a few pascal test programs.
All these programs print the number of errors found and an
identification of these errors.
.IP C
.br
The sub-directories in lang/cem/ctest contain C test programs.
The idea behind these tests is:
when you have a program called xx.c, compile it into xx.cem.
Run it with standard output to xx.cem.r, compare this file to
xx.cem.g, a file containing the 'ideal' output.
Any differences will point to implementation differences or
bugs.
Giving the command "run gen" or plain "run" starts this
process.
The differences will be presented on standard output.
The contents of the result files depend on the wordsize,
the xx.cem.g files on the distribution are intended for a
16-bit machine.
.NH
Documentation
.PP
Manual pages for the Amsterdam Compiler Kit can be copied
to "/usr/man/man?" by the
following commands:
.DS
cd man
make install
.DE
.LP
Several documents are provided:
.DS
doc/toolkit.doc: a general overview
doc/pcref.doc: the Pascal-frontend reference manual
doc/val.doc: the results of running the Pascal Validation Suite
doc/cref.doc: the C-frontend manual
doc/em.doc: a description of the EM machine architecture
doc/peep.doc: internal documentation for the peephole optimizer
doc/cg.doc: documentation for backend writers and maintainers
doc/regadd.doc: addendum to previous document describing register variables
doc/install.doc: this document
.DE
.LP
The Validation Suite is a collection of more than 200 Pascal programs,
designed by Brian Wichmann and Arthur Sale to test Pascal compilers.
We are not allowed to distribute it, but you may
request a copy from
.DS
Richard J. Cichelli
A.N.P.A.
1350 Sullivan Trail
P.O. Box 598
Easton, Pennsylvania 18042
USA
.DE
.LP
Good luck.

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,506 +0,0 @@
.\" $Header$
.TL
Internal documentation on the peephole optimizer
.br
from the Amsterdam Compiler Kit
.NH 1
Introduction
.PP
Part of the Amsterdam Compiler Kit is a program to do
peephole optimization on an EM program.
The optimizer scans the program to match patterns from a table
and if found makes the optimization from the table,
and with the result of the optimization
it tries to find yet another optimization
continuing until no more optimizations are found.
.PP
Furthermore, for historical reasons, it does some optimizations that
can not be called peephole optimizations,
like branch chaining and the deletion of unreachable code.
.PP
The peephole optimizer consists of three parts
.IP 1)
A driving table
.IP 2)
A program translating the table to internal format
.IP 3)
C code compiled with the table to make the optimizer proper
.PP
In this document the table format, internal format and
data structures in the optimizer will be explained,
plus a hint on what the code does where it might not be obvious.
It is a simple program mostly.
.NH 1
Table format
.PP
The driving table consists of pattern/replacement pairs,
in principle one per line,
although a line starting with white space is considered
a continuation line for the previous.
The general format is:
.DS
optimization : pattern ':' replacement '\en'
.sp
pattern : EMlist optional_boolean_expression
.sp
replacement : EM_plus_operand_list
.DE
Example of a simple one
.DS
loc stl $1==0 : zrl $2
.DE
There is no real limit for the length of the pattern or the replacement,
the replacement might even be longer than the pattern,
and expressions can be made arbitrarily complicated.
.PP
The expressions in the table are made of the following pieces:
.IP -
Integer constants
.IP -
$\fIn\fP, standing for the operand of the \fIn\fP'th EM
instruction in the pattern,
undefined if that instruction has no operand.
.IP -
w, standing for the wordsize of the code optimized.
.IP -
p, for the pointersize.
.IP -
defined(expr), true if expression is defined
.IP -
samesign(expr,expr), true if expressions have the same sign.
.IP -
sfit(expr,expr), ufit(expr,expr),
true if the first expression fits signed or unsigned in the number
of bits given in the second expression.
.IP -
rotate(expr,expr),
first expression rotated left the number of bits given by the second expression.
.IP -
notreg(expr),
true if the local with the expression as number is not a candidate to put
in a register.
.IP -
rom(\fIn\fP,expr), contents of the rom descriptor at index expr that
is associated with the global label that should be the argument of
the \fIn\fP'th EM instruction.
Undefined if such a thing does not exist.
.PP
The usual arithmetic operators may be used on integer values,
if any operand is undefined the expression is undefined,
except for the defined() function above.
An undefined expression used for its truth value is false.
All arithmetic on local label operands is forbidden,
only things allowed are tests for equality.
Arithmetic on global labels makes sense,
i.e. one can add a global label and a constant,
but not two global labels.
.PP
In the table one can use five additional EM instructions in patterns.
These are:
.IP lab
Stands for a local label
.IP LLP
Load Local Pointer, translates into a
.B lol
or into a
.B ldl
depending on the relationship between wordsize and pointersize.
.IP LEP
Load External Pointer, translates into a
.B loe
or into a
.B lde .
.IP SLP
Store Local Pointer,
.B stl
or
.B sdl .
.IP SEP
Store External Pointer,
.B ste
or
.B sde .
.PP
There is only one peephole optimizer,
so the substitutions to be made for the last four instructions
are made at run time before the first optimizations are made.
.NH 1
Internal format
.PP
The translating program,
.I mktab
converts the table into an array of bytes where all
patterns follow unaligned.
Format of a pattern is:
.IP 1)
One byte for high byte of hash value,
will be explained later on.
.IP 2)
Two bytes for the index of the next pattern in a chain.
.IP 3)
An integer\u*\d,
.FS
* An integer is encoded as a byte when less than 255,
otherwise as a byte containing 255 followed by two
bytes with the real value.
.FE
pattern length.
.IP 4)
The list of pattern opcodes, one per byte.
.IP 5)
An integer expression index, 0 if not used.
.IP 6)
An integer, replacement length.
.IP 7)
A list of pairs consisting of a one byte opcode and an integer
expression index.
.PP
The expressions are kept in an array of triples,
implementing a binary tree.
The
.I mktab
program tries to minimize the number of triples by reusing
duplicates and even reverses the operands of commutative operators
when doing so would spare a triple.
.NH 1
A tour through the sources
.PP
Now we will walk through the sources and note things of interest.
.NH 2
The header files
.PP
The header files are the place where data structures and options reside.
.NH 3
alloc.h
.PP
In the header file alloc.h several defines can be used to select various
kinds of core allocation schemes.
This is important on small machines like the PDP-11 since a complete
procedure must be in core at the same time,
and the peephole optimizer should not be the limiting factor in
determining the maximum size of procedures if possible.
Options are:
.IP -
USEMALLOC, standard malloc() and free() are used instead of the own
core allocation package.
Not recommended unless the own package does not work on some bizarre
machine.
.IP -
COREDEBUG, prints large amounts of information about core management.
Better not define it unless you change the code and it stops working.
.IP -
SEPID, if you define this you will get an extra procedure that will
go through a lot of work to scrape the last bytes together if the
system won't provide more.
This is not a good idea if memory is scarce and code and data reside
in the same spaces, since the room used by the procedure might well
be more than the room saved.
.IP -
STACKROOM, number of shorts used in stack space.
This is used if memory is scarce and stack space and data space are
different.
On the PDP-11 a UNIX process starts with an 8K stack segment which
cannot be transferred to the data segment.
Under these conditions one can use a lot of the stack space for storage.
.NH 3
assert.h
.PP
Just defines the assert macro.
When compiled with -DNDEBUG all asserts will be off.
.NH 3
ext.h
.PP
Gives external definitions of variables used by more than one module.
.NH 3
line.h
.PP
Defines the structures used to keep instructions,
one structure per line of EM code,
and the structure to keep arguments of pseudos,
one structure per argument.
Both structures essentially contain a pointer to the next,
a type,
and a union containing information depending on the type.
Core is allocated only for the part of the union used.
.PP
The
.I
struct line
.R
has a very compact encoding for small integers,
they are encoded in the type field.
On the PDP-11 this gives a line structure of only 4 bytes for most
instructions.
.NH 3
lookup.h
.PP
Contains definition of the struct used for symbol table management,
global labels and procedure names are kept in one table.
.NH 3
optim.h
.PP
If one defines the DIAGOPT option in this header file,
for every optimization performed a number is written on stderr.
The number gives the number of the pattern in the table
or one of the four special numbers in this header file.
.NH 3
param.h
.PP
Contains one settable option,
LONGOFF.
If this is not defined the optimizer can only optimize programs
with wordsize 2 and pointersize 2.
Set this only if it must be run on a Z80 or something pathetic like that.
.PP
Other defines here should not be touched.
.NH 3
pattern.h
.PP
Contains defines of indices in a pattern,
definition of the expression triples,
definitions of the various expression operators
and definition of the result struct where expression results are put.
.PP
This header file is the main one that is also included by
.I mktab .
.NH 3
proinf.h
.PP
This one contains definitions
for the local label table structs
and for the struct where all information for one procedure is kept.
This is in one struct so it can be saved easily when recursive
procedures have to be resolved.
.NH 3
types.h
.PP
Collection of typedefs to be used by almost all modules.
.NH 2
The C code itself.
.PP
The C code will now be the center of our attention.
We will make a walk through the sources and we will try
to follow the sources in a logical order.
So we will start at
.NH 3
main.c
.PP
The main.c module contains the main() function.
Here nothing spectacular happens,
only thing of interest is the handling of flags:
.IP -L
This is an instruction to the peephole optimizer to perform
one of its auxiliary functions, the generation of a library module.
This makes the peephole optimizer write its output on a temporary file,
and at the end making the real output by first generating a list
of exported symbols and then copying the temporary file behind it.
.IP -n
Disables all optimization.
Only thing the optimizer does now is filling in the blank after the
.I END
pseudo and resolving recursive procedures.
.PP
The place where main() is left is the call to getlines() which brings
us to
.NH 3
getline.c
.PP
This module reads the EM code and constructs a list of
.I
struct line
.R
records,
linked together backwards,
i.e. the first instruction read is the last in the list.
Pseudos are handled here also,
for most pseudos this just means that a chain of argument records
is linked into the linked line list but some pseudos get special attention:
.IP exc
This pseudo is acted upon right away.
Lines read are shuffled around according to instruction.
.IP mes
Some messages are acted upon.
These are:
.RS
.IP ms_err 8
The input is drained, just in case it is a pipe.
After that the optimizer exits.
.IP ms_opt
The do not optimize flag is set.
Acts just like -n on the command line.
.IP ms_emx
The word- and pointersize are read,
complain if we are not able to handle this.
.IP ms_reg
We take notice of the offset of this local.
See also comments in the description of peephole.c
.RE
.IP pro
A new procedure starts, if we are already in one save the status,
else process collected input.
Collect information about this procedure and if already in a procedure
call getlines() recursively.
.IP end
Process collected input.
.PP
The phrase "process collected input" is used twice,
which brings us to
.NH 3
process.c
.PP
This module contains the entry point process() which is called at any
time the collected input must be processed.
It calls a variety of other routines to get the real work done.
Routines in this module are in chronological order:
.IP symknown 12
Marks all symbols seen until now as known,
i.e. it is now known whether their scope is local or global.
This information is used again during output.
.IP symvalue
Runs through the chain of pseudos to give values to data labels.
This needs an extra pass.
It cannot be done during the getlines pass, since an
.B exc
pseudo could destroy things.
Nor can it be done during the backward pass since it is impossible
to do good fragment numbering backward.
.IP checklocs
Checks whether all local labels referenced are defined.
It needs to be sure about this since otherwise the
semi global optimizations made cannot work.
.IP relabel
This routine finds the final destination for each label in the procedure.
Labels followed by unconditional branches or other labels are marked during
the peephole phase and this leads to chains of identical labels.
These chains are followed here, and in the local label table each label
has associated with it its replacement label, after this procedure is run.
Care is taken in this routine to prevent a loop in the program to
cause the optimizer to loop.
.IP cleanlocals
This routine empties the local label table after everything
is processed.
.PP
But before this can all be done,
the backward linked list of instructions first has to be reversed,
so here comes
.NH 3
backward.c
.PP
The routine backward has a number of functions:
.IP -
It reverses the backward linked list, making two forward linked lists,
one for the instructions and one for the pseudos.
.IP -
It notes the last occurrence of data labels in the backward linked list
and puts it in the global symbol table.
This is of course the first occurrence in the procedure.
This information is needed to decide whether the symbols are global
or local to this module.
.IP -
It decides about the fragment boundaries of data blocks.
Fragments are numbered backwards starting at 3.
This is done to be able to make the type of an expression
containing a symbol equal to its fragment.
This type can then not clash with the types integer and local label.
.IP -
It allocates a rom buffer to every data label with a rom behind
it, if that rom contains only plain integers at the start.
.PP
The first thing done after process() has called backward() and some
of its own little routines is a call to the real routine,
the one that does the work the program was written for
.NH 3
peephole.c
.PP
The first routines in peephole.c
implement a linked list for the offsets of local variables
that are candidates for a register implementation.
Several patterns use the notreg() function,
since it is forbidden to combine a load of that variable
with the load of another and
it is not allowed to take the address of that variable.
.PP
The routine peephole hashes the patterns the first time it is called
after which it doesn't do much more than calling optimize.
But first hashpatterns().
.PP
The patterns are hashed at run time of the optimizer because of
the
.B LLP ,
.B LEP ,
.B SLP
and
.B SEP
instructions added to the instruction set in this optimizer.
These are first replaced everywhere in the table by the correct
replacement after which the first three instructions of the
pattern are hashed and the pattern is linked into one of the
256 linked lists.
There is a define CHK_HASH in this module that you
can set if you do not trust the randomness of the hashing
function.
.PP
The attention now shifts to optimize().
This routine calls basicblock() for every piece of code between two labels.
It also notes which labels have another label or a branch behind them
so the relabel() routine from process.c can do something with that.
.PP
Basicblock() keeps making passes over its basic block
until no more optimizations are found.
This might be inefficient if there is a long basicblock with some
deep recursive optimization in one part of it.
The entire basic block is then scanned a lot of times just for
that one piece.
The alternative is backing up after making an optimization and running
through the same code again, but that is difficult
in a single linked list.
.PP
It hashes instructions and calls trypat() for every pattern that has
a full hash value match,
i.e. lower byte and upper byte equal.
Longest pattern is tried first.
.PP
Trypat() checks length and opcodes of the pattern.
If correct it fills the iargs[] array with argument values
and calculates the expression.
If that is also correct the work shifts to tryrepl().
.PP
Tryrepl() generates the list of replacement instructions,
links it into the list and returns true.
Why then the name tryrepl() if it always succeeds?
Well, there is a mechanism in the optimizer,
unused until today that makes it possible to do optimizations that cannot
be described by the table.
It is possible to give a number as a replacement which will cause the
optimizer to call a routine special() to do some work.
This routine might decide not to do an optimization and return false.
.PP
The last routine that is called from process() is putline()
to write the optimized code, bringing us to
.NH 3
putline.c
.PP
The major part of putline.c is the standard set of routines
that makes EM compact code.
The extra functions performed are:
.IP -
For every occurrence of a global symbol it might be necessary to
output an
.B exa ,
.B exp ,
.B ina
or
.B inp
pseudo instruction.
That task is performed.
.IP -
The
.B lin
instructions are optimized here,
.B lni
instructions added for
.B lin
instructions and superfluous
.B lin
instructions deleted.

View file

@ -1,133 +0,0 @@
.\" $Header$
.TL
Addition of register variables to an existing table.
.NH 1
Introduction
.PP
This is a short description of the newest feature in the
table driven code generator for the Amsterdam Compiler Kit.
It describes how to add register variables to an existing table.
This assumes you have the distribution of October 1983 or later.
It is not clear whether you should read this when starting with
a table for a new machine,
or whether you should wait till the table is well debugged already.
.NH 1
Modifications to the table itself.
.NH 2
Register section
.PP
You can add just before the properties of the register one
of the following:
.IP - 2
regvar
.IP -
regvar ( pointer )
.IP -
regvar ( loop )
.IP -
regvar ( float )
.LP
All register variables of one type must be of the same size,
and they may have no subregisters.
.NH 2
Codesection
.PP
.IP - 2
Two pseudo functions are added to the list allowed inside expressions:
.RS
.IP 1) 3
inreg ( expr ) has as a parameter the offset of a local,
and returns 0,1 or 2:
.RS
.IP 2: 3
if the variable is in a register.
.IP 1:
if the variable could be in a register but isn't.
.IP 0:
if the variable cannot be in a register.
.RE
.IP 2)
regvar ( expr ) returns the register associated with the variable.
Undefined if it is not in a register.
So regvar ( expr ) is defined if and only if inreg (expr ) == 2.
.RE
.IP -
It is now possible to remove() a register expression,
this is of course needed for a store into a register local.
.IP -
The return out of a procedure may now involve register restores,
so the special word 'return' in the table will invoke a user defined
function.
.NH 1
Modifications to mach.c
.PP
If register variables are used in a table, the program
.I cgg
will define the word REGVARS during compilation of the sources.
So the following functions described here should be bracketed
by #ifdef REGVARS and #endif.
.IP - 2
regscore(off,size,typ,freq,totyp) long off;
.br
This function should assign a score to a register variable,
the score should preferably be the estimated number of bytes
gained when it is put in a register.
Off and size are the offset and size of the variable,
typ is the type, that is reg_any, reg_pointer, reg_loop or reg_float.
Freq is the number of times it occurs statically, and totyp
is the type of the register it is planned to go into.
.br
Keep in mind that the gain should be net, that is the cost for
register save/restore sequences and the cost of initialisation
in the case of parameters should already be included.
.IP -
i_regsave()
.br
This function is called at the start of a procedure, just before
register saves are done.
It can be used to initialise some variables if needed.
.IP -
f_regsave()
.br
This function is called at end of the register save sequence.
It can be used to do the real saving if multiple register move
instructions are available.
.IP -
regsave(regstr,off,size) char *regstr; long off;
.br
Should either do the real saving or set up a table to have
it done by f_regsave.
Note that initialisation of parameters should also be done,
or planned here.
.IP -
regreturn()
.br
Should restore saved registers and return.
The function result is already in the function return area by now.
.NH 1
Examples
.PP
Here are some examples out of the PDP 11 table
.DS
lol inreg($1)==2| | | regvar($1) | |
lil inreg($1)==2| | | {regdef2, regvar($1)} | |
stl inreg($1)==2| xsource2 |
remove(regvar($1))
move(%[1],regvar($1)) | | |
inl inreg($1)==2| | remove(regvar($1))
"inc %(regvar($1)%)"
setcc(regvar($1)) | | |
.DE
.NH 1
Afterthoughts.
.PP
At the time of this writing the tables for the PDP 11 and the M68000 and
the VAX are converted, in all cases the two byte wordsize versions.
No big problems have occurred, but experience has shown that it is
necessary to check your table carefully for all patterns with locals in them
because if you forget one, code will be generated by that one coderule
to use the memoryslot the local is not in.

View file

@ -1,897 +0,0 @@
.\" $Header$
.RP
.ND
.nr LL 78m
.tr ~
.ds as *
.TL
A Practical Tool Kit for Making Portable Compilers
.AU
Andrew S. Tanenbaum
Hans van Staveren
E. G. Keizer
Johan W. Stevenson
.AI
Mathematics Dept.
Vrije Universiteit
Amsterdam, The Netherlands
.AB
The Amsterdam Compiler Kit is an integrated collection of programs designed to
simplify the task of producing portable (cross) compilers and interpreters.
For each language to be compiled, a program (called a front end)
must be written to
translate the source program into a common intermediate code.
This intermediate code can be optimized and then either directly interpreted
or translated to the assembly language of the desired target machine.
The paper describes the various pieces of the tool kit in some detail, as well
as discussing the overall strategy.
.sp
Keywords: Compiler, Interpreter, Portability, Translator
.sp
CR Categories: 4.12, 4.13, 4.22
.sp 12
Authors' present addresses:
A.S. Tanenbaum, H. van Staveren, E.G. Keizer: Mathematics
Dept., Vrije Universiteit, Postbus 7161, 1007 MC Amsterdam,
The Netherlands
J.W. Stevenson: NV Philips, S&I, T&M, Building TQ V5, Eindhoven,
The Netherlands
.AE
.NH 1
Introduction
.PP
As more and more organizations acquire many micro- and minicomputers,
the need for portable compilers is becoming more and more acute.
The present situation, in which each hardware vendor provides its own
compilers -- each with its own deficiencies and extensions, and none of them
compatible -- leaves much to be desired.
The ideal situation would be an integrated system containing a family
of (cross) compilers, each compiler accepting a standard source language and
producing code for a wide variety of target machines.
Furthermore, the compilers should be compatible, so programs written in
one language can call procedures written in another language.
Finally, the system should be designed so as to make adding new languages
and new machines easy.
Such an integrated system is being built at the Vrije Universiteit.
Its design and implementation is the subject of this article.
.PP
Our compiler building system, which is called the "Amsterdam Compiler Kit"
(ACK), can be thought of as a "tool kit."
It consists of a number of parts that can be combined to form compilers
(and interpreters) with various properties.
The tool kit is based on an idea (UNCOL) that was first suggested in 1960
[7], but which never really caught on then.
The problem which UNCOL attempts to solve is how to make a compiler for
each of
.I N
languages on
.I M
different machines without having to write
.I N
x
.I M
programs.
.PP
As shown in Fig. 1, the UNCOL approach is to write
.I N
"front ends," each
of which translates one source language to a common intermediate language,
UNCOL (UNiversal Computer Oriented Language), and
.I M
"back ends," each
of which translates programs in UNCOL to a specific machine language.
Under these conditions, only
.I N
+
.I M
programs must be written to provide all
.I N
languages on all
.I M
machines, instead of
.I N
x
.I M
programs.
.PP
Various researchers have attempted to design a suitable UNCOL
[2,8], but none of these have become popular.
It is our belief that previous attempts have failed because they have been
too ambitious, that is, they have tried to cover all languages
and all machines using a single UNCOL.
Our approach is more modest: we cater only to algebraic languages
and machines whose memory consists of 8-bit bytes, each with its own address.
Typical languages that could be handled include
Ada, ALGOL 60, ALGOL 68, BASIC, C, FORTRAN,
Modula, Pascal, PL/I, PL/M, PLAIN, and RATFOR,
whereas COBOL, LISP, and SNOBOL would be less efficient.
Examples of machines that could be included are the Intel 8080 and 8086,
Motorola 6800, 6809, and 68000, Zilog Z80 and Z8000, DEC PDP-11 and VAX,
and IBM 370 but not the Burroughs 6700, CDC Cyber, or Univac 1108 (because
they are not byte-oriented).
With these restrictions, we believe the old UNCOL idea can be used as the
basis of a practical compiler-building system.
.KF
.sp 15P
.ce 1
Fig. 1. The UNCOL model.
.sp
.KE
.NH 1
An Overview of the Amsterdam Compiler Kit
.PP
The tool kit consists of eight components:
.sp
1. The preprocessor.
2. The front ends.
3. The peephole optimizer.
4. The global optimizer.
5. The back end.
6. The target machine optimizer.
7. The universal assembler/linker.
8. The utility package.
.sp
.PP
A fully optimizing compiler,
depicted in Fig. 2, has seven cascaded phases.
Conceptually, each component reads an input file and writes a
transformed output file to be used as input to the next component.
In practice, some components may use temporary files to allow multiple
passes over the input or internal intermediate files.
.KF
.sp 12P
.ce 1
Fig. 2. Structure of the Amsterdam Compiler Kit.
.sp
.KE
.PP
In the following paragraphs we will briefly describe each component.
After this overview, we will look at all of them again in more detail.
A program to be compiled is first fed into the (language independent)
preprocessor, which provides a simple macro facility,
and similar textual facilities.
The preprocessor's output is a legal program in one of the programming
languages supported, whereas the input is a program possibly augmented
with macros, etc.
.PP
This output goes into the appropriate front end, whose job it is to
produce intermediate code.
This intermediate code (our UNCOL) is the machine language for a simple
stack machine called EM (Encoding Machine).
A typical front end might build a parse tree from the input, and then
use the parse tree to generate EM code, which is similar to reverse Polish.
In order to perform this work, the front end has to maintain tables of
declared variables, labels, etc., determine where to place the
data structures in memory, and so on.
.PP
The EM code generated by the front end is fed into the peephole optimizer,
which scans it with a window of a few instructions, replacing certain
inefficient code sequences by better ones.
Such a search is important because EM contains instructions to handle
numerous important special cases efficiently
(e.g., incrementing a variable by 1).
It is our strategy to relieve the front ends of the burden of hunting for
special cases because there are many front ends and only one peephole
optimizer.
By handling the special cases in the peephole optimizer,
the front ends become simpler, easier to write and easier to maintain.
.PP
Following the peephole optimizer is a global optimizer [5], which
unlike the peephole optimizer, examines the program as a whole.
It builds a data flow graph to make possible a variety of
global optimizations,
among them, moving invariant code out of loops, avoiding redundant
computations, live/dead analysis and eliminating tail recursion.
Note that the output of the global optimizer is still EM code.
.PP
Next comes the back end, which differs from the front ends in a
fundamental way.
Each front end is a separate program, whereas the back end is a single
program that is driven by a machine dependent driving table.
The driving table for a specific machine tells how the EM code is mapped
onto the machine's assembly language.
Although a simple driving table might just macro expand each EM instruction
into a sequence of target machine instructions, a much more sophisticated
translation strategy is normally used, as described later.
For speed, the back end does not actually read in the driving table at run time.
Instead, the tables are compiled along with the back end in advance, resulting
in one binary program per machine.
.PP
The output of the back end is a program in the assembly language of some
particular machine.
The next component in the pipeline reads this program and performs peephole
optimization on it.
The optimizations performed here involve idiosyncrasies
of the target machine that cannot be performed in the machine-independent
EM-to-EM peephole optimizer.
Typically these optimizations take advantage of special instructions or special
addressing modes.
.PP
The optimized target machine assembly code then goes into the final
component in the pipeline, the universal assembler/linker.
This program assembles the input to object format, extracting routines from
libraries and including them as needed.
.PP
The final component of the tool kit is the utility package, which contains
various test programs, interpreters for EM code,
EM libraries, conversion programs, and other aids for the implementer and
user.
.NH 1
The Preprocessor
.PP
The function of the preprocessor is to extend all the programming languages
by adding certain generally useful facilities to them in a uniform way.
One of these is a simple macro system, in which the user can give names to
character strings.
The names can be used in the program, with the knowledge that they will be
macro expanded prior to being input to the front end.
Macros can be used for named constants, expanding short "procedures"
in line, etc.
.PP
Another useful facility provided by the preprocessor is the ability to
include compile-time libraries.
On large projects, it is common to have all the declarations and definitions
gathered together in a few files that are textually included in the programs
by instructing the preprocessor to read them in, thus fooling the front end
into thinking that they were part of the source program.
.PP
A third feature of the preprocessor is conditional compilation.
The input program can be split up into labeled sections.
By setting flags, some of the sections can be deleted by the preprocessor,
thus allowing a family of slightly different programs to be conveniently stored
on a single file.
.NH 1
The Front Ends
.PP
A front end is a program that converts input in some source language to a
program in EM.
At present, front ends
exist or are in preparation for Pascal, C, and Plain, and are being considered
for Ada, ALGOL 68, FORTRAN 77, and Modula 2.
Each of the present front ends is independent of all the other ones,
although a general-purpose, table-driven front end is conceivable, provided
one can devise a way to express the semantics of the source language in the
driving tables.
The Pascal front end uses a top-down parsing algorithm (recursive descent),
whereas the C and Plain front ends are bottom-up.
.PP
All front ends, independent of the language being compiled,
produce a common intermediate code called EM, which is
the assembly language for a simple stack machine.
The EM machine is based on a memory architecture
containing a stack for local variables, a (static) data area for variables
declared in the outermost block and global to the whole program, and a heap
for dynamic data structures.
In some ways EM resembles P-code [6], but is more general, since it is
intended for a wider class of languages than just Pascal.
.PP
The EM instruction set has been described elsewhere
[9,10,11]
so we will only briefly summarize it here.
Instructions exist to:
.sp
1. Load a variable or constant of some length onto the stack.
2. Store the top item on the stack in memory.
3. Add, subtract, multiply, divide, etc. the top two stack items.
4. Examine the top one or two stack items and branch conditionally.
5. Call procedures and return from them.
.sp
.PP
Loads and stores come in several variations, corresponding to the most common
programming language semantics, for example, constants, simple variables,
fields of a record, elements of an array, and so on.
Distinctions are also made between variables local to the current block
(i.e., stack frame), those in the outermost block (static storage), and those
at intermediate lexicographic levels, which are accessed by following the
static chain at run time.
.PP
All arithmetic instructions have a type (integer, unsigned, real,
pointer, or set) and an
operand length, which may either be explicit or may be popped from the stack
at run time.
Monadic branch instructions pop an item from the stack and branch if it is
less than zero, less than or equal to zero, etc.
Dyadic branch instructions pop two items, compare them, and branch accordingly.
.PP
In addition to these basic EM instructions, there is a collection of special
purpose instructions (e.g., to increment a local variable), which are typically
produced from the simple ones by the peephole optimizer.
Although the complete EM instruction set contains nearly 150 instructions,
only about 60 of them are really primitive; the rest are simply abbreviations
for commonly occurring EM instruction sequences.
.PP
Of particular interest is the way object sizes are parametrized.
The front ends allow the user to indicate how many bytes an integer, real, etc.
should occupy.
Given this information, the front ends can allocate memory, determining
the placement of variables within the stack frame.
Sizes for primitive types are restricted to 8, 16, 32, 64, etc. bits.
The front ends are also parametrized by the target machine's word length
and address size so they can tell, for example, how many "load" instructions
to generate to move a 32-bit integer.
In the examples used henceforth,
we will assume a 16-bit word size and 16-bit integers.
.PP
Since only byte-addressable target machines are permitted,
it is nearly
always possible to implement any requested sizes on any target machine.
For example, the designer of the back end tables for the Z80 should provide
code for 8-, 16-, and 32-bit arithmetic.
In our view, the Pascal, C, or Plain programmer specifies what lengths
are needed,
without reference to the target machine,
and the back end provides it.
This approach greatly enhances portability.
While it is true that doing all arithmetic using 32-bit integers on the Z80
will not be terribly fast, we feel that if that is what the programmer needs,
it should be possible to implement it.
.PP
Like all assembly languages, EM has not only machine instructions, but also
pseudoinstructions.
These are used to indicate the start and end of each procedure, allocate
and initialize storage for data, and similar functions.
One particularly important pseudoinstruction is the one that is used to
transmit information to the back end for optimization purposes.
It can be used to suggest variables that are good candidates to assign to
registers, delimit the scope of loops, indicate that certain variables
contain a useful value (next operation is a load) or not (next operation is
a store), and various other things.
.NH 1
The Peephole Optimizer
.PP
The peephole optimizer reads in unoptimized EM programs and writes out
optimized ones.
Both the input and output are expressed in a highly compact code, rather than
in ASCII, to reduce the i/o time, which would otherwise dominate the CPU
time.
The program itself is table driven, and is, by and large, ignorant of the
semantics of EM.
The knowledge of EM is contained in a
language- and machine-independent table consisting of about 400
pattern-replacement pairs.
We will briefly describe the kinds of optimizations it performs below;
a more complete discussion can be found in [9].
.PP
Each line in the driving table describes one optimization, consisting of a
pattern part and a replacement part.
The pattern part is a series of one or more EM instructions and a boolean
expression.
The replacement part is a series of EM instructions with operands.
A typical optimization might be:
.sp
LOL LOC ADI STL ($1 = $4) and ($2 = 1) and ($3 = 2) ==> INL $1
.sp
where the text prior to the ==> symbol is the pattern and the text after it is
the replacement.
LOL loads a local variable onto the stack, LOC loads a constant onto the stack,
ADI is integer addition, and STL is store local.
The pattern specifies that four consecutive EM instructions are present, with
the indicated opcodes, and that furthermore the operand of the first
instruction (denoted by $1) and the fourth instruction (denoted by $4) are the
same, the constant pushed by LOC is 1, and the size of the integers added by
ADI is 2 bytes.
(EM instructions have at most one operand, so it is not necessary to specify
the operand number.)
Under these conditions, the four instructions can be replaced by a single INL
(increment local) instruction whose operand is equal to that of LOL.
.PP
Although the optimizations cover a wide range, the main ones
can be roughly divided into the following categories.
\fIConstant folding\fR
is used to evaluate constant expressions, such as 2*3~+~7 at
compile time instead of run time.
\fIStrength reduction\fR
is used to replace one operation, such as multiply, by
another, such as shift.
\fIReordering of expressions\fR
helps in cases like -K/5, which can be better
evaluated as K/-5, because the former requires
a division and a negation, whereas the latter requires only a division.
\fINull instructions\fR
include resetting the stack pointer after a call with 0 parameters,
offsetting zero bytes to access the
first element of a record, or jumping to the next instruction.
\fISpecial instructions\fR
are those like INL, which deal with common special cases
such as adding one to a variable or comparing something to zero.
\fIGroup moves\fR
are useful because a sequence
of consecutive moves can often be replaced with EM code
that allows the back end to generate a loop instead of in line code.
\fIDead code elimination\fR
is a technique for removing unreachable statements, possibly made unreachable
by previous optimizations.
\fIBranch chain compression\fR
can be applied when a branch instruction jumps to another branch instruction.
The first branch can jump directly to the final destination instead of
indirectly.
.PP
The last two optimizations logically belong in the global optimizer but are
in the local optimizer for historical reasons (meaning that the local
optimizer has been the only optimizer for many years and the optimizations were
easy to do there).
.NH 1
The Global Optimizer
.PP
In contrast to the peephole optimizer, which examines the EM code a few lines
at a time through a small window, the global optimizer examines the
program's large scale structure.
Three distinct types of optimizations can be found here:
.sp
1. Interprocedural optimizations.
2. Intraprocedural optimizations.
3. Basic block optimizations.
.sp
We will now look at each of these in turn.
.PP
Interprocedural optimizations are those spanning procedure boundaries.
The most important one is deciding to expand procedures in line,
especially short procedures that occur in loops and pass several parameters.
If it takes more time or memory to pass the parameters than to do the work,
the program can be improved by eliminating the procedure.
The inverse optimization -- discovering long common code sequences and
turning them into a procedure -- is also possible, but much more difficult.
Like much of the global optimizer's work, the decision to make or not make
a certain program transformation is a heuristic one, based on knowledge of
how the back end works, how most target machines are organized, etc.
.PP
The heart of the global optimizer is its analysis of individual
procedures.
To perform this analysis, the optimizer must locate the basic blocks,
instruction sequences which can be entered only at the top and exited
only at the bottom.
It then constructs a data flow graph, with the basic blocks as nodes and
jumps between blocks as arcs.
.PP
From the data flow graph, many important properties of the program can be
discovered and exploited.
Chief among these is the presence of loops, indicated by cycles in the graph.
One important optimization is looking for code that can be moved outside the
loop, either prior to it or subsequent to it.
Such code motion saves execution time, although it does not save memory.
Unrolling loops is also possible and desirable in some cases.
.PP
Another area in which global analysis of loops is especially important is
in register allocation.
While it is true that EM does not have any registers to allocate,
the optimizer can easily collect information to allow the
back end to allocate registers wisely.
For example, the global optimizer can collect static frequency-of-use
and live/dead information about variables.
(A variable is dead at some point in the program if its current value is
not needed, i.e., the next reference to it overwrites it rather than
reading it; if the current value will eventually be used, the variable is
live.)
If two variables are never simultaneously live over some interval of code
(e.g., the body of a loop), they can be packed into a single variable,
which, if used often enough, may warrant being assigned to a register.
.PP
Many loops involve arrays: this leads to other optimizations.
If an array is accessed sequentially, with each iteration using the next
higher numbered element, code improvement is often possible.
Typically, a pointer to the bottom element of each array can be set up
prior to the loop.
Within the loop the element is accessed indirectly via the pointer, which is
also incremented by the element size on each iteration.
If the target machine has an autoincrement addressing mode and the pointer
is assigned to a register, an array access can often be done in a single
instruction.
.PP
Other intraprocedural optimizations include removing tail recursion
(last statement is a recursive call to the procedure itself),
topologically sorting the basic blocks to minimize the number of branch
instructions, and common subexpression recognition.
.PP
The third general class of optimizations done by the global optimizer is
improving the structure of a basic block.
For the most part these involve transforming arithmetic or boolean
expressions into forms that are likely to result in better target code.
As a simple example, A~+~B*C can be converted to B*C~+~A.
The latter can often
be handled by loading B into a register, multiplying the register by C, and
then adding in A, whereas the former may involve first putting A into a
temporary, depending on the details of the code generation table.
Another example of this kind of basic block optimization is transforming
-B~+~A~<~0 into the equivalent, but simpler, A~<~B.
.NH 1
The Back End
.PP
The back end reads a stream of EM instructions and generates assembly code
for the target machine.
Although the algorithm itself is machine independent, for each target
machine a machine dependent driving table must be supplied.
The driving table effectively defines the mapping of EM code to target code.
.PP
It will be convenient to think of the EM instructions being read as a
stream of tokens.
For didactic purposes, we will concentrate on two kinds of tokens:
those that load something onto the stack, and those that perform some operation
on the top one or two values on the stack.
The back end maintains at compile time a simulated stack whose behavior
mirrors what the stack of a hardware EM machine would do at run time.
If the current input token is a load instruction, a new entry is pushed onto
the simulated stack.
.PP
Consider, as an example, the EM code produced for the statement K~:=~I~+~7.
If K and I are
2-byte local variables, it will normally be LOL I; LOC 7; ADI~2; STL K.
Initially the simulated stack is empty.
After the first token has been read and processed, the simulated stack will
contain a stack token of type MEM with attributes telling that it is a local,
giving its address, etc.
After the second token has been read and processed, the top two tokens on the
simulated stack will be CON (constant) on top and MEM directly underneath it.
.PP
At this point the back end reads the ADI~2 token and
looks in the driving table to find a line or lines that define the
action to be taken for ADI~2.
For a typical multiregister machine, instructions will exist to add constants
to registers, but not to memory.
Consequently, the driving table will not contain an entry for ADI~2 with stack
configuration CON, MEM.
.PP
The back end is now faced with the problem of how to get from its
current stack configuration, CON, MEM, which is not listed, to one that is
listed.
The table will normally contain rules (which we call "coercions")
for converting between CON, REG, MEM, and similar tokens.
Therefore the back end attempts to "coerce" the stack into a configuration
that
.I is
present in the table.
A typical coercion rule might tell how to convert a MEM into
a REG, namely by performing the actions of allocating a
register and emitting code to move the memory word to that register.
Having transformed the compile-time stack into a configuration allowed for
ADI~2, the rule can be carried out.
A typical rule
for ADI~2 might have stack configuration REG, MEM
and would emit code to add the MEM to the REG, leaving the stack
with a single REG token instead of the REG and MEM tokens present before the
ADI~2.
.PP
In general, there will be more than one possible coercion path.
Assuming reasonable coercion rules for our example,
we might be able to convert
CON MEM into CON REG by loading the variable I into a register.
Alternatively, we could coerce CON to REG by loading the constant into a register.
The first coercion path does the add by first loading I into a register and
then adding 7 to it.
The second path first loads 7 into a register and then adds I to it.
On machines with a fast LOAD IMMEDIATE instruction for small constants
but no fast ADD IMMEDIATE, or vice
versa, one code sequence will be preferable to the other.
.PP
In fact, we actually have more choices than suggested above.
In both coercion paths a register must be allocated.
On many machines, not every register can be used in every operation, so the
choice may be important.
On some machines, for example, the operand of a multiply must be in an odd
register.
To summarize, from any state (i.e., token and stack configuration), a
variety of choices can be made, leading to a variety of different target
code sequences.
.PP
To decide which of the various code sequences to emit, the back end must have
some information about the time and memory cost of each one.
To provide this information, each rule in the driving table, including
coercions, specifies both the time and memory cost of the code emitted when
the rule is applied.
The back end can then simply try each of the legal possibilities (including all
the possible register allocations) to find the cheapest one.
.PP
This situation is similar to that found in a chess or other game-playing
program, in which from any state a finite number of moves can be made.
Just as in a chess program, the back end can look at all the "moves" that can
be made from each state reachable from the original state, and thus find the
sequence that gives the minimum cost to a depth of one.
More generally, the back end can evaluate all paths corresponding to accepting
the next
.I N
input tokens, find the cheapest one, and then make the first move along
that path, precisely the way a chess program would.
.PP
Since the back end is analogous to both a parser and a chess playing program,
some clarifying remarks may be helpful.
First, chess programs and the back end must do some look ahead, whereas the
parser for a well-designed grammar can usually suffice with one input token
because grammars are supposed to be unambiguous.
In contrast, many legal mappings
from a sequence of EM instructions to target code may exist.
Second, like a parser but unlike a chess program, the back end has perfect
information -- it does not have to contend with an unpredictable opponent's
moves.
Third, chess programs normally make a static evaluation of the board and
label the
.I nodes
of the tree with the resulting scores.
The back end, in contrast, associates costs with
.I arcs
(moves) rather than nodes (states).
However, the difference is not essential, since it could
also label each node with the cumulative cost from the root to that node.
.PP
As mentioned above, the cost field in the table contains
.I both
the time and memory costs for the code emitted.
It should be clear that the back end could use either one
or some linear combination of them as the scoring function for evaluating moves.
A user can instruct the compiler to optimize for time or for memory or
for, say, 0.3 x time + 0.7 x memory.
Thus the same compiler can provide a wide range of performance options to
the user.
The writer of the back end table can take advantage of this flexibility by
providing several code sequences with different tradeoffs for each EM
instruction (e.g., in line code vs. call to a run time routine).
.PP
In addition to the time-space tradeoffs, by specifying the depth of search
parameter,
.I N ,
the user can effectively also trade off compile time vs. object
code quality, for whatever code metric has been chosen.
In summary, by combining the properties of a parser and a game playing program,
it is possible to make a code generator that is table driven,
highly flexible, and has the ability to produce good code from a
stack machine intermediate code.
.NH 1
The Target Machine Optimizer
.PP
In the model of Fig. 2, the peephole optimizer comes before the global
optimizer.
It may happen that the code produced by the global optimizer can also
be improved by another round of peephole optimization.
Conceivably, the system could have been designed to iterate peephole and
global optimizations until no more of either could be performed.
.PP
However, both of these optimizations are done on the machine independent
EM code.
Neither is able to take advantage of the peculiarities and idiosyncrasies with
which most target machines are well endowed.
It is the function of the final
optimizer to do any (peephole) optimizations that still remain.
.PP
The algorithm used here is the same as in the EM peephole optimizer.
In fact, if it were not for the differences between EM syntax, which is
very restricted, and target assembly language syntax,
which is less so, precisely the same program could be used for both.
Nevertheless, the same ideas apply concerning patterns and replacements, so
our discussion of this optimizer will be restricted to one example.
.PP
To see what the target optimizer might do, consider the
PDP-11 instruction sequence sub #2,r0; mov (r0),x.
First 2 is subtracted from register 0, then the word pointed to by it
is moved to x.
The PDP-11 happens to have an addressing mode to perform this sequence in
one instruction: mov -(r0),x.
Although it is conceivable that this instruction could be included in the
back end driving table for the PDP-11, it is awkward to do so because it
can occur in so many contexts.
It is much easier to catch things like this in a separate program.
.NH 1
The Universal Assembler/Linker
.PP
Although assembly languages for different machines may appear very different
at first glance, they have a surprisingly large intersection.
We have been able to construct an assembler/linker that is almost entirely
independent of the assembly language being processed.
To tailor the program to a specific assembly language, it is necessary to
supply a table giving the list of instructions, the bit patterns required for
each one, and the language syntax.
The machine independent part of the assembler/linker is then compiled with the
table to produce an assembler and linker for a particular target machine.
Experience has shown that writing the necessary table for a new machine can be
done in less than a week.
.PP
To enforce a modicum of uniformity, we have chosen to use a common set of
pseudoinstructions for all target machines.
They are used to initialize memory, allocate uninitialized memory, determine the
current segment, and similar functions found in most assemblers.
.PP
The assembler is also a linker.
After assembling a program, it checks to see if there are any
unsatisfied external references.
If so, it begins reading the libraries to find the necessary routines, including
them in the object file as it finds them.
This approach requires libraries to be maintained in assembly language form,
but eliminates the need for inventing a language to express relocatable
object programs in a machine independent way.
It also simplifies the assembler, since producing absolute object code is
easier than producing relocatable object code.
Finally, although assembly language libraries may be somewhat larger than
relocatable object module libraries, the loss in speed due to having more
input may be more than compensated for by not having to pass an intermediate
file between the assembler and linker.
.NH 1
The Utility Package
.PP
The utility package is a collection of programs designed to aid the
implementers of new front ends or new back ends.
The most useful ones are the test programs.
For example, one test set, EMTEST, systematically checks out a back end by
executing an ever larger subset of the EM instructions.
It starts out by testing LOC, LOL and a few of the other essential instructions.
If these appear to work, it then tries out new instructions one at a time,
adding them to the set of instructions "known" to work as they pass the tests.
.PP
Each instruction is tested with a variety of operands chosen from values
where problems can be expected.
For example, on target machines which have 16-bit index registers but only
allow 8-bit displacements, a fundamentally different algorithm may be needed
for accessing
the first few bytes of local variables and those with offsets of thousands.
The test programs have been carefully designed to thoroughly test all relevant
cases.
.PP
In addition to EMTEST, test programs in Pascal, C, and other languages are also
available.
A typical test is:
.sp
i := 9; \fBif\fP i + 250 <> 259 \fBthen\fP error(16);
.sp
Like EMTEST, the other test programs systematically exercise all features of the
language being tested, and do so in a way that makes it possible to pinpoint
errors precisely.
While it has been said that testing can only demonstrate the presence of errors
and not their absence, our experience is that
the test programs have been invaluable in debugging new parts of the system
quickly.
.PP
Other utilities include programs to convert
the highly compact EM code produced by front ends to ASCII and vice versa,
programs to build various internal tables from human writable input formats,
a variety of libraries written in or compiled to EM to make them portable,
an EM assembler, and EM interpreters for various machines.
.PP
Interpreting the EM code instead of translating it to target machine language
is useful for several reasons.
First, the interpreters provide extensive run time diagnostics including
an option to list the original source program (in Pascal, C, etc.) with the
execution frequency or execution time for each source line printed in the
left margin.
Second, since an EM program is typically about one-third the size of a
compiled program, large programs can be executed on small machines.
Third, running the EM code directly makes it easier to pinpoint errors in
the EM output of front ends still being debugged.
.NH 1
Summary and Conclusions
.PP
The Amsterdam Compiler Kit is a tool kit for building
portable (cross) compilers and interpreters.
The main pieces of the kit are the front ends, which convert source programs
to EM code, optimizers, which improve the EM code, and back ends, which convert
the EM code to target assembly language.
The kit is highly modular, so writing one front end
(and its associated runtime routines)
is sufficient to implement
a new language on a dozen or more machines, and writing one back end table
and one universal assembler/linker table is all that is needed to bring up all
the previously implemented languages on a new machine.
In this manner, the contents, and hopefully the usefulness, of the toolkit
will increase in time.
.PP
We believe the principal lesson to be learned from our work is that the old
UNCOL idea is basically a sound way to produce compilers, provided suitable
restrictions are placed on the source languages and target machines.
We also believe that although compilers produced by this technology may not
be equal to the very best handcrafted compilers,
in terms of object code quality, they are certainly
competitive with many existing compilers.
However, when one factors in the cost of producing the compiler,
the possible slight loss in performance may be more than compensated for by the
large decrease in production cost.
As a consequence of our work and similar work by other researchers [1,3,4],
we expect integrated compiler building kits to become increasingly popular
in the near future.
.PP
The toolkit is now available for various computers running the
.UX
operating system.
For information, contact the authors.
.NH 1
References
.LP
.nr r 0 1
.in +4
.ti -4
\fB~\n+r.\fR Graham, S.L.
Table-Driven Code Generation.
.I "Computer~13" ,
8 (August 1980), 25-34.
.PP
A discussion of systematic ways to do code generation,
in particular, the idea of having a table with templates that match parts of
the parse tree and convert them into machine instructions.
.sp 2
.ti -4
\fB~\n+r.\fR Haddon, B.K., and Waite, W.M.
Experience with the Universal Intermediate Language Janus.
.I "Software Practice & Experience~8" ,
5 (Sept.-Oct. 1978), 601-616.
.PP
An intermediate language for use with ALGOL 68, Pascal, etc. is described.
The paper discusses some problems encountered and how they were dealt with.
.sp 2
.ti -4
\fB~\n+r.\fR Johnson, S.C.
A Portable Compiler: Theory and Practice.
.I "Ann. ACM Symp. Prin. Prog. Lang." ,
Jan. 1978.
.PP
A cogent discussion of the portable C compiler.
Particularly interesting are the author's thoughts on the value of
computer science theory.
.sp 2
.ti -4
\fB~\n+r.\fR Leverett, B.W., Cattell, R.G.G., Hobbs, S.O., Newcomer, J.M.,
Reiner, A.H., Schatz, B.R., and Wulf, W.A.
An Overview of the Production-Quality Compiler-Compiler Project.
.I Computer~13 ,
8 (August 1980), 38-49.
.PP
PQCC is a system for building compilers similar in concept but differing in
details from the Amsterdam Compiler Kit.
The paper describes the intermediate representation used and the code generation
strategy.
.sp 2
.ti -4
\fB~\n+r.\fR Lowry, E.S., and Medlock, C.W.
Object Code Optimization.
.I "Commun.~ACM~12" ,
(Jan. 1969), 13-22.
.PP
A classic paper on global object code optimization.
It covers data flow analysis, common subexpressions, code motion, register
allocation and other techniques.
.sp 2
.ti -4
\fB~\n+r.\fR Nori, K.V., Ammann, U., Jensen, K., Nageli, H.
The Pascal P Compiler Implementation Notes.
Eidgen. Tech. Hochschule, Zurich, 1975.
.PP
A description of the original P-code machine, used to transport the Pascal-P
compiler to new computers.
.sp 2
.ti -4
\fB~\n+r.\fR Steel, T.B., Jr. UNCOL: the Myth and the Fact. In
.I "Ann. Rev. Auto. Prog."
Goodman, R. (ed.), vol. 2, (1960), 325-344.
.PP
An introduction to the UNCOL idea by its originator.
.sp 2
.ti -4
\fB~\n+r.\fR Steel, T.B., Jr.
A First Version of UNCOL.
.I "Proc. Western Joint Comp. Conf." ,
(1961), 371-377.
.PP
The first detailed proposal for an UNCOL. By current standards it is a
primitive language, but it is interesting for its historical perspective.
.sp 2
.ti -4
\fB~\n+r.\fR Tanenbaum, A.S., van Staveren, H., and Stevenson, J.W.
Using Peephole Optimization on Intermediate Code.
.I "ACM Trans. Prog. Lang. and Sys. 4" ,
1 (Jan. 1982), 21-36.
.PP
A detailed description of a table-driven peephole optimizer.
The driving table provides a list of patterns to match as well as the
replacement text to use for each successful match.
.sp 2
.ti -4
\fB\n+r.\fR Tanenbaum, A.S., Stevenson, J.W., Keizer, E.G., and van Staveren, H.
Description of an Experimental Machine Architecture for use with Block
Structured Languages.
Informatica Rapport 81, Vrije Universiteit, Amsterdam, 1983.
.PP
The defining document for EM.
.sp 2
.ti -4
\fB\n+r.\fR Tanenbaum, A.S.
Implications of Structured Programming for Machine Architecture.
.I "Comm. ACM~21" ,
3 (March 1978), 237-246.
.PP
The background and motivation for the design of EM.
This early version emphasized the idea of interpreting the intermediate
code (then called EM-1) rather than compiling it.

View file

@ -1,303 +0,0 @@
.\" $Header$
.wh 0 hd
.wh 60 fo
.de hd
'sp 5
..
.de fo
'bp
..
.nr e 0 1
.de ER
.br
.ne 20
.sp 2
.in 5
.ti -5
ERROR \\n+e:
..
.de PS
.sp
.nf
.in +5
..
.de PE
.sp
.fi
.in -5
..
.sp 3
.ce
UNIX version 7 bugs
.sp 3
This document describes the UNIX version 7 errors fixed at the
Vrije Universiteit, Amsterdam.
Several of these were discovered at the VU.
Others are quoted from a list of bugs distributed by Bell Labs.
.sp
For each error the differences between the original and modified
source files are given,
as well as a test program.
.ER
C optimizer bug for unsigned comparison
.sp
The following C program caused an IOT trap, while it should not
(compile with 'cc -O prog.c'):
.PS
unsigned i = 0;
main() {
register j;
j = -1;
if (i > 40000)
abort();
}
.PE
Bell Labs suggests making the following patch in c21.c:
.PS
/* modified /usr/src/cmd/c/c21.c */
189 if (r==0) {
190 /* next 2 lines replaced as indicated by
191 * Bell Labs bug distribution ( v7optbug )
192 p->back->back->forw = p->forw;
193 p->forw->back = p->back->back;
194 End of lines changed */
195 if (p->forw->op==CBR
196 || p->forw->op==SXT
197 || p->forw->op==CFCC) {
198 p->back->forw = p->forw;
199 p->forw->back = p->back;
200 } else {
201 p->back->back->forw = p->forw;
202 p->forw->back = p->back->back;
203 }
204 /* End of new lines */
205 decref(p->ref);
206 p = p->back->back;
207 nchange++;
208 } else if (r>0) {
.PE
Use the previous program to test before and after the modification.
.ER
The loader fails for large data or text portions
.sp
The loader 'ld' produces a "local symbol botch" error
for the following C program.
.PS
int big1[10000] = {
1
};
int big2[10000] = {
2
};
main() {
printf("loader is fine\\n");
}
.PE
We have made the following fix:
.PS
/* original /usr/src/cmd/ld.c */
113 struct {
114 int fmagic;
115 int tsize;
116 int dsize;
117 int bsize;
118 int ssize;
119 int entry;
120 int pad;
121 int relflg;
122 } filhdr;
/* modified /usr/src/cmd/ld.c */
113 /*
114 * The original Version 7 loader had problems loading large
115 * text or data portions.
116 * Why not include <a.out.h> ???
117 * then they would be declared unsigned
118 */
119 struct {
120 int fmagic;
121 unsigned tsize; /* not int !!! */
122 unsigned dsize; /* not int !!! */
123 unsigned bsize; /* not int !!! */
124 unsigned ssize; /* not int !!! */
125 unsigned entry; /* not int !!! */
126 unsigned pad; /* not int !!! */
127 unsigned relflg; /* not int !!! */
128 } filhdr;
.PE
.ER
Floating point registers
.sp
When a program is swapped to disk because it needs more memory,
the floating point registers are not saved, so
it may have different register contents when it is restarted.
A small assembly program demonstrates this for the status register.
If the error is not fixed, then the program generates an IOT error.
A "memory fault" is generated if all is fine.
.PS
start: ldfps $7400
1: stfps r0
mov r0,-(sp)
cmp r0,$7400
beq 1b
4
.PE
You have to dig into the kernel to fix it.
The following patch will do:
.PS
/* original /usr/sys/sys/slp.c */
563 a2 = malloc(coremap, newsize);
564 if(a2 == NULL) {
565 xswap(p, 1, n);
566 p->p_flag |= SSWAP;
567 qswtch();
568 /* no return */
569 }
/* modified /usr/sys/sys/slp.c */
590 a2 = malloc(coremap, newsize);
591 if(a2 == NULL) {
592 #ifdef FPBUG
593 /*
594 * copy floating point register and status,
595 * but only if you must switch processes
596 */
597 if(u.u_fpsaved == 0) {
598 savfp(&u.u_fps);
599 u.u_fpsaved = 1;
600 }
601 #endif
602 xswap(p, 1, n);
603 p->p_flag |= SSWAP;
604 qswtch();
605 /* no return */
606 }
.PE
.ER
Floating point registers.
.sp
A similar problem arises when a process forks.
The child will have random floating point registers as is
demonstrated by the following assembly language program.
The child process will die by an IOT trap and the father prints
the message "child failed".
.PS
exit = 1.
fork = 2.
write = 4.
wait = 7.
start: ldfps $7400
sys fork
br child
sys wait
tst r1
bne bad
stfps r2
cmp r2,$7400
beq start
4
child: stfps r2
cmp r2,$7400
beq ex
4
bad: clr r0
sys write;mess;13.
ex: clr r0
sys exit
.data
mess: <child failed\\n>
.PE
The same file slp.c should be patched as follows:
.PS
/* original /usr/sys/sys/slp.c */
499 /*
500 * When the resume is executed for the new process,
501 * here's where it will resume.
502 */
503 if (save(u.u_ssav)) {
504 sureg();
505 return(1);
506 }
507 a2 = malloc(coremap, n);
508 /*
509 * If there is not enough core for the
510 * new process, swap out the current process to generate the
511 * copy.
512 */
/* modified /usr/sys/sys/slp.c */
519 /*
520 * When the resume is executed for the new process,
521 * here's where it will resume.
522 */
523 if (save(u.u_ssav)) {
524 sureg();
525 return(1);
526 }
527 #ifdef FPBUG
528 /* copy the floating point registers and status to child */
529 if(u.u_fpsaved == 0) {
530 savfp(&u.u_fps);
531 u.u_fpsaved = 1;
532 }
533 #endif
534 a2 = malloc(coremap, n);
535 /*
536 * If there is not enough core for the
537 * new process, swap out the current process to generate the
538 * copy.
539 */
.PE
.ER
/usr/src/libc/v6/stat.c
.sp
Some system calls are changed from version 6 to version 7.
A library of system call entries, that make a version 6 UNIX look like
a version 7 system, is provided to enable you to run some
useful version 7 utilities, like 'tar', on UNIX-6.
The entry for 'stat' contained two bugs:
the 24-bit file size was incorrectly converted to 32 bits
(sign extension of bit 15)
and the uid/gid fields suffered from sign extension.
.sp
Transferring your files from version 6 to version 7 using 'tar'
will fail for all files for which
.sp
( (size & 0100000) != 0 )
.sp
These two errors are fixed if stat.c is modified as follows:
.PS
/* original /usr/src/libc/v6/stat.c */
11 char os_size0;
12 short os_size1;
13 short os_addr[8];
49 buf->st_nlink = osbuf.os_nlinks;
50 buf->st_uid = osbuf.os_uid;
51 buf->st_gid = osbuf.os_gid;
52 buf->st_rdev = 0;
/* modified /usr/src/libc/v6/stat.c */
11 char os_size0;
12 unsigned os_size1;
13 short os_addr[8];
49 buf->st_nlink = osbuf.os_nlinks;
50 buf->st_uid = osbuf.os_uid & 0377;
51 buf->st_gid = osbuf.os_gid & 0377;
52 buf->st_rdev = 0;
.PE

View file

@ -1,753 +0,0 @@
.\" $Header$
.ll 72
.wh 0 hd
.wh 60 fo
.de hd
'sp 5
..
.de fo
'bp
..
.tr ~
. PARAGRAPH
.de PP
.sp
..
. CHAPTER
.de CH
.br
.ne 15
.sp 3
.in 0
\\fB\\$1\\fR
.in 5
.PP
..
. SUBCHAPTER
.de SH
.br
.ne 10
.sp
.in 5
\\fB\\$1\\fR
.in 10
.PP
..
. INDENT START
.de IS
.sp
.in +5
..
. INDENT END
.de IE
.in -5
.sp
..
. DOUBLE INDENT START
.de DS
.sp
.in +5
.ll -5
..
. DOUBLE INDENT END
.de DE
.ll +5
.in -5
.sp
..
. EQUATION START
.de EQ
.sp
.nf
..
. EQUATION END
.de EN
.fi
.sp
..
. TEST
.de TT
.ti -5
Test~\\$1:~
.br
..
. IMPLEMENTATION 1
.de I1
.br
Implementation~1:
..
. IMPLEMENTATION 2
.de I2
.br
Implementation~2:
..
.de CS
.br
~-~\\
..
.br
.fi
.sp 5
.ce
\fBPascal Validation Suite Report\fR
.CH "Pascal processor identification"
The ACK-Pascal compiler produces code for an EM machine
as defined in [1].
It is up to the implementor of the EM machine whether errors like
integer overflow, undefined operand and range bound error are recognized or not.
Therefore it depends on the EM machine implementation whether these errors
are recognized in Pascal programs or not.
The validation suite results of all known implementations are given.
.PP
There does not (yet) exist a hardware EM machine.
Therefore, EM programs must be interpreted, or translated into
instructions for a target machine.
The following implementations currently exist:
.IS
.I1
an interpreter running on a PDP-11 (using UNIX).
The normal mode of operation for this interpreter is to check
for undefined integers, overflow, range errors etc.
.sp
.I2
a translator into PDP-11 instructions (using UNIX).
Fewer checks are performed than in the interpreter, because the translator
is intended to speed up the execution of well-debugged programs.
.IE
.CH "Test Conditions"
Tester: E.G. Keizer
.br
Date: October 1983
.br
Validation Suite version: 3.0
.PP
The final test run is made with a slightly
modified validation suite.
.SH "Erroneous programs"
Some tests did not conform to the standard proposal of February 1979.
It is this version of the standard proposal that is used
by the authors of the validation suite.
.IS
.TT 6.6.3.7-4
The semicolon between high and integer on line 17 is replaced
by a colon.
.sp
.TT 6.7.2.2-13
The div operator on line 14 is replaced by mod.
.CH "Conformance tests"
Number of tests passed = 150
.br
Number of tests failed = 6
.SH "Details of failed tests"
.IS
.TT 6.1.2-1
Character sequences starting with the 8 characters 'procedur'
or 'function' are
erroneously classified as the word-symbols 'procedure' and 'function'.
.sp
.TT 6.1.3-2
Identifiers identical in the first eight characters, but
differing in ninth or higher numbered characters are treated as
identical.
.sp
.TT 6.5.1-1
ACK-Pascal requires all formal program parameters to be
declared with type \fIfile\fP.
.sp
.TT 6.6.6.5-1
Gives run-time error eof seen at call to eoln.
I have a hunch that this is an error in the suite.
.sp
.TT 6.6.4.1-1
Redefining the names of some standard procedures leads to incorrect
behaviour of the runtime system.
In this case it crashes without a sensible error message.
.sp
.TT 6.9.3.5.1-1
This test can not be translated by our compiler because two
non-identical variables are used in the same block with the same first eight
characters.
The test passed after replacement of one of those names.
.IE
.CH "Deviance tests"
Number of deviations correctly detected = 120
.br
Number of tests not detecting deviations = 20
.SH "Details of deviations"
The following tests are compiled without a proper error
indication although they do
not conform to the standard.
.IS
.TT 6.1.6-5
ACK-Pascal allows labels in the range 0..32767.
A warning is produced when testing for deviations from the
standard.
.sp
.TT 6.1.8-5
A missing space between a number and a word symbol is not
detected.
.sp
.TT 6.2.2-8
.TT 6.3-6
.TT 6.4.1-3
.TT 6.6.1-3
.TT 6.6.1-4
Undetected scope error. The scope of an identifier should start at the
beginning of the block in which it is declared.
In the ACK-Pascal compiler the scope starts just after the declaration,
however.
.sp
.TT 6.4.3.3-7
The values of fields from one variant are accessible from
another variant.
The correlation is exact.
.sp
.TT 6.6.3.3-4
The passing as a variable parameter of the selector of a
variant part is not detected.
A runtime error is produced because the variant selector is not
initialized.
.sp
.TT 6.8.2.4-2
.TT 6.8.2.4-3
.TT 6.8.2.4-4
.TT 6.8.2.4-5
.TT 6.8.2.4-6
The ACK-Pascal compiler does not restrict the places from where
you may jump to a label by means of a goto-statement.
.sp
.TT 6.8.3.9-5
.TT 6.8.3.9-6
.TT 6.8.3.9-7
.TT 6.8.3.9-16
There are no errors produced for assignments to a variable
in use as control-variable of a for-statement.
.TT 6.8.3.9-8
.TT 6.8.3.9-9
Use of a controlled variable after leaving the loop without
intervening initialization is not detected.
.IE
.CH "Error handling"
The results depend on the EM implementation.
.sp
Number of errors correctly detected =
.in +5
.I1
32
.I2
17
.in -5
Number of errors not detected =
.in +5
.I1
21
.I2
36
.in -5
Number of errors incorrectly detected =
.in +5
.I1
2
.I2
2
.in -5
.SH "Details of errors not detected"
The following test fails because the ACK-Pascal compiler only
generates a warning, which does not prevent the test from being run.
.IS
.TT 6.6.2-8
A warning is produced if there is no assignment to a function-identifier.
.IE
With this test the ACK-Pascal compiler issues an error message for a legal
construct not directly related to the error to be detected.
.IS
.TT 6.5.5-2
Program does not compile.
Buffer variable of text file is not allowed as variable
parameter.
.IE
The following errors are not detected at all.
.IS
.TT 6.2.1-11
.I2
The use of an undefined integer is not caught as an error.
.sp
.TT 6.4.3.3-10
.TT 6.4.3.3-11
.TT 6.4.3.3-12
.TT 6.4.3.3-13
The notion of 'current variant' is not implemented, not even if a tagfield
is present.
.sp
.TT 6.4.5-15
.TT 6.4.6-9
.TT 6.4.6-10
.TT 6.4.6-11
.TT 6.5.3.2-2
.I2
Subrange bounds are not checked.
.sp
.TT 6.4.6-12
.TT 6.4.6-13
.TT 6.7.2.4-4
If the base-type of a set is a subrange, then the set elements are not checked
against the bounds of the subrange.
Only the host-type of this subrange-type is relevant for ACK-Pascal.
.sp
.TT 6.5.4-1
.I2
Nil pointers are not detected.
.sp
.TT 6.5.4-2
.I2
Undefined pointers are not detected.
.sp
.TT 6.5.5-3
Changing the file position while the window is in use as actual variable
parameter or as an element of the record variable list of a with-statement
is not detected.
.sp
.TT 6.6.2-9
An undefined function result is not detected,
because it is never used in an expression.
.sp
.TT 6.6.5.3-6
.TT 6.6.5.3-7
Disposing a variable while it is in use as actual variable parameter or
as an element of the record variable list of a with-statement is not detected.
.sp
.TT 6.6.5.3-8
.TT 6.6.5.3-9
.TT 6.6.5.3-10
It is not detected that a record variable, created with the variant form
of new, is used as an operand in an expression or as the variable in an
assignment or as an actual value parameter.
.sp
.TT 6.6.5.3-11
Use of a variable that is not reinitialized after a dispose is
not detected.
.sp
.TT 6.6.6.4-4
.TT 6.6.6.4-5
.TT 6.6.6.4-7
.I2
There are no range checks for pred, succ and chr.
.sp
.TT 6.6.6.5-6
ACK-Pascal considers a rewrite of a file as a defining
occurrence.
.sp
.TT 6.7.2.2-8
.TT 6.7.2.2-9
.TT 6.7.2.2-10
.TT 6.7.2.2-12
.I2
Division by 0 or integer overflow is not detected.
.sp
.TT 6.8.3.9-18
The use of the same control variable in two nested for
statements is not detected.
.sp
.TT 6.8.3.9-19
Access of a control variable after leaving the loop results in
the final-value, although an error should be produced.
.sp
.TT 6.9.3.2-3
The program stops with a file not open error.
The rewrite before the write is missing in the program.
.sp
.TT 6.9.3.2-4
.TT 6.9.3.2-5
Illegal FracDigits values are not detected.
.CH "Implementation dependence"
Number of tests run = 14
.br
Number of tests incorrectly handled = 0
.SH "Details of implementation dependence"
.IS
.TT 6.1.9-5
Alternate comment delimiters are implemented
.sp
.TT 6.1.9-6
The equivalent symbols @ for ^, (. for [ and .) for ] are not
implemented.
.sp
.TT 6.4.2.2-10
Maxint = 32767
.sp
.TT 6.4.3.4-5
Only elements with non-negative ordinal value are allowed in sets.
.sp
.TT 6.6.6.1-1
Standard procedures and functions are not allowed as parameters.
.sp
.TT 6.6.6.2-11
Details of the machine characteristics regarding real numbers:
.IS
.nf
beta = 2
t = 56
rnd = 1
ngrd = 0
machep = -56
negep = -56
iexp = 8
minexp = -128
maxexp = 127
eps = 1.387779e-17
epsneg = 1.387779e-17
xmin = 2.938736e-39
xmax = 1.701412e+38
.fi
.IE
.sp
.TT 6.7.2.3-3
.TT 6.7.2.3-4
All operands of boolean expressions are evaluated.
.sp
.TT 6.8.2.2-1
.TT 6.8.2.2-2
The expression in an assignment statement is evaluated
before the variable selection if this involves pointer
dereferencing or array indexing.
.sp
.TT 6.8.2.3-2
Actual parameters are evaluated in reverse order.
.sp
.TT 6.9.3.2-6
The default width for integer, Boolean and real are 6, 5 and 13.
.sp
.TT 6.9.3.5.1-2
The number of digits written in an exponent is 2.
.sp
.TT 6.9.3.6-1
The representations of true and false are (~true) and (false).
The parentheses serve to indicate width.
.IE
.CH "Quality measurement"
Number of tests run = 60
.br
Number of tests handled incorrectly = 1
.SH "Results of tests"
Several tests perform operations on reals and indicate the error
introduced by these operations.
For each of these tests the following two quality measures are extracted:
.sp
.in +5
maxRE:~~maximum relative error
.br
rmsRE:~~root-mean-square relative error
.in -5
.sp 2
.IS
.TT 1.2-1
.I1
25 thousand Whetstone instructions per second.
.I2
169 thousand Whetstone instructions per second.
.sp
.TT 1.2-2
The value of (TRUEACC-ACC)*2^56/100000 is 1.4 .
This is well within the bounds specified in [3].
.br
The GAMM measure is:
.I1
238 microseconds
.I2
26.3 microseconds.
.sp
.TT 1.2-3
The number of procedure calls calculated in this test exceeds
the maximum integer value.
The program stops indicating overflow.
.sp
.TT 6.1.3-3
The number of significant characters for identifiers is 8.
.sp
.TT 6.1.5-8
There is no maximum to the line length.
.sp
.TT 6.1.5-9
The error message "too many digits" is given for numbers larger
than maxint.
.sp
.TT 6.1.5-10
.TT 6.1.5-11
.TT 6.1.5-12
Normal values are allowed for real constants and variables.
.sp
.TT 6.1.7-14
A reasonably large number of strings is allowed.
.sp
.TT 6.1.8-6
No warning is given for possibly unclosed comments.
.sp
.TT 6.2.1-12
.TT 6.2.1-13
.TT 6.2.1-14
.TT 6.2.1-15
.TT 6.5.1-2
Large lists of declarations are possible in each block.
.sp
.TT 6.4.3.2-6
An 'array[integer] of' is not allowed.
.sp
.TT 6.4.3.2-7
.TT 6.4.3.2-8
Large values are allowed for arrays and indices.
.sp
.TT 6.4.3.3-14
Large amounts of case-constant values are allowed in variants.
.sp
.TT 6.4.3.3-15
Large amounts of record sections can appear in the fixed part of
a record.
.sp
.TT 6.4.3.3-16
Large amounts of variants are allowed in a record.
.TT 6.4.3.4-4
Size and speed of Warshall's algorithm depend on the
implementation of EM:
.IS
.I1
.br
size: 122 bytes
.br
speed: 5.2 seconds
.sp
.I2
.br
size: 196 bytes
.br
speed: 0.7 seconds
.IE
.TT 6.5.3.2-3
Deep nesting of array indices is allowed.
.sp
.TT 6.5.3.2-4
.TT 6.5.3.2-5
Arrays can have at least 8 dimensions.
.sp
.TT 6.6.1-8
Deep static nesting of procedures is allowed.
.sp
.TT 6.6.3.1-6
Large amounts of formal parameters are allowed.
.sp
.TT 6.6.5.3-12
Dispose is fully implemented.
.sp
.TT 6.6.6.2-6
Test sqrt(x): no errors.
The error is within acceptable bounds.
.in +5
maxRE:~~2~**~-55.50
.br
rmsRE:~~2~**~-57.53
.in -5
.sp
.TT 6.6.6.2-7
Test arctan(x): may cause underflow or overflow errors.
The error is within acceptable bounds.
.in +5
.br
maxRE:~~2~**~-55.00
.br
rmsRE:~~2~**~-56.36
.in -5
.sp
.TT 6.6.6.2-8
Test exp(x): may cause underflow or overflow errors.
The error is not within acceptable bounds.
.in +5
maxRE:~~2~**~-50.03
.br
rmsRE:~~2~**~-51.03
.in -5
.sp
.TT 6.6.6.2-9
Test sin(x): may cause underflow errors.
The error is not within acceptable bounds.
.in +5
maxRE:~~2~**~-38.20
.br
rmsRE:~~2~**~-43.68
.in -5
.sp
Test cos(x): may cause underflow errors.
The error is not within acceptable bounds.
.in +5
maxRE:~~2~**~-41.33
.br
rmsRE:~~2~**~-46.62
.in -5
.sp
.TT 6.6.6.2-10
Test ln(x):
The error is not within acceptable bounds.
.in +5
maxRE:~~2~**~-54.05
.br
rmsRE:~~2~**~-55.77
.in -5
.sp
.TT 6.7.1-3
.TT 6.7.1-4
.TT 6.7.1-5
Complex nested expressions are allowed.
.sp
.TT 6.7.2.2-14
Test real division:
The error is within acceptable bounds.
.in +5
maxRE:~~0
.br
rmsRE:~~0
.in -5
.sp
.TT 6.7.2.2-15
Operations on reals in the integer range are exact.
.sp
.TT 6.7.3-1
.TT 6.8.3.2-1
.TT 6.8.3.4-2
.TT 6.8.3.5-15
.TT 6.8.3.7-4
.TT 6.8.3.8-3
.TT 6.8.3.9-20
.TT 6.8.3.10-7
Static deep nesting of function calls,
compound statements, if statements, case statements, repeat
loops, while loops, for loops and with statements is possible.
.sp
.TT 6.8.3.2-2
Large amounts of statements are allowed in a compound
statement.
.sp
.TT 6.8.3.5-12
The compiler requires case constants to be compatible with
the case selector.
.sp
.TT 6.8.3.5-13
.TT 6.8.3.5-14
Large case statements are possible.
.sp
.TT 6.9-2
Recursive IO on the same file is well-behaved.
.sp
.TT 6.9.1-6
The reading of real values from a text file is done with
sufficient accuracy.
.in +5
maxRE:~~2~**~-54.61
.br
rmsRE:~~2~**~-56.32
.in -5
.sp
.TT 6.9.1-7
.TT 6.9.2-2
.TT 6.9.3-3
.TT 6.9.4-2
Read, readln, write and writeln may have large amounts of
parameters.
.sp
.TT 6.9.1-8
The loss of precision for reals written on a text file and read
back is:
.in +5
maxRE:~~2~**~-53.95
.br
rmsRE:~~2~**~-55.90
.in -5
.sp
.TT 6.9.3-2
File IO buffers without trailing marker are correctly flushed.
.sp
.TT 6.9.3.5.2-2
Reals are written with sufficient accuracy.
.in +5
maxRE:~~0
.br
rmsRE:~~0
.in -5
.IE
.CH "Level 1 conformance tests"
Number of tests passed = 4
.br
Number of tests failed = 1
.SH "Details of failed tests"
.IS
.TT 6.6.3.7-4
An expression indicated by parentheses whose
value is a conformant array is not allowed.
.IE
.CH "Level 1 deviance tests"
Number of deviations correctly detected = 4
.br
Number of tests not detecting deviations = 0
.IE
.CH "Level 1 error handling"
The results depend on the EM implementation.
.sp
Number of errors correctly detected =
.in +5
.I1
1
.I2
0
.in -5
Number of errors not detected =
.in +5
.I1
0
.I2
1
.in -5
.SH "Details of errors not detected"
.IS
.TT 6.6.3.7-9
.I2
Subrange bounds are not checked.
.IE
.CH "Level 1 quality measurement"
Number of tests run = 1
.SH "Results of test"
.IS
.TT 6.6.3.7-10
Large conformant arrays are allowed.
.IE
.CH "Extensions"
Number of tests run = 3
.SH "Details of tests failed"
.IS
.TT 6.1.9-7
The alternative relational operators are not allowed.
.sp
.TT 6.1.9-8
The alternative symbols for colon, semicolon and assignment are
not allowed.
.sp
.TT 6.8.3.5-16
The otherwise selector in case statements is not allowed.
.IE
.CH "References"
.ti -5
[1]~~\
A.S.Tanenbaum, E.G.Keizer, J.W.Stevenson, Hans van Staveren,
"Description of a machine architecture for use with block structured
languages",
Informatica rapport IR-81.
.ti -5
[2]~~\
ISO standard proposal ISO/TC97/SC5-N462, dated February 1979.
The same proposal, in slightly modified form, can be found in:
A.M.Addyman e.a., "A draft description of Pascal",
Software, practice and experience, May 1979.
An improved version, received March 1980,
is followed as much as possible for the
current ACK-Pascal.
.ti -5
[3]~~\
B. A. Wichmann and J. du Croz,
A program to calculate the GAMM measure, Computer Journal,
November 1979.

View file

@ -1,19 +0,0 @@
# Run the EM tests one at a time, starting at the test number stored in
# the file "last" and going up to 127.  Each number is written to "last"
# before that test is selected, compiled with ack and run, so "last" always
# names the test that was most recently started.  When the loop is done the
# intermediate files are removed and the stamp file "tested" is created.
# NOTE(review): as shown, the continued lines "a.out" and ": ok;" join into
# a single command; confirm whether a separator was lost.
tested: last
set -x ;\
for i in `awk '{for(i=\$$1;i<=127;i++)print i}' last ` ;\
do \
echo $$i; \
echo $$i >last; \
select $$i tests > test.e; \
ack test.e; \
a.out \
: ok; \
done
rm -f test.e a.out
>tested
# Reset the starting test number to 0 whenever the test collection, test.h
# or the select program changes.
last: tests test.h select
echo 0 >last
# Build the test selection program from select.c.
select: select.c
cc -O -n -o select select.c

View file

@ -1,136 +0,0 @@
This directory contains test programs for EM implementations.
The test programs are all part of the file "tests".
Each individual test program looks like:
TEST 004: test ...
... ; data declarations etc.
MAIN nlocal
... ; part of the body of MAIN
PROC
... ; subroutines used by this test
The PROC part is optional, so the smallest test program looks like:
TEST 000: null test
MAIN 0
The keywords used by "select", like TEST, MAIN, PROC, HOL, OK and ERRLAB,
all consist of upper case letters and start in column one.
A convention for test numbers is to use 3-digit numbers, possibly left
padded with zeros.
A program, called "select", is provided to combine a range of tests
into a single test program.
"Select" expects a range as argument, like 0-127, or -127, or 0-.
Tests that have a TEST number in that range are included.
"Select" also expects the file from which the tests should
be selected as an argument.
If no argument is given, or only a range argument, select expects
the tests to select from on standard input.
To prevent name clashes, some rules must be obeyed:
- data label names, procedure names and instruction label numbers
must be unique over all tests. A good habit is to use the
three digit test number as suffix.
- only keywords of "select" may start with an uppercase letter in column
one, to allow for expansion in the future.
- because only a single 'hol' pseudo is allowed, "select" must
generate the 'hol' pseudo. An individual test may request
some 'hol' space by a special HOL line, starting in column one
and followed by a single number, the number of bytes needed.
This number must consist of digits only, no constant symbols,
because "select" must compute the maximum, so before the
preprocessor has replaced the constant symbols by their values.
- a similar problem is caused by the number of bytes of local
storage for 'main'. An individual test may specify the number
of bytes it needs as parameter to the MAIN line.
Again, the number must consist of digits only.
Test programs print a sequence of integers greater than 1.
This sequence is terminated by the number 1 as soon as an error is detected.
If all tests are performed correctly the number 0 is printed.
To allow test programs to print integers without the full machinery of
conversion and i/o routines, the EM instruction 'nop' is used.
Each time this instruction is executed, the current line number as
maintained by the 'lin' instruction must be printed, followed by a
newline, at least during debugging.
The following abbreviation may be used in test programs:
OK -> lin n
nop
Numbers are automatically assigned in order of static appearance.
As soon as an error is detected you must branch to label 1, by instructions
like 'bra *1' and 'zne *1'.
Label 1 is automatically provided in the main routine.
If you jump to label 1 in a subroutine, then that subroutine must
end with ERRLAB, like in:
PROC
pro $test,0
...
bra *1
...
ret 0
ERRLAB
end
An option to "select" is to generate 'fil' instructions whenever a
new test starts.
This is useful if 'nop' prints the 'fil' string as well as the 'lin' number.
This 'f' option is on by default, off if a '-f' flag is given.
The EM file generated by "select" includes "test.h".
"test.h" may contain definitions of the following symbols:
W2S: the size of double precision integers, if implemented.
FS: the size of single precision floats, if implemented.
F2S: the size of double precision floats, if implemented.
The value of these symbols, if defined, must be the size of the object involved.
Two other symbols are used:
EM_PSIZE: pointer size
EM_WSIZE: word size
The machine dependent translation programs, like 8086 and vax2, give
definitions of these symbols while calling the EM encode program.
Because these size names occur quite often, they may be abbreviated:
WS -> EM_WSIZE
PS -> EM_PSIZE
Before running the tests in the file "tests", it is wise to test
the necessary basic functions with some simple tests like
TEST 000: null
MAIN 0
and
TEST 001: ok
MAIN 0
OK
and
TEST 998: error
MAIN 0
bra *1
and
TEST 999: test lni
MAIN 0
lin 1
lni
loe 0
loc 2
bne *1
OK
The first two of these are part of "tests" as well. The last two are
intentionally not included in "tests", because they would fail.
The last test fails because it references the ABS block, which is
inaccessible after an 'hol' pseudo.
Proceed as follows for each of these basic tests:
- make a file called 'basic' containing the test
- run select:
select basic >basic.e
- compile by
machine basic.e
- and load and run
where machine should be replaced by the name of the program
used to compile EM programs for the current machine.

View file

@ -1 +0,0 @@
0

View file

@ -1,10 +0,0 @@
trap "" 1 2
# The trap above makes the script ignore signals 1 and 2.
# Read standard input line by line and turn it into an exit status:
#   a line consisting of "0"   -> exit 0 immediately
#   a line consisting of "bad" -> exit 1 immediately
# Any other line is skipped.
while read x
do
case $x in
0) exit 0;;
bad) exit 1;;
esac
done
# End of input reached without seeing "0": report failure.
exit 1

View file

@ -1,249 +0,0 @@
/*
* (c) copyright 1983 by the Vrije Universiteit, Amsterdam, The Netherlands.
*
* This product is part of the Amsterdam Compiler Kit.
*
* Permission to use, sell, duplicate or disclose this software must be
* obtained in writing. Requests for such permissions may be sent to
*
* Dr. Andrew S. Tanenbaum
* Wiskundig Seminarium
* Vrije Universiteit
* Postbox 7161
* 1007 MC Amsterdam
* The Netherlands
*
*/
#include <stdio.h>
#include <assert.h>
#include <signal.h>
/* Size of the input line buffer; getline() rejects lines that do not fit. */
#define LINSIZ 100
/* Zero-terminated list of signals for which main() installs stop(). */
int sigs[] = {
SIGHUP,
SIGINT,
SIGQUIT,
SIGTERM,
0
};
/* Program name taken from argv[0]; used as prefix of diagnostics. */
char *prog;
/* Current input line, filled by getline() with the newline removed. */
char line[LINSIZ];
/* Largest MAIN operand seen; emitted as the local size of $m_a_i_n. */
int nlocals = 0;
/* Largest HOL operand seen; a 'hol' pseudo is emitted when non-zero. */
int nhol = 0;
/* Number of errors reported; becomes the exit status in stop(). */
int nerrors = 0;
/* Line number used for the next OK expansion; 0 and 1 are emitted by combine(). */
int oknum = 2;
/* Non-zero: emit 'fil' instructions for each test; toggled by -f. */
int fflag = 1;
/* Inclusive range of test numbers to select. */
int low = 0;
int high = 999;
/* file1: lines before MAIN (plus the 'fil' strings). */
FILE *file1;
/* file2: lines after MAIN, copied into the body of $m_a_i_n. */
FILE *file2;
/* file3: lines after PROC, copied after the main procedure. */
FILE *file3;
/* Templates for the names of the three temporary files (see mktemp calls in main). */
char name1[] = "/usr/tmp/f1XXXXXX";
char name2[] = "/usr/tmp/f2XXXXXX";
char name3[] = "/usr/tmp/f3XXXXXX";
/*
 * Clean up and terminate: remove the three temporary files and exit
 * with the number of errors as status.  Also installed as the signal
 * handler by main().
 */
stop() {
	char *victim[3];
	register int k;

	victim[0] = name1;
	victim[1] = name2;
	victim[2] = name3;
	for (k = 0; k < 3; k++)
		unlink(victim[k]);
	exit(nerrors);
}
main(argc,argv) char **argv; {
register *p;
register char *s;
prog = *argv++; --argc;
mktemp(name1);
mktemp(name2);
mktemp(name3);
for (p = sigs; *p; p++)
if (signal(*p, stop) == SIG_IGN)
signal(*p, SIG_IGN);
while (argc > 0 && argv[0][0] == '-') {
switch (argv[0][1]) {
case 'f':
fflag ^= 1;
break;
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
high = atoi(&argv[0][1]);
break;
default:
usage();
break;
}
argc--;
argv++;
}
if (argc > 0 && argv[0][0] >= '0' && argv[0][0] <= '9') {
s = argv[0];
do
low = low*10 + *s++ - '0';
while (*s >= '0' && *s <= '9');
if (*s == 0)
high = low;
else if (*s++ == '-') {
high = atoi(s);
if (high == 0)
high = 999;
} else
fatal("bad range %s", argv[0]);
argc--;
argv++;
}
if (argc > 1)
usage();
if (argc == 1 && freopen(argv[0], "r", stdin) == NULL)
fatal("cannot open %s", argv[0]);
if ((file1 = fopen(name1, "w")) == NULL)
fatal("cannot create %s", name1);
if ((file2 = fopen(name2, "w")) == NULL)
fatal("cannot create %s", name2);
if ((file3 = fopen(name3, "w")) == NULL)
fatal("cannot create %s", name3);
if (getline())
while (select())
;
fclose(file1);
fclose(file2);
fclose(file3);
combine();
stop();
}
/*
 * Process one test.  On entry `line` must hold the "TEST n" header.
 * Tests outside [low, high] are skipped; for selected tests the lines
 * are distributed over file1 (before MAIN), file2 (after MAIN) and
 * file3 (after PROC), expanding the keywords MAIN, PROC, HOL, OK and
 * ERRLAB on the way.
 *
 * Returns 1 when a line starting with 'T' (the next test header) has
 * been read into `line`, 0 when the input is exhausted.
 * Malformed lines are reported through fatal().
 */
select() {
register FILE *f;
int i;
if (sscanf(line, "TEST %d", &i) != 1)
fatal("bad test identification(%s)", line);
/* test not selected: skip to the next line starting with 'T' */
if (i < low || i > high) {
while (getline())
if (line[0] == 'T')
return(1);
return(0);
}
/* the header itself becomes a comment in the main body */
fprintf(file2, "; %s\n", line);
if (fflag) {
/* label .NNN holding the string "tstNNN", referenced by 'fil' */
fprintf(file1, ".%03d\n", i);
fprintf(file1, " con \"tst%03d\"\n", i);
fprintf(file2, " fil .%03d\n", i);
}
/* lines go to file1 until MAIN is seen */
f = file1;
while (getline()) {
/*
 * Inside the switch, `continue` means the line was handled;
 * `break` leaves the switch and reports the line as bad.
 */
switch (line[0]) {
case 'T':
return(1);
case 'M':
/* MAIN n: n must be a multiple of 4; remember the maximum */
if (sscanf(line, "MAIN%d", &i) != 1 || i%4 != 0)
break;
if (i > nlocals)
nlocals = i;
f = file2;
continue;
case 'P':
if (strcmp(line, "PROC") != 0)
break;
f = file3;
continue;
case 'H':
/* HOL n: only accepted before MAIN; n must be a multiple of 4 */
if (f != file1 ||
sscanf(line, "HOL%d", &i) != 1 ||
i%4 != 0)
break;
if (i > nhol)
nhol = i;
continue;
case 'O':
/* OK: emit the next line number followed by 'nop' */
if (strcmp(line, "OK") != 0)
break;
fprintf(f, " lin %d\n nop\n", oknum++);
continue;
case 'E':
/* ERRLAB: only accepted after PROC; emits label 1 and its code */
if (f != file3 || strcmp(line, "ERRLAB") != 0)
break;
fprintf(f, "1\n lin 1\n nop\n loc 1\n loc 1\n mon\n");
continue;
default:
/* ordinary line: copy it to the current section */
putline(f);
continue;
}
fatal("bad line (%s)", line);
}
return(0);
}
/*
 * Write the combined test program to standard output:
 *   - the fixed header (WS/PS defines, include of test.h, 'mes' pseudos),
 *   - a 'hol' pseudo when some test asked for hol space,
 *   - the contents of the first temporary file,
 *   - the procedure $m_a_i_n with nlocals bytes of locals, which pushes
 *     123 and -98, contains the second temporary file and afterwards
 *     branches to label 1 unless -98 and 123 are popped again,
 *   - the normal exit ('lin 0', 'nop', return 0) and the code at
 *     label 1 ('lin 1', 'nop', return 1),
 *   - the contents of the third temporary file.
 */
combine() {
	static char *prologue[] = {
		"#define WS EM_WSIZE\n",
		"#define PS EM_PSIZE\n",
		"#include \"test.h\"\n",
		" mes 2,WS,PS\n",
		" mes 1\n",
		" mes 4,300\n",
		0
	};
	static char *epilogue[] = {
		" loc -98\n",
		" bne *1\n",
		" loc 123\n",
		" bne *1\n",
		" lin 0\n",
		" nop\n",
		" loc 0\n",
		" ret WS\n",
		"1\n",
		" lin 1\n",
		" nop\n",
		" loc 1\n",
		" ret WS\n",
		" end\n",
		0
	};
	register char **pp;

	for (pp = prologue; *pp != 0; pp++)
		fputs(*pp, stdout);
	if (nhol != 0)
		printf(" hol %d,0,0\n", nhol);
	copy(name1);
	fputs(" exp $m_a_i_n\n", stdout);
	printf(" pro $m_a_i_n,%d\n", nlocals);
	fputs(" loc 123\n", stdout);
	fputs(" loc -98\n", stdout);
	copy(name2);
	for (pp = epilogue; *pp != 0; pp++)
		fputs(*pp, stdout);
	copy(name3);
}
/*
 * Append the contents of file `s` to standard output.
 * The file is attached to stdin so that getline() can be reused;
 * failure to open it is fatal.
 */
copy(s) char *s; {
	FILE *in;

	in = freopen(s, "r", stdin);
	if (in == NULL)
		fatal("cannot reopen %s", s);
	for (;;) {
		if (!getline())
			break;
		putline(stdout);
	}
}
/*
 * Read the next line from stdin into the global buffer `line` and
 * remove its trailing newline.
 *
 * Returns 1 when a line was read and 0 at end of input.
 * A line that does not fit in the buffer is a fatal error.
 *
 * Two corner cases are handled explicitly:
 *   - a line starting with a null character has length 0, so the
 *     last-character test must not be applied to it (that would index
 *     line[-1]); it is reported as a fatal error instead;
 *   - a last line that ends at end-of-file without a newline is
 *     accepted as is, instead of being reported as "too long".
 */
getline() {
	register len;

	if (fgets(line, LINSIZ, stdin) == NULL)
		return(0);
	len = strlen(line);
	if (len > 0 && line[len-1] == '\n') {
		line[len-1] = 0;
		return(1);
	}
	if (len == 0)
		fatal("null character in input");
	/* no newline: only acceptable when the input simply ended here */
	if (!feof(stdin))
		fatal("line too long(%s)", line);
	return(1);
}
/*
 * Write the current input line, followed by a newline, to stream `f`.
 */
putline(f) FILE *f; {
	fputs(line, f);
	putc('\n', f);
}
/*
 * Report an unrecoverable error on stderr and terminate.
 * `s` is a printf-style format; up to four further arguments are
 * handed on to fprintf unchanged.  The message is prefixed with the
 * program name and followed by " (fatal)".  Does not return: the
 * error count is raised and stop() exits with it.
 */
fatal(s, a1, a2, a3, a4) char *s; {
	fprintf(stderr, "%s: ", prog);
	fprintf(stderr, s, a1, a2, a3, a4);
	fputs(" (fatal)\n", stderr);
	nerrors += 1;
	stop();
}
/*
 * Print the command synopsis on stderr and terminate with a non-zero
 * status.  Every argument is optional: main() treats -f as a toggle
 * and falls back to the full range and to standard input when the
 * range or the test collection is omitted, so all of them are shown
 * in brackets.
 */
usage() {
	fprintf(stderr, "usage: %s [-f] [[low]-[high]] [testcollection]\n", prog);
	nerrors++;
	stop();
}

View file

@ -1,28 +0,0 @@
; Combined EM test program containing only test 000, in the layout
; that the select program writes: header, data, main procedure, error exit.
#define WS EM_WSIZE
#define PS EM_PSIZE
#include "test.h"
mes 2,WS,PS
mes 1
mes 4,300
; string used by the fil instruction of test 000
.000
con "tst000"
exp $m_a_i_n
pro $m_a_i_n,0
; two marker values are pushed before the tests and checked afterwards
loc 123
loc -98
; TEST 000: empty
fil .000
; branch to label 1 unless both markers are still on the stack
loc -98
bne *1
loc 123
bne *1
; all tests passed: line number 0, then return 0
lin 0
nop
loc 0
ret WS
; error exit: line number 1, then return 1
1
lin 1
nop
loc 1
ret WS
end

View file

File diff suppressed because it is too large Load diff

View file

@ -1,26 +0,0 @@
# Directories, relative to this one: $d is the tree root, $h holds the
# header files and $c the data sources listed in FILES.
d=..
h=$d/h
c=$d/util/data
# Files that depend on em_table.
FILES= \
$h/em_spec.h \
$h/em_pseu.h \
$h/em_mnem.h \
$c/em_flag.c \
$c/em_pseu.c \
$c/em_mnem.c
# Whenever em_table changes, run new_table with the header and data
# directories as arguments.
$(FILES): em_table
new_table $h $c
install: $(FILES)
# Print the sources via opr.
# NOTE(review): "^" is presumably the old shell spelling of the pipe
# symbol "|"; confirm before running this with a modern shell.
opr:
make pr ^ opr
pr:
@pr Makefile em_table new_table pop_push traps
# The leading "-" makes make ignore a failing rm.
clean:
-rm -f *.old
cmp : # do nothing

View file

@ -1,175 +0,0 @@
magic 173
fmnem 1
nmnem 149
fpseu 150
npseu 30
filb0 180
nilb0 60
fcst0 0
zcst0 120
ncst0 240
fspec 240
nspec 16
ilb1 240
ilb2 241
dlb1 242
dlb2 243
dnam 244
cst2 245
cst4 246
cst8 247
doff 248
pnam 249
scon 250
icon 251
ucon 252
fcon 253
cend 255
bss 0
con 1
end 2
exa 3
exc 4
exp 5
hol 6
ina 7
inp 8
mes 9
pro 10
rom 11
aar w- -p-a-p+p
adf w- -a-a+a
adi w- -a-a+a
adp f- -p+p
ads w- -a-p+p
adu w- -a-a+a
and w- -a-a+a
asp f- -a
ass w- -a-x
beq bc -w-w
bge bc -w-w
bgt bc -w-w
ble bc -w-w
blm z- -p-p
bls w- -a-p-p
blt bc -w-w
bne bc -w-w
bra bt 0
cai -p -p
cal pp 0
cff -- -w-w-y+x
cfi -- -w-w-y+x
cfu -- -w-w-y+x
cif -- -w-w-y+x
cii -- -w-w-y+x
ciu -- -w-w-y+x
cmf w- -a-a+w
cmi w- -a-a+w
cmp -- -p-p+w
cms w- -a-a+w
cmu w- -a-a+w
com w- -a-a+a
csa wt -p-a
csb wt -p-a
cuf -- -w-w-y+x
cui -- -w-w-y+x
cuu -- -w-w-y+x
dch -- -p+p
dec -- -w+w
dee g- 0
del l- 0
dup s- -a+a+a
dus w- -a-x+x+x
dvf w- -a-a+a
dvi w- -a-a+a
dvu w- -a-a+a
exg w- -a-a+a+a
fef w- -a+a+w
fif w- -a-a+a+a
fil g- 0
gto gt -p-?
inc -- -w+w
ine g- 0
inl l- 0
inn w- -w-a+w
ior w- -a-a+a
lae g- +p
lal l- +p
lar w- -p-a-p+?
ldc d- +d
lde g- +d
ldf f- -p+d
ldl l- +d
lfr s- +a
lil l- +w
lim -- +w
lin n- 0
lni -- 0
loc c- +w
loe g- +w
lof f- -p+w
loi o- -p+a
lol l- +w
lor r- +p
los w- -a-p+x
lpb -- -p+p
lpi p- +p
lxa n- +p
lxl n- +p
mlf w- -a-a+a
mli w- -a-a+a
mlu w- -a-a+a
mon -- -?+?
ngf w- -a+a
ngi w- -a+a
nop -- 0
rck w- -p-a+a
ret zt -a-?
rmi w- -a-a+a
rmu w- -a-a+a
rol w- -w-a+a
ror w- -w-a+a
rtt -t -?
sar w- -p-a-p-?
sbf w- -a-a+a
sbi w- -a-a+a
sbs w- -p-p+a
sbu w- -a-a+a
sde g- -d
sdf f- -p-d
sdl l- -d
set w- -w+a
sig -- -p-p+p+p
sil l- -w
sim -- -w
sli w- -w-a+a
slu w- -w-a+a
sri w- -w-a+a
sru w- -w-a+a
ste g- -w
stf f- -p-w
sti o- -p-a
stl l- -w
str r- -p
sts w- -a-p-x
teq -- -w+w
tge -- -w+w
tgt -- -w+w
tle -- -w+w
tlt -- -w+w
tne -- -w+w
trp -p -w+?
xor w- -a-a+a
zeq bc -w
zer w- +a
zge bc -w
zgt bc -w
zle bc -w
zlt bc -w
zne bc -w
zre g- 0
zrf w- +a
zrl l- 0

View file

@ -1,352 +0,0 @@
aar mwPo 1 34
adf sP 1 35
adi mwPo 2 36
adp 2 38
adp mPo 2 39
adp sP 1 41
adp sN 1 42
ads mwPo 1 43
and mwPo 1 44
asp mwPo 5 45
asp swP 1 50
beq 2 51
beq sP 1 52
bge sP 1 53
bgt sP 1 54
ble sP 1 55
blm sP 1 56
blt sP 1 57
bne sP 1 58
bra 2 59
bra sN 2 60
bra sP 2 62
cal mPo 28 64
cal sP 1 92
cff - 93
cif - 94
cii - 95
cmf sP 1 96
cmi mwPo 2 97
cmp - 99
cms sP 1 100
csa mwPo 1 101
csb mwPo 1 102
dec - 103
dee sw 1 104
del swN 1 105
dup mwPo 1 106
dvf sP 1 107
dvi mwPo 1 108
fil 2 109
inc - 110
ine w2 111
ine sw 1 112
inl mwN 3 113
inl swN 1 116
inn sP 1 117
ior mwPo 1 118
ior sP 1 119
lae 2 120
lae sw 7 121
lal P2 128
lal N2 129
lal mP 1 130
lal mN 1 131
lal swP 1 132
lal swN 2 133
lar mwPo 1 135
ldc mP 1 136
lde w2 137
lde sw 1 138
ldl mP 1 139
ldl swN 1 140
lfr mwPo 2 141
lfr sP 1 143
lil swN 1 144
lil swP 1 145
lil mwP 2 146
lin 2 148
lin sP 1 149
lni - 150
loc 2 151
loc mP 34 0
loc mN 1 152
loc sP 1 153
loc sN 1 154
loe w2 155
loe sw 5 156
lof 2 161
lof mwPo 4 162
lof sP 1 166
loi 2 167
loi mPo 1 168
loi mwPo 4 169
loi sP 1 173
lol wP2 174
lol wN2 175
lol mwP 4 176
lol mwN 8 180
lol swP 1 188
lol swN 1 189
lxa mPo 1 190
lxl mPo 2 191
mlf sP 1 193
mli mwPo 2 194
rck mwPo 1 196
ret mwP 2 197
ret sP 1 199
rmi mwPo 1 200
sar mwPo 1 201
sbf sP 1 202
sbi mwPo 2 203
sdl swN 1 205
set sP 1 206
sil swN 1 207
sil swP 1 208
sli mwPo 1 209
ste w2 210
ste sw 3 211
stf 2 214
stf mwPo 2 215
stf sP 1 217
sti mPo 1 218
sti mwPo 4 219
sti sP 1 223
stl wP2 224
stl wN2 225
stl mwP 2 226
stl mwN 5 228
stl swN 1 233
teq - 234
tgt - 235
tlt - 236
tne - 237
zeq 2 238
zeq sP 2 239
zer sP 1 241
zge sP 1 242
zgt sP 1 243
zle sP 1 244
zlt sP 1 245
zne sP 1 246
zne sN 1 247
zre w2 248
zre sw 1 249
zrl mwN 2 250
zrl swN 1 252
zrl wN2 253
aar e2 0
aar e- 1
adf e2 2
adf e- 3
adi e2 4
adi e- 5
ads e2 6
ads e- 7
adu e2 8
adu e- 9
and e2 10
and e- 11
asp ew2 12
ass e2 13
ass e- 14
bge e2 15
bgt e2 16
ble e2 17
blm e2 18
bls e2 19
bls e- 20
blt e2 21
bne e2 22
cai e- 23
cal e2 24
cfi e- 25
cfu e- 26
ciu e- 27
cmf e2 28
cmf e- 29
cmi e2 30
cmi e- 31
cms e2 32
cms e- 33
cmu e2 34
cmu e- 35
com e2 36
com e- 37
csa e2 38
csa e- 39
csb e2 40
csb e- 41
cuf e- 42
cui e- 43
cuu e- 44
dee ew2 45
del ewP2 46
del ewN2 47
dup e2 48
dus e2 49
dus e- 50
dvf e2 51
dvf e- 52
dvi e2 53
dvi e- 54
dvu e2 55
dvu e- 56
fef e2 57
fef e- 58
fif e2 59
fif e- 60
inl ewP2 61
inl ewN2 62
inn e2 63
inn e- 64
ior e2 65
ior e- 66
lar e2 67
lar e- 68
ldc e2 69
ldf e2 70
ldl ewP2 71
ldl ewN2 72
lfr e2 73
lil ewP2 74
lil ewN2 75
lim e- 76
los e2 77
los e- 78
lor esP 1 79
lpi e2 80
lxa e2 81
lxl e2 82
mlf e2 83
mlf e- 84
mli e2 85
mli e- 86
mlu e2 87
mlu e- 88
mon e- 89
ngf e2 90
ngf e- 91
ngi e2 92
ngi e- 93
nop e- 94
rck e2 95
rck e- 96
ret e2 97
rmi e2 98
rmi e- 99
rmu e2 100
rmu e- 101
rol e2 102
rol e- 103
ror e2 104
ror e- 105
rtt e- 106
sar e2 107
sar e- 108
sbf e2 109
sbf e- 110
sbi e2 111
sbi e- 112
sbs e2 113
sbs e- 114
sbu e2 115
sbu e- 116
sde e2 117
sdf e2 118
sdl ewP2 119
sdl ewN2 120
set e2 121
set e- 122
sig e- 123
sil ewP2 124
sil ewN2 125
sim e- 126
sli e2 127
sli e- 128
slu e2 129
slu e- 130
sri e2 131
sri e- 132
sru e2 133
sru e- 134
sti e2 135
sts e2 136
sts e- 137
str esP 1 138
tge e- 139
tle e- 140
trp e- 141
xor e2 142
xor e- 143
zer e2 144
zer e- 145
zge e2 146
zgt e2 147
zle e2 148
zlt e2 149
zne e2 150
zrf e2 151
zrf e- 152
zrl ewP2 153
dch e- 154
exg esP 1 155
exg e2 156
exg e- 157
lpb e- 158
gto e2 159
ldc 4 0
lae 4 1
lal P4 2
lal N4 3
lde w4 4
ldf 4 5
ldl wP4 6
ldl wN4 7
lil wP4 8
lil wN4 9
loc 4 10
loe w4 11
lof 4 12
lol wP4 13
lol wN4 14
lpi 4 15
adp 4 16
asp w4 17
beq 4 18
bge 4 19
bgt 4 20
ble 4 21
blm 4 22
blt 4 23
bne 4 24
bra 4 25
cal 4 26
dee w4 27
del wP4 28
del wN4 29
fil 4 30
gto 4 31
ine w4 32
inl wP4 33
inl wN4 34
lin 4 35
sde 4 36
sdf 4 37
sdl wP4 38
sdl wN4 39
sil wP4 40
sil wN4 41
ste w4 42
stf 4 43
stl wP4 44
stl wN4 45
zeq 4 46
zge 4 47
zgt 4 48
zle 4 49
zlt 4 50
zne 4 51
zre w4 52
zrl wP4 53
zrl wN4 54

View file

@ -1,71 +0,0 @@
h=${1-.}
d=${2-.}
# new_table: derive the EM headers and tables from the file em_table.
#   $1 -> h: directory that receives em_spec.h, em_pseu.h and em_mnem.h
#   $2 -> d: directory that receives em_pseu.c, em_mnem.c and em_flag.c
# Both default to the current directory.  They are saved first because the
# set commands below overwrite the positional parameters.
# p: number on the fpseu line of em_table; m: number on the fmnem line.
set `grep fpseu em_table`
p=$2
set `grep fmnem em_table`
m=$2
# X: lines of the first section of em_table (up to the first empty line).
# NOTE(review): as it reads here the ed substitution replaces a blank by a
# blank; presumably it normalises the column separator -- confirm against
# the original file.
ed - em_table <<'A' > X
1,/^$/g/ /s// /gp
A
# Y: second section; awk prints the name and its number increased by p.
ed - em_table <<'A' | awk '{print $1,$2+'$p'}' > Y
1,/^$/d
1,/^$/g/ /s// /gp
A
# Z: third section; awk appends a running number that starts at m.
ed - em_table <<'A' | awk '{print $0,'$m'+i++}' > Z
1,/^$/d
1,/^$/d
1,/^$/g/ /s// /gp
A
# Add the last pseudo number and the last mnemonic number to X.
i=`wc -l <Y`
echo 'lpseu' `expr $i + $p - 1` >>X
i=`wc -l <Z`
echo 'lmnem' `expr $i + $m - 1` >>X
# em_spec.h: every line of X prefixed with "#define sp_".
ed - X <<'A' > $h/em_spec.h
g/^/s//#define sp_/p
A
# em_pseu.h: "#define ps_<name> <number>" for every line of Y.
ed - Y <<'A' > $h/em_pseu.h
g/\(.*\) \(.*\)/s//#define ps_\1 \2/p
A
# em_mnem.h: the middle columns of Z are dropped first, which leaves
# "#define op_<name> <number>".
ed - Z <<'A' > $h/em_mnem.h
g/ .* /s// /
g/\(.*\) \(.*\)/s//#define op_\1 \2/p
A
# em_pseu.c and em_mnem.c: arrays holding the three-letter names.
(
echo 'char em_pseu[][4] = {'
ed - Y <<'A'
g/\(...\).*/s// "\1",/p
A
echo '};'
) > $d/em_pseu.c
(
echo 'char em_mnem[][4] = {'
ed - Z <<'A'
g/\(...\).*/s// "\1",/p
A
echo '};'
) > $d/em_mnem.c
# em_flag.c: only the second column of Z is kept; its two characters x and y
# become "PAR_x | FLO_y", a "-" becomes NO, and tr upper-cases the result.
(
echo '#include <em_flag.h>
char em_flag[] = {'
ed - Z <<'A' | tr a-z A-Z
g/^... /s///
g/ .*/s///
g/\(.\)\(.\)/s//PAR_\1 | FLO_\2/
g/-/s//NO/g
g/.*/s// &,/p
A
echo '};'
) > $d/em_flag.c
# Remove the intermediate files.
rm X Y Z

View file

@ -1,289 +0,0 @@
non-standard feature used
identifier '%s' declared twice
end of file encountered
bad line directive
unsigned real: digit of fraction expected
unsigned real: digit of exponent expected
unsigned real: too many digits (>72)
unsigned integer: too many digits (>72)
unsigned integer: overflow (>32767)
string constant: must not exceed one line
string constant: at least one character expected
string constant: double quotes not allowed (see c option)
string constant: too long (>72 chars)
bad character
identifier '%s' not declared
location counter overflow: arrays too big
location counter overflow: arrays too big
arraysize too big
variable '%s' never used
variable '%s' never assigned
the files contained in '%s' are not closed automatically
constant expected
constant: only integers and reals may be signed
constant: out of bounds
simple type expected
enumerated type: element identifier expected
enumerated type: ',' or ')' expected
enumerated type: ',' expected
enumerated type: ')' expected
subrange type: type must be scalar, but not real
subrange type: '..' expected
subrange type: type of lower and upper bound incompatible
subrange type: lower bound exceeds upper bound
array type: '[' expected
conformant array: low bound identifier expected
conformant array: '..' expected
conformant array: high bound identifier expected
conformant array: ':' expected
conformant array: index type identifier expected
array type: index type not bounded
array type: index separator or ']' expected
array type: index separator expected
array type: ']' expected
array type: 'of' expected
record variant part: tag type identifier expected
record variant part: tag type identifier expected
record variant part: type must be bounded
record variant part: 'of' expected
record variant: type of case label and tag incompatible
record variant: multiple defined case label
record variant: ',' or ':' expected
record variant: ',' expected
record variant: ':' expected
record variant: '(' expected
record variant: ')' expected
record variant part: ';' or end of variant list expected
record variant part: ';' expected
record variant part: end of variant list expected
record variant part: there must be a variant for each tag value
field list: record section expected
record section: field identifier expected
record section: ',' or ':' expected
record section: ',' expected
record section: ':' expected
field list: ';' or end of record section list expected
field list: ';' expected
field list: end of record section list expected
type expected
type: simple and pointer type may not be packed
pointer type: type identifier expected
pointer type: type identifier expected
record type: 'end' expected
set type: 'of' expected
set type: too many elements in set
set type: bad subrange of integer
set of integer: the i option dictates the number of bits (default 16)
set type: base type not bounded
file type: 'of' expected
file type: files within files not allowed
var parameter: type identifier or conformant array expected
var parameter: type identifier expected
label declaration: unsigned integer expected
label declaration: label '%i' multiple declared
label declaration: ',' or ';' expected
label declaration: ',' expected
label declaration: ';' expected
const declaration: constant identifier expected
const declaration: '=' expected
const declaration: ';' expected
const declaration: constant identifier or 'type', 'var', 'procedure', 'function' or 'begin' expected
type declaration: type identifier expected
type declaration: '=' expected
type declaration: ';' expected
type declaration: type identifier or 'var', 'procedure', 'function' or 'begin' expected
var declaration: var identifier expected
var declaration: ',' or ':' expected
var declaration: ',' expected
var declaration: ':' expected
var declaration: ';' expected
var declaration: var identifier or 'procedure', 'function' or 'begin' expected
parameter list: 'var','procedure','function' or identifier expected
parameter list: parameter identifier expected
parameter list: ',' or ':' expected
parameter list: ',' expected
parameter list: ':' expected
parameter list: type identifier expected
parameter list: ';' or ')' expected
parameter list: ';' expected
proc/func declaration: proc/func identifier expected
proc/func declaration: previous declaration of '%s' was not forward
proc/func declaration: parameter list expected
parameterlist: ')' expected
func declaration: ':' expected
func declaration: result type identifier expected
func declaration: result type must be scalar, subrange or pointer
proc/func declaration: ';' expected
proc/func declaration: block or directive expected
proc/func declaration: '%s' unknown directive
proc/func declaration: '%s' again forward declared
proc/func declaration: ';' expected
indexed variable: '[' only allowed following array variables
indexed variable: index type not compatible with declaration
indexed variable: ',' or ']' expected
indexed variable: ',' expected
assignment: standard function not allowed as destination
assignment: cannot store the function result
assignment: formal parameter function not allowed as destination
assignment: function identifier may not be de-referenced
variable: '[', '.', '^' or end of variable expected
indexed variable: ']' expected
field designator: field identifier expected
field designator: '.' only allowed following record variables
field designator: no field '%s' in this record
referenced variable: '^' not allowed following zero-terminated strings
referenced variable: '^' only allowed following pointer or file variables
variable: var or field identifier expected
call: too many actual parameters supplied
call: proc/func identifier expected
call: standard proc/func may not be used as parameter
call: parameter lists of actual and formal proc/func incompatible
call: type of actual and formal value parameter not compatible
call: array parameter not conformable
call: type of actual and formal variable parameter not similar
call: packed elements not allowed as variable parameter
call: ',' or ')' expected
call: too few actual parameters supplied
read(ln): type must be integer, char or real
write(ln): type must be integer, char, real, string or boolean
write(ln): ':', ',' or ')' expected
write(ln): field width must be integer
write(ln): ':', ',' or ')' expected
write(ln): precision must be integer
write(ln): precision may only be specified for reals
read/write: too few actual parameters supplied
read/write: standard input/output not mentioned in program heading
read/write: ',' or ')' expected
read/write: type of parameter not the same as that of the file elements
read/write: parameter list expected
readln/writeln: standard input/output not mentioned in program heading
readln/writeln: only allowed on text files
new/dispose: C-type strings not allowed here
new/dispose: ',' or ')' expected
new/dispose: too many actual parameters supplied
new/dispose: type of tagfield value is incompatible with declaration
call: '(' or end of call expected
standard proc/func: parameter list expected
standard input/output not mentioned in program heading
file variable expected
pointer variable expected
pack: ',' expected
pack: ',' expected
unpack: ',' expected
unpack: ',' expected
standard proc/func: parameter type incompatible with specification
eoln/page: text file variable expected
pack/unpack: array types are incompatible
pack/unpack: only for arrays
abs: integer or real expected
sqr: integer or real expected
ord: type must be scalar or subrange, but not real
pred/succ: type must be scalar or subrange, but not real
trunc/round: real argument required
call: ')' expected
expression: left and right operand are incompatible
set: incompatible elements
set: base type must be bounded or of type integer
set: base type upper bound exceeds maximum set element number
set: element out of range
set: ']' or element list expected
set: '..', ',' or ']' expected
set: ',' or ']' expected
set: ',' expected
factor expected
factor: ')' expected
factor: type of factor must be boolean
set: ']' expected
term: multiplying operator or end of term expected
term: '*' only defined for integers, reals and sets
term: '/' only defined for integers and reals
term: 'div' only defined for integers
term: 'mod' only defined for integers
term: 'and' only defined for booleans
simple expression: only integers and reals may be signed
simple expression: adding operator or end of simple expression expected
simple expression: '+' only defined for integers, reals and sets
simple expression: '-' only defined for integers, reals and sets
simple expression: 'or' only defined for booleans
expression: relational operator or end of expression expected
expression: set expected
expression: left operand of 'in' not compatible with base type of right operand
expression: only '=' and '<>' allowed on pointers
expression: '<' and '>' not allowed on sets
expression: comparison of arrays only allowed for strings
expression: comparison of records not allowed
expression: comparison of files not allowed
assignment: ':=' expected
assignment: left and right hand side incompatible
goto statement: unsigned integer expected
goto statement: label '%i' not declared
if statement: type of expression must be boolean
if statement: 'then' expected
if statement: 'else' or end of if statement expected
case statement: type must be scalar or subrange, but not real
case statement: 'of' expected
case statement: incompatible case label
case statement: multiple defined case label
case statement: ',' or ':' expected
case statement: ',' expected
case statement: ':' expected
case statement: ';' or 'end' expected
case statement: ';' expected
case statement: 'end' expected
repeat statement: ';' or 'until' expected
repeat statement: ';' expected
repeat statement: 'until' expected
repeat statement: type of expression must be boolean
while statement: type of expression must be boolean
while statement: 'do' expected
for statement: type of bound and control variable incompatible
for statement: control variable expected
for statement: control variable must be local
for statement: type must be scalar or subrange, but not real
for statement: ':=' expected
for statement: 'to' or 'downto' expected
for statement: upper bound not assignment compatible
for statement: 'do' expected
with statement: record variable expected
with statement: ',' or 'do' expected
with statement: ',' expected
with statement: 'do' expected
assertion: type of expression must be boolean
statement expected
label '%i' not declared
label '%i' multiple defined
statement: ':' expected
unlabeled statement expected
compound statement: ';' or 'end' expected
compound statement: ';' expected
compound statement: 'end' expected
case statement: 'end' expected
body: ';' or 'end' expected
body: ';' expected
body: label '%i' declared, but never defined
program parameter '%s' not declared
function '%s' never assigned
block: declaration or body expected
block: 'const', 'type', 'var', 'procedure', 'function' or 'begin' expected
block: 'type', 'var', 'procedure', 'function' or 'begin' expected
block: 'var', 'procedure', 'function' or 'begin' expected
block: 'procedure', 'function' or 'begin' expected
block: unsatisfied forward proc/func declaration(s)
block: 'begin' expected
block: 'end' expected
program heading: 'program' expected
program heading: program identifier expected
program heading: file identifier list expected
program heading: file identifier expected
program heading: ',' or ')' expected
program heading: ',' expected
program heading: maximum number of file arguments exceeded (12)
program heading: ')' expected
program heading: ';' expected
program: '.' expected
'program' expected
module: 'const', 'type', 'var', 'procedure' or 'function' expected
module: 'type', 'var', 'procedure' or 'function' expected
module: 'var', 'procedure' or 'function' expected
module: 'procedure' or 'function' expected
garbage at end of program

View file

@ -1,107 +0,0 @@
array bound error
range bound error
set bound error
integer overflow
real overflow
real underflow
divide by 0
divide by 0.0
undefined integer
real undefined
conversion error
error 11
error 12
error 13
error 14
error 15
stack overflow
heap error
illegal instruction
odd or zero byte count
case error
memory fault
bad pointer
bad program counter
bad external address
bad monitor call
bad line number
error 27
error 28
error 29
error 30
error 31
error 32
error 33
error 34
error 35
error 36
error 37
error 38
error 39
error 40
error 41
error 42
error 43
error 44
error 45
error 46
error 47
error 48
error 49
error 50
error 51
error 52
error 53
error 54
error 55
error 56
error 57
error 58
error 59
error 60
error 61
error 62
error 63
more args expected
error in exp
error in ln
error in sqrt
assertion failed
array bound error in pack
array bound error in unpack
only positive j in 'i mod j'
file not yet open
dispose error
error 74
error 75
error 76
error 77
error 78
error 79
error 80
error 81
error 82
error 83
error 84
error 85
error 86
error 87
error 88
error 89
error 90
error 91
error 92
error 93
error 94
error 95
not writable
not readable
end of file
truncated
reset error
rewrite error
close error
read error
write error
digit expected
non-ASCII char read

View file

@ -1,15 +0,0 @@
description of third column of em_table:
-: pop item indicated by next character
+: push item indicated by next character
0: no effect on the stack
characters describing items:
w: target machine word (1, 2 or 4)
d: double target machine word (2, 4 or 8)
p: target machine address
a: item with size specified in argument
x: item with size specified by top item of stack
y: item with size specified by second item on stack
?: one or more items of unknown size

View file

@ -1,28 +0,0 @@
~ Array bound error
~ Range bound error
~ Set bound error
~ Integer overflow
~ Floating overflow
~ Floating underflow
~ Divide by 0
~ Divide by 0.0
~ Integer undefined
~ Floating undefined
~ Conversion error
* Stack overflow
* Heap overflow
* Illegal instruction
* Illegal odd or zero argument
* Case error
* Addressing non existent memory
* Bad pointer used
* Program counter out of range
* Bad argument of LAE
* Bad monitor call
* Argument of LIN too high
* Bad GTO descriptor

View file

@ -1,32 +0,0 @@
: 'Check that ../bin and . are both in the PATH of the shell and that'
: '. is searched before ../bin. Exit status 0 means the PATH is usable.'
rm -f ../bin/x_tpath x_tpath
: 'Probe 1: a command that exists only in ../bin must be found via PATH'
echo "echo $$" >../bin/x_tpath
chmod +x ../bin/x_tpath
case x`(x_tpath) 2>/dev/null`
in
x$$)
	STAT=0 ;;
x)
	(cd ../bin ; echo Sorry, `pwd` is not in your shell PATH" ($PATH)")
	STAT=1 ;;
*)
	echo "Sorry, there is something wrong with your PATH ($PATH)"
	: 'STAT used to stay unset here, so the script could still exit 0'
	STAT=4 ;;
esac
: 'Probe 2: a command of the same name in . must win over the one in ../bin'
echo "echo l_$$" >x_tpath
chmod +x x_tpath
case x`(x_tpath) 2>/dev/null`
in
xl_$$)
	;;
x)
	(cd ../bin ; echo Sorry, . is not in your shell PATH" ($PATH)")
	STAT=2 ;;
x$$)
	echo Sorry, . is not in your PATH" ($PATH)" or after the ACK bin directory
	STAT=3 ;;
*)
	echo "Sorry, there is something wrong with your PATH ($PATH)"
	STAT=4 ;;
esac
rm -f ../bin/x_tpath x_tpath
exit $STAT

View file

@ -1,7 +0,0 @@
: 'Succeed only when the ack_sys command can be run from the PATH'
(ack_sys) >/dev/null 2>&1 && exit 0
echo "You need to run 'first' first"
exit 1

View file

@ -1,7 +0,0 @@
/* Intended as a common directory for ALL temporary files */
#define TMP_DIR "/usr/tmp"
/* Access to the ACK tree and parts thereof */
/*
 * NOTE: the installation script named first rewrites the quoted path on the
 * EM_DIR line with sed, so the name and its string must stay on one line.
 */
#define EM_DIR "/usr/em" /* The root directory for EM stuff */
/* The two paths below are presumably relative to EM_DIR -- TODO confirm */
#define RTERR_PATH "etc/pc_rt_errors"
#define ACK_PATH "lib/descr"

View file

@ -1,133 +0,0 @@
: check $PATH first
sh ckpath || exit 1
: set ACK HOME Directory in ../h/em_path.h
: 'The new header is built in ./em_path.h and installed only when it differs.'
: 'The bracket in the sed address holds a blank and a tab.'
rm -f em_path.h
sed -e "/^#define[ 	]*EM_DIR/s@\".*\"@\"`cd .. ; pwd`\"@" <../h/em_path.h >em_path.h
if cmp ../h/em_path.h em_path.h >/dev/null 2>&1
then
	: 'Do not touch ../h/em_path.h, it is already correct.'
	: 'The scratch copy used to be left behind in this directory.'
	rm -f em_path.h
else
	rm -f ../h/em_path.h
	if mv em_path.h ../h >/dev/null 2>&1
	then : success
	else
		echo "Sorry, can't replace ../h/em_path.h"
		exit 7
	fi
fi
: 'Determine the system type: ask for it when the ack_sys command does not'
: 'exist yet and store the answer as the script ../bin/ack_sys'
if (ack_sys) >/dev/null 2>&1
then
	: echo Your system is: `ack_sys`.
else
	echo -n "Give me the name of your system, the current choice is:
pdp_v7 PDP11 with sep I/D and version 7
vax_bsd4_1a VAX11 with BSD4.1a
vax_bsd4_1c VAX11 with BSD4.1c
vax_bsd4_2 VAX11 with BSD4.2
pc_ix IBM PC with PC/IX
m68_unisoft Motorola 68000 with Unisoft UNIX
m68_pmds Philips PMDS
ANY Neither of the above
system type: "
	if read SYSNAME
	then
		echo echo "$SYSNAME" >../bin/ack_sys
		chmod +x ../bin/ack_sys
		case `ack_sys` in
		pdp_v7|vax_bsd4_1[ac]|vax_bsd4_2|pc_ix|m68_unisoft|m68_pmds) ;;
		*)
			: 'The message used to be split over two lines without quotes,'
			: 'so its second half was executed as a command'
			echo "None of the software especially intended for the named systems will work" ;;
		esac
	else
		echo Sorry, got EOF when reading system name.
		exit 8
	fi
fi
: 'Let the user confirm the system type; the answer n removes ack_sys and'
: 'starts this script again'
echo -n "Your system is `ack_sys`, are you satisfied with that? (y/n) "
read YESNO || {
	echo Sorry, got EOF when reading your answer.
	exit 9
}
case $YESNO in
j*|y*)
	;;
n*)
	echo Ok, I will give you another chance....
	rm -f ../bin/ack_sys
	exec sh $0
	;;
*)
	echo "I do not understand your answer ($YESNO). Bye"
	exit 9
	;;
esac
: 'Take action according to the system used'
: 'Prevent the use of the system assembler and loader for certain machines:'
: 'RMD lists the ../lib directories whose descr file loses its as and ld entries'
case `ack_sys` in
vax_bsd*)	RMD=pdp ;;
pdp_*)		RMD="vax2 vax4" ;;
*)		RMD="pdp vax2 vax4" ;;
esac
for i in $RMD
do
	(
		cd ../lib/$i
		: 'nothing to do when descr has no as entry'
		grep '^name as$' descr >/dev/null 2>&1 || exit 0
		cp descr descr.orig
		ed - descr <<'ABC'
/^name as$/;/^end$/d
/^name ld$/;/^end$/d
w
q
ABC
	)
done
: 'Set the default machine in ../h/local.h'
: 'ACM stays empty for systems without a known default; the header is then'
: 'left alone instead of receiving an empty machine name'
case `ack_sys` in
pdp_v7) ACM=pdp ;;
vax_bsd4_1[ac]) ACM=vax2 ;;
vax_bsd4_2) ACM=vax2 ;;
pc_ix) ACM=ix ;;
m68_unisoft) ACM=m68k2 ;;
m68_pmds) ACM=pmds ;;
*) ACM= ;;
esac
rm -f local.h
if test -n "$ACM"
then
	sed /ACKM/s/'".*"'/'"'$ACM'"'/ <../h/local.h >local.h
	if cmp -s ../h/local.h local.h
	then :
	else
		cp local.h ../h
	fi
	: 'remove the scratch copy in both cases'
	rm -f local.h
	echo "Your default machine to compile for is $ACM"
else
	echo "No default machine is known for your system, ../h/local.h is left unchanged"
fi
: 'VAX BSD only: install the include directory and select the BSD version'
: 'in mach/vax2/libem/system.h and mach/vax4/libem/system.h'
case `ack_sys` in
vax_bsd4_*)
	echo 'Installing the include directory in lib/vax2'
	( cd ../lib/vax2 ; sh fetch_inc )
	echo Done
	case `ack_sys` in
	vax_bsd4_1a) VERS=BSD41a ;;
	vax_bsd4_1c) VERS=BSD41c ;;
	vax_bsd4_2) VERS=BSD42 ;;
	*)	echo "Unknown VAX BSD version, look at mach/vax[24]/libem"
		: 'A break was used here, but outside a loop it stops nothing, so'
		: 'ed went on to edit system.h with an empty version pattern'
		VERS= ;;
	esac
	if test -n "$VERS"
	then
		for i in vax2 vax4
		do (
			cd ../mach/$i/libem
			ed - system.h <<ABC
g/^#/s/.*/\/* & *\//
/$VERS/s/^.*#/#/
/$VERS/s/*\/.*$//
w
q
ABC
		) done
		echo 'mach/vax[24]/libem/system.h reflects your BSD version.'
	fi
esac

View file

@ -1,7 +0,0 @@
/* collection of options, selected by including or excluding 'defines' */
/* Version number of the EM object code */
# define VERSION 3 /* 16 bits number */
/* The default machine used by ack, acc, apc */
/*
 * NOTE: the installation script named first rewrites the quoted machine
 * name on the line below with sed; keep the name and its string together.
 */
# define ACKM "vax2"

View file

@ -1,10 +0,0 @@
# Nothing has to be built or compared in this directory.
install:

cmp:

# Send the listing made by "make pr" to opr.
opr:
	make pr | opr

pr:
	@pr Makefile *.h

clean:
	-rm -f *.old

View file

@ -1,12 +0,0 @@
/* Archive definitions. */
/* Magic number; presumably stored at the start of an archive -- TODO confirm */
#define ARMAG 0177545
/* Header describing one archive member */
struct ar_hdr {
char ar_name[14];
long ar_date;
char ar_uid;
char ar_gid;
int ar_mode;
long ar_size;
};
/*
 * NOTE(review): 26 equals 14+4+1+1+2+4, the sum of the member sizes when int
 * is 2 bytes and long is 4 bytes; 22 equals the same sum without ar_size.
 * Presumably these are the external sizes of the header -- confirm against
 * the code that reads and writes archives.
 */
#define AR_TOTAL 26
#define AR_SIZE 22

View file

@ -1 +0,0 @@
/*
 * as_magic: sp_magic in the low bits combined with 14 shifted left by 8.
 * sp_magic is not defined in this file; presumably it comes from em_spec.h
 * -- TODO confirm.
 */
#define as_magic (sp_magic|(14<<8))

View file

@ -1,29 +0,0 @@
#include <stdio.h>
/* $Header$ */
/* BASIC file io definitions */
/* Defined elsewhere; presumably the current read channel, the current write
 * channel and the current channel number -- TODO confirm */
extern FILE *_chanrd;
extern FILE *_chanwr;
extern int _chann;
/* BASIC file descriptor table */
/* Channel assignment:
-1 terminal IO
0 data file
1-15 user files
*/
/* FILE MODES:*/
/* presumably input, output and random access -- TODO confirm */
#define IMODE 1
#define OMODE 2
#define RMODE 3
/* One entry of the file descriptor table */
typedef struct {
char *fname;
FILE *fd;
int pos;
int mode;
int reclength;
}Filedesc;
/* 16 entries; presumably indexed by channel number 0-15 -- TODO confirm */
extern Filedesc _fdtable[16];

View file

@ -1,17 +0,0 @@
#
/* $Header$ */
/* Strings are allocated in a fixed string descriptor table
** This mechanism is used to avoid string copying as much as possible
*/
/* String descriptor.  strcount is presumably a reference count and strlength
 * the length of strval -- TODO confirm against the string routines. */
typedef struct{
char *strval;
int strcount;
int strlength;
} String;
/* Returns a pointer to a String; the parameters are not declared here */
String *_newstr() ;
/* presumably the size of the string descriptor table -- TODO confirm */
#define MAXSTRING 1024

View file

@ -1,156 +0,0 @@
/* offsets of interesting fields in EM-pattern */
#define PO_HASH 0
#define PO_NEXT 1
#define PO_MATCH 3
#define ILLHASH 0177777
/* Escapes in printstrings */
#define PR_TOK '\001'
#define PR_TOKFLD '\002'
#define PR_EMINT '\003'
#define PR_EMSTR '\004'
#define PR_ALLREG '\005'
#define PR_SUBREG '\006'
/*
* In case this list gets longer remember to keep out printable nonprintables
* like \t \n \r and the like.
*/
/* Commands for codegenerator, in low order 5 bits of byte */
#define DO_NEXTEM 0
#define DO_MATCH 1
#define DO_XMATCH 2
#define DO_XXMATCH 3
#define DO_REMOVE 4
#define DO_DEALLOCATE 5
#define DO_REALLOCATE 6
#define DO_ALLOCATE 7
#define DO_LOUTPUT 8
#define DO_ROUTPUT 9
#define DO_MOVE 10
#define DO_ERASE 11
#define DO_TOKREPLACE 12
#define DO_EMREPLACE 13
#define DO_COST 14
#define DO_RETURN 15
#define DO_COERC 16
#define DO_PRETURN 17
#define DO_RREMOVE 18
typedef struct instance {
int in_which;
# define IN_COPY 1
# define IN_RIDENT 2
# define IN_ALLOC 3
# define IN_DESCR 4
# define IN_REGVAR 5
int in_info[TOKENSIZE+1];
} inst_t,*inst_p;
/* Cost pair: size and time, each stored as an index into the enode table */
typedef struct {
int c_size; /* index in enode-table */
int c_time; /* ditto */
} cost_t,*cost_p;
typedef struct {
int m_set1; /* number of tokenexpr in move: from */
int m_expr1; /* optional expression */
int m_set2; /* number of tokenexpr in move: to */
int m_expr2; /* optional expression */
int m_cindex; /* code index to really do it */
cost_t m_cost; /* associated cost */
} move_t, *move_p;
typedef struct {
int set_size;
short set_val[SETSIZE];
} set_t,*set_p;
struct exprnode {
short ex_operator;
short ex_lnode;
short ex_rnode;
};
typedef struct exprnode node_t;
typedef struct exprnode *node_p;
typedef struct { /* to stack coercions */
int c1_texpno; /* token expression number */
int c1_expr; /* boolean expression */
int c1_prop; /* property of register needed */
int c1_codep; /* code index */
cost_t c1_cost; /* cost involved */
} c1_t,*c1_p;
/*
 * Splitting coercion.  Declared only when the including file has defined
 * MAXSPLIT, which also fixes the number of replacement instances.
 */
#ifdef MAXSPLIT
typedef struct {			/* splitting coercions */
	int c2_texpno;			/* token expression number */
	int c2_nsplit;			/* split factor */
	int c2_repl[MAXSPLIT];		/* replacement instances */
	int c2_codep;			/* code index */
} c2_t,*c2_p;
#endif /* MAXSPLIT */
typedef struct { /* one to one coercions */
int c3_texpno; /* token expression number */
int c3_prop; /* property of register needed */
int c3_repl; /* replacement instance */
int c3_codep; /* code index */
} c3_t,*c3_p;
/*
* contents of .ex_operator
*/
#define EX_TOKFIELD 0
#define EX_ARG 1
#define EX_CON 2
#define EX_ALLREG 3
#define EX_SAMESIGN 4
#define EX_SFIT 5
#define EX_UFIT 6
#define EX_ROM 7
#define EX_NCPEQ 8
#define EX_SCPEQ 9
#define EX_RCPEQ 10
#define EX_NCPNE 11
#define EX_SCPNE 12
#define EX_RCPNE 13
#define EX_NCPGT 14
#define EX_NCPGE 15
#define EX_NCPLT 16
#define EX_NCPLE 17
#define EX_OR2 18
#define EX_AND2 19
#define EX_PLUS 20
#define EX_CAT 21
#define EX_MINUS 22
#define EX_TIMES 23
#define EX_DIVIDE 24
#define EX_MOD 25
#define EX_LSHIFT 26
#define EX_RSHIFT 27
#define EX_NOT 28
#define EX_COMP 29
#define EX_COST 30
#define EX_STRING 31
#define EX_DEFINED 32
#define EX_SUBREG 33
#define EX_TOSTRING 34
#define EX_UMINUS 35
#define EX_REG 36
#define EX_LOWW 37
#define EX_HIGHW 38
#define EX_INREG 39
#define EX_REGVAR 40
/*
 * getint(a,b): decode an integer from the byte stream b into a and advance b.
 * A first byte below 128 is the value itself; otherwise the value is
 * ((first byte - 128) << BSHIFT) combined with the next byte.  Every byte is
 * masked with BMASK.  BMASK and BSHIFT are not defined in this header.
 * NOTE(review): the macro expands to a bare if statement, so using it as the
 * body of an unbraced if that has an else attaches that else to the macro.
 */
#define getint(a,b) \
if ((a=((*(b)++)&BMASK)) >= 128) {\
a = ((a-128)<<BSHIFT) | (*(b)++&BMASK); \
}

View file

@ -1,154 +0,0 @@
/* $Header$ */
/* offsets of interesting fields in EM-pattern */
#define PO_HASH 0
#define PO_NEXT 1
#define PO_MATCH 3
#define ILLHASH 0177777
/* Commands for codegenerator, in low order 5 bits of byte */
#define DO_NEXTEM 0
#define DO_MATCH 1
#define DO_XMATCH 2
#define DO_XXMATCH 3
#define DO_REMOVE 4
#define DO_DEALLOCATE 5
#define DO_REALLOCATE 6
#define DO_ALLOCATE 7
#define DO_MOVE 10
#define DO_ERASE 11
#define DO_TOKREPLACE 12
#define DO_EMREPLACE 13
#define DO_COST 14
#define DO_RETURN 15
#define DO_COERC 16
#define DO_PRETURN 17
#define DO_RREMOVE 18
#define DO_INSTR 19
#define DO_TEST 20
#define DO_DLINE 21
#define DO_SETCC 22
#ifndef MAXATT
#define MAXATT TOKENSIZE
#endif
typedef struct instance {
short in_which;
# define IN_COPY 1
# define IN_MEMB 2
# define IN_RIDENT 3
# define IN_ALLOC 4
# define IN_DESCR 5
# define IN_S_DESCR 6
# define IN_D_DESCR 7
short in_info[MAXATT+1];
} inst_t,*inst_p;
typedef struct set {
short set_size;
short set_val[SETSIZE];
} set_t,*set_p;
typedef struct {
short m_set1; /* number of tokenexpr in move: from */
short m_expr1; /* optional expression */
short m_set2; /* number of tokenexpr in move: to */
short m_expr2; /* optional expression */
short m_cindex; /* code index to really do it */
} move_t, *move_p;
typedef struct {
short t_set; /* number of tokenexpr in test */
short t_expr; /* optional expression */
short t_cindex; /* code index to really do it */
} test_t, *test_p;
struct exprnode {
short ex_operator;
short ex_lnode;
short ex_rnode;
};
typedef struct exprnode node_t;
typedef struct exprnode *node_p;
/*
 * contents of .ex_operator
 * (the value 30 is not assigned in this list)
 */
#define EX_TOKFIELD 0
#define EX_ARG 1
#define EX_CON 2
#define EX_ALLREG 3
#define EX_SAMESIGN 4
#define EX_SFIT 5
#define EX_UFIT 6
#define EX_ROM 7
#define EX_NCPEQ 8
#define EX_SCPEQ 9
#define EX_RCPEQ 10
#define EX_NCPNE 11
#define EX_SCPNE 12
#define EX_RCPNE 13
#define EX_NCPGT 14
#define EX_NCPGE 15
#define EX_NCPLT 16
#define EX_NCPLE 17
#define EX_OR2 18
#define EX_AND2 19
#define EX_PLUS 20
#define EX_CAT 21
#define EX_MINUS 22
#define EX_TIMES 23
#define EX_DIVIDE 24
#define EX_MOD 25
#define EX_LSHIFT 26
#define EX_RSHIFT 27
#define EX_NOT 28
#define EX_COMP 29
#define EX_STRING 31
#define EX_DEFINED 32
#define EX_SUBREG 33
#define EX_TOSTRING 34
#define EX_UMINUS 35
#define EX_REG 36
#define EX_LOWW 37
#define EX_HIGHW 38
#define EX_INREG 39
#define EX_REGVAR 40
/* Table entry for a coercion to the stack; every field is a short index or number */
typedef struct { /* to stack coercions */
short c1_texpno; /* token expression number */
short c1_expr; /* boolean expression */
short c1_prop; /* property of register needed */
short c1_codep; /* code index */
} c1_t,*c1_p;
/*
 * Table entry for a splitting coercion.
 * Only declared when MAXSPLIT is defined; MAXSPLIT also fixes the number of
 * replacement instances that c2_repl can hold.
 */
#ifdef MAXSPLIT
typedef struct { /* splitting coercions */
	short c2_texpno; /* token expression number */
	short c2_expr; /* optional boolean expression */
	short c2_nsplit; /* split factor */
	short c2_repl[MAXSPLIT]; /* replacement instances */
	short c2_codep; /* code index */
} c2_t,*c2_p;
#endif /* MAXSPLIT */
/* Table entry for a one to one coercion; every field is a short index or number */
typedef struct { /* one to one coercions */
short c3_texpno; /* token expression number */
short c3_expr; /* boolean expression */
short c3_prop; /* property of register needed */
short c3_repl; /* replacement instance */
short c3_codep; /* code index */
} c3_t,*c3_p;
/*
 * getint(a,b): fetch an integer from the byte stream at b into a,
 * advancing b past the bytes used.
 * The first byte, masked with BMASK, is the value itself when it is below
 * 128; otherwise its excess over 128 is shifted left by BSHIFT and or-ed
 * with the next masked byte.  BMASK and BSHIFT are not defined here.
 * The expansion is a bare if-statement: an `else' written directly after a
 * use of the macro would attach to it.  a is evaluated more than once.
 */
#define getint(a,b) \
if ((a=((*(b)++)&BMASK)) >= 128) {\
a = ((a-128)<<BSHIFT) | (*(b)++&BMASK); \
}

View file

@ -1,30 +0,0 @@
/*
 * LINO and FILN are lvalues: an int at byte offset LINO_AD and a char *
 * at byte offset FILN_AD from the address returned by _hol0().
 */
#define LINO_AD 0
#define FILN_AD 4
#define LINO (*(int *)(_hol0()+LINO_AD))
#define FILN (*(char **)(_hol0()+FILN_AD))
/*
 * Error numbers.  The values 11..15 are not assigned in this list.
 * NOTE(review): these look like the EM machine trap numbers - confirm
 * against the EM manual before relying on the individual meanings.
 */
#define EARRAY 0
#define ERANGE 1
#define ESET 2
#define EIOVFL 3
#define EFOVFL 4
#define EFUNFL 5
#define EIDIVZ 6
#define EFDIVZ 7
#define EIUND 8
#define EFUND 9
#define ECONV 10
#define ESTACK 16
#define EHEAP 17
#define EILLINS 18
#define EODDZ 19
#define ECASE 20
#define EMEMFLT 21
#define EBADPTR 22
#define EBADPC 23
#define EBADLAE 24
#define EBADMON 25
#define EBADLIN 26
#define EBADGTO 27

View file

@ -1,12 +0,0 @@
/*
 * The different hint codes that can be given after a mes ms_ego.
 * The comment behind each code lists the arguments that follow it;
 * all six take the same list.
 *
 * Yet to be stabilized
 */
#define ego_live 0 /* ,offset,size,regno */
#define ego_dead 1 /* ,offset,size,regno */
#define ego_assoc 2 /* ,offset,size,regno */
#define ego_unass 3 /* ,offset,size,regno */
#define ego_init 4 /* ,offset,size,regno */
#define ego_update 5 /* ,offset,size,regno */

View file

@ -1,25 +0,0 @@
/*
 * flags: two bit fields within one value, all constants octal.
 * EM_PAR selects the low four bits, EM_FLO the two bits above them.
 */
#define EM_PAR 0017 /* parameter type */
#define EM_FLO 0060 /* flow information */
/* types: values of the EM_PAR field */
#define PAR_NO 0000 /* no parameter */
#define PAR_C 0001 /* constant */
#define PAR_D 0002 /* double word constant */
#define PAR_N 0003 /* numeric (>=0) */
#define PAR_F 0004 /* address offset */
#define PAR_L 0005 /* addressing locals/parameters */
#define PAR_G 0006 /* addressing globals */
#define PAR_W 0007 /* size: word multiple, fits word, possibly indirect */
#define PAR_S 0010 /* size: word multiple */
#define PAR_Z 0011 /* size: zero or word multiple */
#define PAR_O 0012 /* size: word multiple or word fraction */
#define PAR_P 0013 /* procedure name */
#define PAR_B 0014 /* branch: instruction label */
#define PAR_R 0015 /* register number (0,1,2) */
/* flow: values of the EM_FLO field */
#define FLO_NO 0000 /* straight on */
#define FLO_C 0020 /* conditional branch */
#define FLO_P 0040 /* procedure: call and return */
#define FLO_T 0060 /* terminate: no return */

View file

@ -1,21 +0,0 @@
/*
 * mnemonics for the message numbers in EM
 * (consecutive values 0..11)
 */
#define ms_err 0 /* Compilation error occurred, ignore rest of module */
#define ms_opt 1 /* Disable optimization please */
#define ms_emx 2 /* Wordsize and pointersize assumed */
#define ms_reg 3 /* Hint for possible register usage from frontend */
#define ms_src 4 /* Number of source lines in this module */
#define ms_flt 5 /* Floating point used */
#define ms_com 6 /* Comment to be retained in compact code */
#define ms_ret 7 /* Reserved */
#define ms_ext 8 /* List of exported symbols from this library module */
#define ms_par 9 /* Number of bytes of parameters accessed */
#define ms_ego 10 /* Hint from EM Global Optimizer */
#define ms_gto 11 /* Dangerous procedure, uses nonlocal goto */
/*
 * for details about ms_reg, see em_reg.h
 * for details about ms_ego, see em_ego.h
 */

View file

@ -1,8 +0,0 @@
/*
 * ptyp(x): single-bit mask for parameter type x, with bit 0 standing for
 * sp_fspec (sp_fspec is not defined in this file).
 * The argument is parenthesized so that any expression, including one with
 * operators of lower precedence than `-', expands correctly.
 */
#define ptyp(x) (1<<((x)-sp_fspec))
/*
 * Octal masks over those bits.
 * NOTE(review): presumably each is the union of ptyp() bits for one class
 * of parameter types - confirm against the sp_* definitions.
 */
#define cst_ptyp 0000140
#define sym_ptyp 0000034
#define arg_ptyp 0000574
#define con_ptyp 0036000
#define val_ptyp 0037777
#define any_ptyp 0137777

View file

@ -1,10 +0,0 @@
/*
 * mes ms_reg,offset,size,type,priority
 *
 * Here are the defines for type, the fourth argument of that message
 */
#define reg_any 0 /* Unspecified type */
#define reg_loop 1 /* loop control variable */
#define reg_pointer 2 /* pointer variable */
#define reg_float 3 /* floating point variable */

120
h/out.h
View file

@ -1,120 +0,0 @@
/* $Header$ */
/*
* output format for ACK assemblers
*/
/* ushort is supplied as a macro unless a macro of that name already exists */
#ifndef ushort
#define ushort unsigned short
#endif /* ushort */
/*
 * File header.  The three oh_n* ushort counts give the number of outsect,
 * outrelo and outname structures; the two long fields give byte totals.
 */
struct outhead {
ushort oh_magic; /* magic number */
ushort oh_stamp; /* version stamp */
ushort oh_flags; /* several format flags */
ushort oh_nsect; /* number of outsect structures */
ushort oh_nrelo; /* number of outrelo structures */
ushort oh_nname; /* number of outname structures */
long oh_nemit; /* sum of all os_flen */
long oh_nchar; /* size of string area */
};
#define O_MAGIC 0x0201 /* magic number of output file */
#define O_STAMP 0 /* version stamp */
/*
 * Header flag bits.  The two byte/word order flags exist only when JOHAN
 * is defined.
 */
#ifdef JOHAN
#define HF_BREV 0x0001 /* high order byte lowest address */
#define HF_WREV 0x0002 /* high order word lowest address */
#endif /* JOHAN */
#define HF_LINK 0x0004 /* unresolved references left */
#define HF_8086 0x0008 /* os_base specially encoded */
/* Section descriptor: position and size both in the target machine and in the file */
struct outsect {
long os_base; /* startaddress in machine */
long os_size; /* section size in machine */
long os_foff; /* startaddress in file */
long os_flen; /* section size in file */
long os_lign; /* section alignment */
};
/* Relocation record: what kind of reference, where it is, and which symbol it refers to */
struct outrelo {
char or_type; /* type of reference */
char or_sect; /* referencing section */
ushort or_nami; /* referenced symbol index */
long or_addr; /* referencing address */
};
/*
 * Symbol table entry.  The name is held in a union: a pointer while in
 * core, an offset while in the file; on_mptr and on_foff are shorthands
 * for the two union members.
 */
struct outname {
union {
char *on_ptr; /* symbol name (in core) */
long on_off; /* symbol name (in file) */
} on_u;
#define on_mptr on_u.on_ptr
#define on_foff on_u.on_off
ushort on_type; /* symbol type */
ushort on_desc; /* debug info */
long on_valu; /* symbol value */
};
/*
 * relocation type bits
 * RELSZ masks the length field; RELBR and RELWR are only available when
 * JOHAN is not defined.
 */
#define RELSZ 0x07 /* relocation length */
#define RELO1 1 /* 1 byte */
#define RELO2 2 /* 2 bytes */
#define RELO4 4 /* 4 bytes */
#define RELPC 0x08 /* pc relative */
#ifndef JOHAN
#define RELBR 0x10 /* High order byte lowest address. */
#define RELWR 0x20 /* High order word lowest address. */
#endif /* JOHAN */
/*
 * section type bits and fields
 */
#define S_TYP 0x007F /* undefined, absolute or relative */
#define S_EXT 0x0080 /* external flag */
#define S_ETC 0x7F00 /* for symbolic debug, bypassing 'as' */
/*
 * S_TYP field values
 */
#define S_UND 0x0000 /* undefined item */
#define S_ABS 0x0001 /* absolute item */
#define S_MIN 0x0002 /* first user section */
#define S_MAX S_TYP /* last user section */
/*
 * S_ETC field values
 * (S_COM is only available when JOHAN is not defined)
 */
#define S_SCT 0x0100 /* section names */
#define S_LIN 0x0200 /* hll source line item */
#define S_FIL 0x0300 /* hll source file item */
#define S_MOD 0x0400 /* ass source file item */
#ifndef JOHAN
#define S_COM 0x1000 /* Common name. */
#endif /* JOHAN */
/*
 * structure format strings
 * One digit per field, in declaration order, giving that field's size in
 * bytes in the file: SF_HEAD for outhead, SF_SECT for outsect, SF_RELO for
 * outrelo, SF_NAME for outname.
 */
#define SF_HEAD "22222244"
#define SF_SECT "44444"
#define SF_RELO "1124"
#define SF_NAME "4224"
/*
 * structure sizes (bytes in file; add digits in SF_*)
 * These must be kept in step with the format strings above.
 */
#define SZ_HEAD 20
#define SZ_SECT 20
#define SZ_RELO 8
#define SZ_NAME 12
/*
 * file access macros
 * x is a struct outhead.  BADMAGIC is nonzero when the magic number is
 * wrong.  Each OFF_* is built from the previous one plus the size of the
 * part that precedes it: section table, emitted bytes, relocation table,
 * name table, string area.  OFF_SECT does not use its argument.
 * The counts are widened to long before multiplying.
 */
#define BADMAGIC(x) ((x).oh_magic!=O_MAGIC)
#define OFF_SECT(x) SZ_HEAD
#define OFF_EMIT(x) (OFF_SECT(x) + ((long)(x).oh_nsect * SZ_SECT))
#define OFF_RELO(x) (OFF_EMIT(x) + (x).oh_nemit)
#define OFF_NAME(x) (OFF_RELO(x) + ((long)(x).oh_nrelo * SZ_RELO))
#define OFF_CHAR(x) (OFF_NAME(x) + ((long)(x).oh_nname * SZ_NAME))

View file

@ -1,22 +0,0 @@
/*
 * Error numbers in two consecutive ranges: 64..73 and 96..106.
 * NOTE(review): the second range looks file-I/O related judging by the
 * names - confirm against the code that raises them.
 */
#define EARGC 64
#define EEXP 65
#define ELOG 66
#define ESQT 67
#define EASS 68
#define EPACK 69
#define EUNPACK 70
#define EMOD 71
#define EBADF 72
#define EFREE 73
#define EWRITEF 96
#define EREADF 97
#define EEOF 98
#define EFTRUNC 99
#define ERESET 100
#define EREWR 101
#define ECLOSE 102
#define EREAD 103
#define EWRITE 104
#define EDIGIT 105
#define EASCII 106

View file

@ -1,19 +0,0 @@
/*
 * Octal constants: five distinct single-bit masks followed by MAGIC.
 * NOTE(review): presumably the masks apply to the flags field of
 * struct file and MAGIC marks a valid file - confirm against the users.
 */
#define WRBIT 0100000
#define TXTBIT 040000
#define EOFBIT 020000
#define ELNBIT 010000
#define WINDOW 04000
#define MAGIC 0252
/*
 * Per-file control block; ends with a 512-byte buffer embedded in the
 * structure itself.
 * NOTE(review): field meanings (current position, descriptor, element
 * size, counters) are inferred from the names only - confirm.
 */
struct file {
char *ptr;
unsigned flags;
char *fname;
int ufd;
int size;
int count;
int buflen;
char bufadr[512];
};
/*
 * EXTFL(z): the struct file found at offset _extfl[z] from _hbase
 * (_hbase and _extfl are not defined here).
 */
#define EXTFL(z) ((struct file *)(_hbase + _extfl[z]))

View file

@ -1,21 +0,0 @@
/* fundamental */
#define sz_byte 1
#define sz_bool 1
#define sz_char 1
/* fixed for the time being */
#define sz_word 2
#define sz_int 2
#define sz_long 4
/* variable (see pc.c): looked up in the sizes[] array, which is not declared here */
#define sz_addr sizes[0]
#define sz_real sizes[1]
#define sz_head sizes[2]
#define sz_buff sizes[3]
#define sz_mset sizes[4]
#define sz_iset sizes[5]
/* highest index used in sizes[] */
#define sz_last 5
/* parenthesized so the product stays one operand wherever it is used, e.g. x / sz_proc */
#define sz_proc (2*sz_addr)

View file

@ -1,25 +0,0 @@
/* $Header$ */
/* Name of the table of contents member; can be overridden by defining SYMDEF beforehand */
#ifndef SYMDEF
# define SYMDEF "__.SYMDEF"
#endif /* SYMDEF */
/*
 * Structure of the SYMDEF table of contents for an archive.
 * SYMDEF begins with a long giving the number of ranlib
 * structures that immediately follow, and then continues with a string
 * table consisting of a long giving the number of bytes of
 * strings that follow and then the strings themselves.
 *
 * In each entry the symbol name is a union: a pointer while in core, an
 * offset while in the file; ran_ptr and ran_off are shorthands for the
 * two union members.
 */
struct ranlib {
union {
char *ran__ptr; /* symbol name (in core) */
long ran__off; /* symbol name (in file) */
} ran_u;
#define ran_ptr ran_u.ran__ptr
#define ran_off ran_u.ran__off
long ran_pos; /* library member is at this position */
};
/* size in bytes of one entry in the file, and its format: two 4-byte fields */
#define SZ_RAN 8
#define SF_RAN "44"

View file

@ -1 +0,0 @@
/* jmp_buf is an array of 256 chars; being an array type, it is passed to functions by address */
typedef char jmp_buf[256];

View file

@ -1,42 +0,0 @@
/*
 * Error codes
 * Consecutive values 1..32, followed by three codes used by the math
 * software (33..35).
 */
#define EPERM 1
#define ENOENT 2
#define ESRCH 3
#define EINTR 4
#define EIO 5
#define ENXIO 6
#define E2BIG 7
#define ENOEXEC 8
#define EBADF 9
#define ECHILD 10
#define EAGAIN 11
#define ENOMEM 12
#define EACCES 13
#define EFAULT 14
#define ENOTBLK 15
#define EBUSY 16
#define EEXIST 17
#define EXDEV 18
#define ENODEV 19
#define ENOTDIR 20
#define EISDIR 21
#define EINVAL 22
#define ENFILE 23
#define EMFILE 24
#define ENOTTY 25
#define ETXTBSY 26
#define EFBIG 27
#define ENOSPC 28
#define ESPIPE 29
#define EROFS 30
#define EMLINK 31
#define EPIPE 32
/* math software */
#define EDOM 33
#define ERANGE 34
#define EQUOT 35

View file

@ -1,21 +0,0 @@
/*
 * Signal numbers 1..15.
 * NOTE(review): NSIG is presumably one more than the highest signal
 * number the system supports; only 1..15 are named here - confirm.
 */
#define NSIG 17
#define SIGHUP 1 /* hangup */
#define SIGINT 2 /* interrupt */
#define SIGQUIT 3 /* quit */
#define SIGILL 4 /* illegal instruction (not reset when caught) */
#define SIGTRAP 5 /* trace trap (not reset when caught) */
#define SIGIOT 6 /* IOT instruction */
#define SIGEMT 7 /* EMT instruction */
#define SIGFPE 8 /* floating point exception */
#define SIGKILL 9 /* kill (cannot be caught or ignored) */
#define SIGBUS 10 /* bus error */
#define SIGSEGV 11 /* segmentation violation */
#define SIGSYS 12 /* bad argument to system call */
#define SIGPIPE 13 /* write on a pipe with no one to read it */
#define SIGALRM 14 /* alarm clock */
#define SIGTERM 15 /* software termination signal from kill */
/*
 * signal is declared without a parameter list, as a function returning a
 * pointer to a function returning int.
 */
int (*signal())();
/* the values 0 and 1 cast to that handler pointer type */
#define SIG_DFL (int (*)())0
#define SIG_IGN (int (*)())1

View file

@ -1,34 +0,0 @@
tail_bc.a
abs.c
asc.c
asrt.c
atn.c
chr.c
conversion.c
hlt.c
mki.c
oct.c
peek.c
power.c
exp.c
log.c
print.c
io.c
random.c
read.c
return.c
sgn.c
sin.c
fif.e
sqt.c
fef.e
stop.c
string.c
salloc.c
swap.c
trace.c
write.c
file.c
error.c
trap.c
setline.e

View file

@ -1,10 +0,0 @@
/* $Header$ */
/* _abl: absolute value of the long i */
long _abl(i)
long i;
{
	if (i < 0)
		return (-i);
	return (i);
}
/* _abr: absolute value of the double f; values comparing >= 0.0 are returned unchanged */
double _abr(f)
double f;
{
	if (f >= 0.0)
		return (f);
	return (-f);
}

View file

@ -1,11 +0,0 @@
#include "bc_string.h"
/* $Header$ */
/*
 * _asc: return the first character of the string str.
 * error(3) is called when str or its strval is null; should error() ever
 * return, the character is fetched regardless.
 */
int _asc(str)
String *str;
{
	if (!str || !str->strval)
		error(3);
	return (str->strval[0]);
}

View file

@ -1,9 +0,0 @@
/* $Header$ */
/*
 * asrt: run-time assertion.
 * When b is zero, prints "ASSERTION ERROR" and calls abort(); otherwise
 * does nothing.  No value is returned explicitly, so callers must not use
 * the result.
 * The return and parameter types are written out: both were implicit int,
 * which is no longer valid C since C99.
 */
int asrt(b)
int b;
{
	if(!b){
		printf("ASSERTION ERROR\n");
		abort();
	}
}

View file

@ -1,93 +0,0 @@
/*
* (c) copyright 1983 by the Vrije Universiteit, Amsterdam, The Netherlands.
*
* This product is part of the Amsterdam Compiler Kit.
*
* Permission to use, sell, duplicate or disclose this software must be
* obtained in writing. Requests for such permissions may be sent to
*
* Dr. Andrew S. Tanenbaum
* Wiskundig Seminarium
* Vrije Universiteit
* Postbox 7161
* 1007 MC Amsterdam
* The Netherlands
*
*/
/* $Header$ */
/* Author: J.W. Stevenson */
/*
floating-point arctangent
atan returns the value of the arctangent of its
argument in the range [-pi/2,pi/2].
there are no error returns.
coefficients are #5077 from Hart & Cheney. (19.56D)
*/
/*
 * Constants: sq2p1 and sq2m1 are sqrt(2)+1 and sqrt(2)-1, pio2 and pio4
 * are pi/2 and pi/4; p0..p4 and q0..q4 are the numerator and denominator
 * coefficients of the rational approximation used by xatan.
 */
static double sq2p1 = 2.414213562373095048802e0;
static double sq2m1 = .414213562373095048802e0;
static double pio2 = 1.570796326794896619231e0;
static double pio4 = .785398163397448309615e0;
static double p4 = .161536412982230228262e2;
static double p3 = .26842548195503973794141e3;
static double p2 = .11530293515404850115428136e4;
static double p1 = .178040631643319697105464587e4;
static double p0 = .89678597403663861959987488e3;
static double q4 = .5895697050844462222791e2;
static double q3 = .536265374031215315104235e3;
static double q2 = .16667838148816337184521798e4;
static double q1 = .207933497444540981287275926e4;
static double q0 = .89678597403663861962481162e3;

/*
	xatan: rational approximation, meant for arguments in
	[-0.414...,+0.414...].  Both polynomials are in arg*arg and are
	evaluated by Horner's rule; the quotient is multiplied by arg.
*/
static double
xatan(arg)
double arg;
{
	double sq = arg*arg;
	double num;
	double den;

	num = ((((p4*sq + p3)*sq + p2)*sq + p1)*sq + p0);
	den = (((((sq + q4)*sq + q3)*sq + q2)*sq + q1)*sq + q0);
	return((num/den)*arg);
}

/*
	satan: arctangent for the arguments handed over by _atn.
	Small arguments go to xatan directly; the other two cases are
	first mapped onto an argument for xatan, with pi/2 or pi/4
	added back afterwards.
*/
static double
satan(arg)
double arg;
{
	if(arg < sq2m1)
		return(xatan(arg));
	if(arg > sq2p1)
		return(pio2 - xatan(1/arg));
	return(pio4 + xatan((arg-1)/(arg+1)));
}

/*
	_atn: arctangent of arg.  Arguments that are not greater than
	zero are negated before and after the call of satan.
*/
double
_atn(arg)
double arg;
{
	return((arg > 0) ? satan(arg) : -satan(-arg));
}

View file

@ -1,17 +0,0 @@
#include "bc_string.h"
/* $Header$ */
/*
 * _chr: build a one-character string from the character code i.
 * error(3) is called when i lies outside 0..127.  The string is made by
 * _newstr() from a local two-byte buffer holding the character and a
 * terminating zero.
 */
String *_chr(i)
int i;
{
	char buf[2];

	if (i < 0 || i > 127)
		error(3);
	buf[0] = i;
	buf[1] = 0;
	return (_newstr(buf));
}

View file

@ -1,40 +0,0 @@
/* $Header$ */
/*
 * _cint: convert f to int, rounding halves away from zero.
 * error(4) is called first when f lies outside -32768..32767.
 */
int _cint(f)
double f;
{
	if (f < -32768 || f > 32767)
		error(4);
	return ((int)(f < 0 ? f - 0.5 : f + 0.5));
}
/* _trunc: drop the fraction of f by converting it to long and back */
double _trunc(f)
double f;
{
	return ((double)(long)f);
}
/*
 * _fcint: round f down to a whole number (toward minus infinity) and
 * return it as a double.  The value must fit in a long.
 * Negative whole numbers are returned unchanged; formerly they came out
 * one too low (-2.0 gave -3.0), although every other input was floored.
 */
double _fcint(f)
double f;
{
	long r;

	r = f;		/* the conversion truncates toward zero */
	if (f < 0 && (double)r != f)
		r -= 1;	/* negative with a fraction: step down to the floor */
	f = r;
	return(f);
}
/*
 * _fix: convert f to int via its magnitude: _fcint() of the absolute
 * value, multiplied by the sign obtained from _sgn().
 * error(4) is called first when f lies outside -32768..32767.
 */
int _fix(f)
double f;
{
	double mag;
	int r;

	if( f<-32768.0 || f>32767.0) error(4);
	mag = (f>0.0) ? f : -f;
	r = _sgn(f) * _fcint(mag);
	return(r);
}

View file

@ -1,63 +0,0 @@
/* $Header$ */
/* error takes an error value in the range of 0-255 */
/* and generates a trap */
/*
 * Message text per error number.  Entries 0..40 are initialized, followed
 * by an explicit null pointer; the array is declared with 255 elements,
 * so all remaining entries are null as well.
 */
char *errortable[255]={
/* 0 */ "",
/* 1 */ "RETURN without GOSUB",
/* 2 */ "Out of data",
/* 3 */ "Illegal function call",
/* 4 */ "Overflow",
/* 5 */ "Out of memory",
/* 6 */ "Undefined line ",
/* 7 */ "Subscript out of range",
/* 8 */ "Redimensioned array",
/* 9 */ "Division by zero",
/* 10 */ "Illegal indirect",
/* 11 */ "Type mismatch",
/* 12 */ "Out of string space",
/* 13 */ "String too long",
/* 14 */ "String formula too complex",
/* 15 */ "Can't continue",
/* 16 */ "Undefined user function",
/* 17 */ "No resume",
/* 18 */ "Resume without error",
/* 19 */ "Unprintable error",
/* 20 */ "Missing operand",
/* 21 */ "Line buffer overflow",
/* 22 */ "FOR without NEXT",
/* 23 */ "WHILE without WEND",
/* 24 */ "WEND without WHILE",
/* 25 */ "Field overflow",
/* 26 */ "Internal error",
/* 27 */ "Bad file number",
/* 28 */ "File not found",
/* 29 */ "Bad file mode",
/* 30 */ "File already open",
/* 31 */ "Disk IO error",
/* 32 */ "File already exists",
/* 33 */ "Disk full",
/* 34 */ "Input past end",
/* 35 */ "Bad record number",
/* 36 */ "Bad file name",
/* 37 */ "Direct statement in file",
/* 38 */ "Too many files",
/* 39 */ "File not open",
/* 40 */ "Syntax error in data",
0
};
/*
 * error: report run-time error number index and trap.
 * Calls _setline(), prints the line number taken from _erlsym together
 * with the error number and its text from errortable (numbers outside
 * 0..40 are reported as "Unprintable error"), stores the number in
 * _errsym and calls _trap().
 * No value is returned explicitly, so callers must not use the result.
 * The return type is written out: it was implicit int, which is no
 * longer valid C since C99.
 */
int error(index)
int index;
{
	extern int _errsym;
	extern int _erlsym;

	_setline();
	if( index<0 || index >40 )
		printf("LINE %d:ERROR %d: Unprintable error\n",_erlsym,index);
	else printf("LINE %d:ERROR %d: %s\n",_erlsym,index,errortable[index]);
	_errsym= index;
	_trap();
}

Some files were not shown because too many files have changed in this diff Show more