980 lines
29 KiB
Plaintext
980 lines
29 KiB
Plaintext
|
.NH 1
|
||
|
How lint checks
|
||
|
.NH 2
|
||
|
The first pass data structure
|
||
|
.PP
|
||
|
The data structure of
|
||
|
.I cem
|
||
|
is changed a little and some structures have been added.
|
||
|
.NH 3
|
||
|
The changes
|
||
|
.NH 4
|
||
|
Idf descriptor
|
||
|
.PP
|
||
|
A member
|
||
|
.ft CW
|
||
|
id_line
|
||
|
.R
|
||
|
is added
|
||
|
to the
|
||
|
.I idf
|
||
|
selector.
|
||
|
This line number is used for some warnings.
|
||
|
.NH 4
|
||
|
Def descriptor
|
||
|
.PP
|
||
|
The
|
||
|
.I def
|
||
|
selector is extended with the members
|
||
|
.ft CW
|
||
|
df_set
|
||
|
.R and
|
||
|
df_line.
|
||
|
.R
|
||
|
The
|
||
|
.ft CW
|
||
|
df_used
|
||
|
.R
|
||
|
member did exist already, but was only used for code generation.
|
||
|
This usage is eliminated so it can be used by
|
||
|
.I lint.
|
||
|
The meaning of these members should be clear.
|
||
|
.NH 3
|
||
|
The additions
|
||
|
.NH 4
|
||
|
Lint_stack_entry descriptor
|
||
|
.DS B
|
||
|
.ft CW
|
||
|
struct lint_stack_entry {
|
||
|
struct lint_stack_entry *next;
|
||
|
struct lint_stack_entry *previous;
|
||
|
short ls_class;
|
||
|
int ls_level;
|
||
|
struct state *ls_current;
|
||
|
union {
|
||
|
struct state *S_if;
|
||
|
struct state *S_end;
|
||
|
struct switch_states switch_state;
|
||
|
} ls_states;
|
||
|
};
|
||
|
.R
|
||
|
.DE
|
||
|
.PP
|
||
|
Structure to simulate a stacking mechanism.
|
||
|
.IP \f(CWnext\fP 15
|
||
|
Pointer to the entry on top of this one.
|
||
|
.IP \f(CWprevious\fP
|
||
|
Pointer to the entry beneath this one.
|
||
|
.IP \f(CWls_class\fP
|
||
|
The class of statement this entry belongs to.
|
||
|
Possible classes are \f(CWIF\fP, \f(CWWHILE\fP, \f(CWDO\fP,
|
||
|
\f(CWFOR\fP, \f(CWSWITCH\fP and \f(CWCASE\fP.
|
||
|
.IP \f(CWls_level\fP
|
||
|
The level at which the corresponding statement is nested.
|
||
|
.IP \f(CWls_current\fP
|
||
|
A pointer to the state descriptor which describes the state
|
||
|
of the function (the state of the automatic variables, if the next
|
||
|
statement can be reached, et cetera) if control passes the
|
||
|
flow of control to the part of the program currently parsed.
|
||
|
The initialization of this state is as follows
|
||
|
.RS
|
||
|
.IP
|
||
|
If \f(CWls_class\fP in [\f(CWIF\fP, \f(CWSWITCH\fP] the state
|
||
|
after parsing the conditional expression.
|
||
|
.IP
|
||
|
If \f(CWls_class\fP in [\f(CWWHILE\fP, \f(CWFOR\fP] the state
|
||
|
after parsing the code between the brackets.
|
||
|
.IP
|
||
|
If \f(CWls_class\fP in [\f(CWDO\fP, \f(CWCASE\fP] the state at
|
||
|
entrance of the statement after the \f(CWDO\fP or \f(CWCASE\fP
|
||
|
token.
|
||
|
.RE
|
||
|
.IP \f(CWls_states\fP 15
|
||
|
Union of pointers to state descriptors containing different information
|
||
|
for different values of \f(CWls_class\fP.
|
||
|
.RS
|
||
|
.IP
|
||
|
If \f(CWls_class\fP is \f(CWIF\fP and in case of parsing an else part,
|
||
|
\f(CWls_states.S_if\fP points to the state that is reached after the
|
||
|
if part.
|
||
|
.IP
|
||
|
If \f(CWls_class\fP in [\f(CWWHILE\fP, \f(CWFOR\fP, \f(CWDO\fP]
|
||
|
then \f(CWls_states.S_end\fP contains a conservative description
|
||
|
of the state of the program after `jumping'
|
||
|
to the end of the statement after the \f(CWWHILE\fP, \f(CWDO\fP
|
||
|
or \f(CWFOR\fP token.
|
||
|
I.e. the state at reaching a break (not inside a switch) or
|
||
|
continue statement.
|
||
|
.IP
|
||
|
If ls_class is \f(CWSWITCH\fP, \f(CWls_states\fP is used as a structure
|
||
|
.DS B
|
||
|
.ft CW
|
||
|
struct switch_states {
|
||
|
struct state *S_case;
|
||
|
struct state *S_break;
|
||
|
int default_met;
|
||
|
};
|
||
|
.R
|
||
|
.DE
|
||
|
containing two pointers to state descriptors.
|
||
|
\f(CWls_states.switch_state.S_case\fP contains
|
||
|
a conservative description
|
||
|
of the state of the program after \f(CWcase ... case\fP
|
||
|
parts are parsed.
|
||
|
\f(CWls_states.switch_state.S_break\fP the state after parsing
|
||
|
all the \f(CWcase ... break\fP parts.
|
||
|
The reason for \f(CWls_states.switch_state.default_met\fP should be
|
||
|
self-explanatory.
|
||
|
.IP
|
||
|
In case \f(CWls_class\fP is \f(CWCASE\fP, \f(CWls_states\fP is not used.
|
||
|
.RE
|
||
|
.NH 4
|
||
|
State descriptor
|
||
|
.DS B
|
||
|
.ft CW
|
||
|
struct state {
|
||
|
struct state *next;
|
||
|
struct auto_def *st_auto_list;
|
||
|
int st_nrchd;
|
||
|
int st_warned;
|
||
|
};
|
||
|
.R
|
||
|
.DE
|
||
|
.IP \f(CWst_auto_list\fP 15
|
||
|
Pointer to a list of definitions of the automatic variables whose
|
||
|
scope contains the current position in the program.
|
||
|
.IP \f(CWst_nrchd\fP
|
||
|
True if the next statement can't be reached.
|
||
|
.IP \f(CWst_warned\fP
|
||
|
True if a warning has already been given.
|
||
|
.NH 4
|
||
|
Auto_def descriptor
|
||
|
.DS B
|
||
|
.ft CW
|
||
|
struct auto_def {
|
||
|
struct auto_def *next;
|
||
|
struct idf *ad_idf;
|
||
|
struct def *ad_def;
|
||
|
int ad_used;
|
||
|
int ad_set;
|
||
|
int ad_maybe_set;
|
||
|
};
|
||
|
.R
|
||
|
.DE
|
||
|
.IP \f(CWnext\fP 15
|
||
|
Points to the next auto_definition of the list.
|
||
|
.IP \f(CWad_idf\fP
|
||
|
Pointer to the idf descriptor associated with this auto_definition.
|
||
|
.IP \f(CWad_def\fP
|
||
|
Ditto for def descriptor.
|
||
|
.IP \f(CWad_used\fP
|
||
|
Indicates the state of this automatic variable.
|
||
|
Ditto for \f(CWad_set\fP and \f(CWad_maybe_set\fP.
|
||
|
Only one of \f(CWad_set\fP and \f(CWad_maybe_set\fP may be true.
|
||
|
.NH 4
|
||
|
Expr_state descriptor
|
||
|
.DS B
|
||
|
.ft CW
|
||
|
struct expr_state {
|
||
|
struct expr_state *next;
|
||
|
struct idf *es_idf;
|
||
|
arith es_offset;
|
||
|
int es_used;
|
||
|
int es_set;
|
||
|
};
|
||
|
.R
|
||
|
.DE
|
||
|
.PP
|
||
|
This structure is introduced to keep track of which variables,
|
||
|
array entries and structure members (union members) are set
|
||
|
and/or used in evaluating an expression.
|
||
|
.IP \f(CWnext\fP 15
|
||
|
Pointer to the next descriptor of this list.
|
||
|
.IP \f(CWes_idf\fP
|
||
|
Pointer to the idf descriptor this descriptor belongs to.
|
||
|
.IP \f(CWes_offset\fP
|
||
|
In case of an array, a structure or union, this member contains
|
||
|
the offset the compiler would generate for locating the array
|
||
|
entry or structure/union member.
|
||
|
.IP \f(CWes_used\fP
|
||
|
True if the indicated memory location is used in evaluating the
|
||
|
expression.
|
||
|
.IP \f(CWes_set\fP
|
||
|
Ditto for set.
|
||
|
.NH 4
|
||
|
Outdef descriptor
|
||
|
.DS B
|
||
|
.ft CW
|
||
|
struct outdef {
|
||
|
int od_class;
|
||
|
char *od_name;
|
||
|
char *od_file;
|
||
|
unsigned int od_line;
|
||
|
int od_nrargs;
|
||
|
struct tp_entry *od_entry;
|
||
|
int od_returns;
|
||
|
struct type *od_type;
|
||
|
};
|
||
|
.DE
|
||
|
.R
|
||
|
.PP
|
||
|
As structures of this type are not allocated dynamically by a
|
||
|
storage allocator, it contains no next member.
|
||
|
An outdef can be given to \f(CWoutput_def()\fP to be passed to the
|
||
|
second pass.
|
||
|
Basically this forms the interface with the second pass.
|
||
|
.IP \f(CWod_class\fP 15
|
||
|
Indicates what kind of definition it is.
|
||
|
Possible classes are \f(CWEFDF\fP, \f(CWEVDF\fP, \f(CWSFDF\fP,
|
||
|
\f(CWSVDF\fP, \f(CWLFDF\fP, \f(CWLVDF\fP,
|
||
|
\f(CWEFDC\fP, \f(CWEVDC\fP, \f(CWIFDC\fP, \f(CWFC\fP, \f(CWVU\fP.
|
||
|
([\f(CWE\fPxternal, \f(CWS\fPtatic, \f(CWL\fPibrary, \f(CWI\fPmplicit]
|
||
|
[\f(CWF\fPunction, \f(CWV\fPariable]
|
||
|
[\f(CWD\fPe\f(CWF\fPinition, \f(CWD\fPe\f(CWC\fPlaration,
|
||
|
\f(CWC\fPall, \f(CWU\fPsage])
|
||
|
.IP \f(CWod_name\fP
|
||
|
The name of the function or variable.
|
||
|
.IP \f(CWod_file\fP
|
||
|
The file this definition comes from.
|
||
|
.IP \f(CWod_nrargs\fP
|
||
|
If \f(CWod_class\fP is one of \f(CWEFDF\fP, \f(CWSFDF\fP or
|
||
|
\f(CWLFDF\fP, this member contains the
|
||
|
number of arguments this function has.
|
||
|
If the function was preceded by the pseudocomment
|
||
|
\f(CW/*\ VARARGS\ */\fP,
|
||
|
\f(CWod_nrargs\fP gets the value \f(CW-1-n\fP.
|
||
|
.IP \f(CWod_entry\fP
|
||
|
A pointer to a list of \f(CWod_nrargs\fP cells, each containing a
|
||
|
pointer to the type descriptor of an argument. (\f(CW-1-od_nrargs\fP
|
||
|
cells if
|
||
|
\f(CWod_nrargs < 0\fP.)
|
||
|
\f(CWTp_entry\fP is defined as
|
||
|
.DS B
|
||
|
.ft CW
|
||
|
struct tp_entry {
|
||
|
struct tp_entry *next; /* pointer to next cell */
|
||
|
struct type *te_type; /* an argument type */
|
||
|
};
|
||
|
.R
|
||
|
.DE
|
||
|
.IP \f(CWod_returns\fP 15
|
||
|
For classes \f(CWEFDF\fP, \f(CWSFDF\fP and \f(CWLFDF\fP this
|
||
|
member tells if the function returns an expression or not.
|
||
|
In case \f(CWod_class\fP is \f(CWFC\fP it is true if the value
|
||
|
of the function is used, false otherwise.
|
||
|
For other classes this member is not used.
|
||
|
.IP \f(CWod_type\fP
|
||
|
A pointer to the type of the function or variable defined or
|
||
|
declared.
|
||
|
Not used for classes \f(CWFC\fP and \f(CWVU\fP.
|
||
|
.NH 2
|
||
|
The first pass checking mechanism
|
||
|
.PP
|
||
|
In the description of the implementation of the pass one
|
||
|
warnings, it is assumed that the reader is familiar with the
|
||
|
\fILLgen\fP parser generator, as described in [6].
|
||
|
.NH 3
|
||
|
Used and/or set variables
|
||
|
.PP
|
||
|
To be able to give warnings like
|
||
|
.ft CW
|
||
|
%s used before set
|
||
|
.R
|
||
|
and
|
||
|
.ft CW
|
||
|
%s set but not used in function %s
|
||
|
.R
|
||
|
, there needs to be a way to keep track of the state of a variable.
|
||
|
A first approach to do this was by adding two fields to the
|
||
|
\fIdef\fP selector:
|
||
|
.ft CW
|
||
|
df_set
|
||
|
.R
|
||
|
and
|
||
|
.ft CW
|
||
|
df_used.
|
||
|
.R
|
||
|
While parsing the program, each time an expression was met
|
||
|
this expression was analyzed and the fields of each \fIdef\fP
|
||
|
selector were possibly set during this analysis.
|
||
|
This analysis was done by passing each expression to a
|
||
|
function
|
||
|
.ft CW
|
||
|
lint_expr
|
||
|
.R
|
||
|
, which walks the expression tree in a way similar to the function
|
||
|
\f(CWEVAL\fP in the file \fIeval.c\fP of the original
|
||
|
.I
|
||
|
cem
|
||
|
.R
|
||
|
compiler.
|
||
|
This approach has one big disadvantage: it is impossible to keep
|
||
|
track of the flow of control of the program.
|
||
|
No warning will be given for the program fragment of figure 3.
|
||
|
.KF
|
||
|
.DS B
|
||
|
.ft CW
|
||
|
func()
|
||
|
{
|
||
|
int i;
|
||
|
|
||
|
if (cond)
|
||
|
i = 0;
|
||
|
else
|
||
|
use(i); /* i may be used before set */
|
||
|
}
|
||
|
.I
|
||
|
.DE
|
||
|
.br
|
||
|
.ce
|
||
|
figure\ 3.
|
||
|
.R
|
||
|
.KE
|
||
|
.PP
|
||
|
It is clear that it would be nice having
|
||
|
.I lint
|
||
|
warn for this construction.
|
||
|
.PP
|
||
|
This was done in the second approach.
|
||
|
When there was a choice between two statements, each statement
|
||
|
was parsed with its own copy of the state at entrance of the
|
||
|
.I
|
||
|
choosing statement.
|
||
|
.R
|
||
|
A state consisted of the state of the automatic variables
|
||
|
(including register variables).
|
||
|
In addition to the possibilities of being used and set,
|
||
|
a variable could be \fImaybe set\fP.
|
||
|
These states were passed between the statement parsing routines
|
||
|
using the \fILLgen\fP parameter mechanism.
|
||
|
At the end of a choosing statement, the two states were merged
|
||
|
into one state, which became the state after this statement.
|
||
|
The construction of figure 3 was now detected, but switch
|
||
|
statements still gave problems and continue and break statements
|
||
|
were not understood.
|
||
|
The main problem of a switch statement is, that the closing bracket
|
||
|
(`\f(CW)\fP') has to be followed by a \fIstatement\fP.
|
||
|
The syntax shows no choice of statements, as is the case with
|
||
|
if, while, do and for statements.
|
||
|
Using the \fILLgen\fP parameter mechanism, it is not a trivial
|
||
|
task to parse the different case parts of a switch statement
|
||
|
with the same initial state and to merge the results into one
|
||
|
state.
|
||
|
This observation led to the third and final approach, as described
|
||
|
next.
|
||
|
.PP
|
||
|
Instead of passing the state of the program through the statements
|
||
|
parsing routines using the \fILLgen\fP parameters, a special stack is
|
||
|
introduced, the
|
||
|
.I lint_stack.
|
||
|
When a choosing statement is parsed, an entry is pushed on the stack
|
||
|
containing the information that is needed to keep track of the
|
||
|
state of the program.
|
||
|
Each entry contains a description of the
|
||
|
.I current
|
||
|
state of the program and a field that indicates what part of the
|
||
|
program the parser is currently parsing.
|
||
|
For all the possible choosing statements I describe the actions
|
||
|
to be taken.
|
||
|
.PP
|
||
|
At entrance of an if statement, an entry is pushed on the stack
|
||
|
with the current state being a copy of the current state of the
|
||
|
stack element one below.
|
||
|
The class of this entry is \f(CWIF\fP.
|
||
|
At reaching the else part, the current state is moved to
|
||
|
another place in this stack entry (to \f(CWS_IF\fP), and a new copy
|
||
|
of the current state at entrance of this if statement is made.
|
||
|
At the end of the else part, the two states are merged into
|
||
|
one state, the new current state, and the \f(CWIF\fP entry is
|
||
|
popped from the stack.
|
||
|
If there is no else part, then the state that is reached after
|
||
|
parsing the if part is merged with the current state at entrance
|
||
|
of the if statement into the new current state.
|
||
|
.PP
|
||
|
At entrance of a while statement a \f(CWWHILE\fP entry is pushed
|
||
|
on the stack containing a copy of the current state.
|
||
|
If a continue or break statement is met in the while statement,
|
||
|
the state at reaching this continue or break statement is
|
||
|
merged with a special state in the \f(CWWHILE\fP entry, called
|
||
|
\f(CWS_END\fP.
|
||
|
(If \f(CWS_END\fP did not yet contain a state, the state is copied
|
||
|
to \f(CWS_END\fP.)
|
||
|
At the end of the while statement this \f(CWS_END\fP is merged with the
|
||
|
current state, which result is merged with the state at entrance
|
||
|
of the while statement into the new current state.
|
||
|
.PP
|
||
|
A for statement is treated similarly.
|
||
|
A do statement is treated the same way too, except that \f(CWS_END\fP
|
||
|
isn't merged with the state at entrance of the do statement,
|
||
|
but becomes the new current state.
|
||
|
.PP
|
||
|
For switch statements a \f(CWSWITCH\fP entry is pushed on the stack.
|
||
|
Apart from the current state, this entry contains two other
|
||
|
states, \f(CWS_BREAK\fP and \f(CWS_CASE\fP.
|
||
|
\f(CWS_BREAK\fP initially contains no state, \f(CWS_CASE\fP
|
||
|
initially contains a
|
||
|
copy of the current state at entrance of the switch statement.
|
||
|
After parsing a case label, a \f(CWCASE\fP entry is pushed on the stack,
|
||
|
containing a copy of the current state.
|
||
|
If, after zero or more statements, we meet another case label,
|
||
|
the state at reaching this case label is merged with \f(CWS_CASE\fP
|
||
|
of the \f(CWSWITCH\fP entry below and a new copy of the state
|
||
|
at entrance
|
||
|
of the switch statement is put in the \f(CWCASE\fP entry.
|
||
|
If we meet a break statement, we merge the current state with
|
||
|
\f(CWS_BREAK\fP of the \f(CWSWITCH\fP entry below and pop the
|
||
|
\f(CWCASE\fP entry.
|
||
|
In addition to this, the occurrence of a default statement
|
||
|
inside the switch statement is recorded in the \f(CWSWITCH\fP entry.
|
||
|
At the end of the switch statement we check if we have met a
|
||
|
default statement.
|
||
|
If not, \f(CWS_BREAK\fP is merged with the current state at entrance
|
||
|
of the switch statement. (Because it is possible that no case
|
||
|
label will be chosen.)
|
||
|
Next the \f(CWS_CASE\fP is `special_merged' with \f(CWS_BREAK\fP
|
||
|
into the new current state.
|
||
|
For more details about these merge functions see the sources.
|
||
|
.PP
|
||
|
With the approach described above,
|
||
|
.I lint
|
||
|
is aware of the flow
|
||
|
of control in the program.
|
||
|
There still are some doubtful constructions
|
||
|
.I lint
|
||
|
will not detect and there are some constructions (although rare)
|
||
|
for which
|
||
|
.I lint
|
||
|
gives an incorrect warning (see figure 4).
|
||
|
.KF
|
||
|
.DS B
|
||
|
.ft CW
|
||
|
{
|
||
|
int i;
|
||
|
|
||
|
for (;;) {
|
||
|
if (cond) {
|
||
|
i = 0;
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
use(i);
|
||
|
/* lint warns: maybe i used before set
|
||
|
* although the fragment is correct
|
||
|
*/
|
||
|
}
|
||
|
.DE
|
||
|
.br
|
||
|
.I
|
||
|
.ce
|
||
|
figure\ 4.
|
||
|
.R
|
||
|
.KE
|
||
|
.PP
|
||
|
A nice advantage of the method is, that the parser stays clear,
|
||
|
i.e. it isn't extended with extra parameters which must pass the
|
||
|
states.
|
||
|
In this way the parser still is very readable and we have a nice
|
||
|
interface with
|
||
|
.I lint
|
||
|
using function calls.
|
||
|
.NH 3
|
||
|
Undefined evaluation orders
|
||
|
.PP
|
||
|
In expressions the values of some variables are used and some
|
||
|
variables are set.
|
||
|
Of course, the same holds for subexpressions.
|
||
|
The compiler is allowed to choose the order of evaluation of
|
||
|
subexpressions involving a commutative and associative operator
|
||
|
(\f(CW*\fP, \f(CW+\fP, \f(CW&\fP, \f(CW|\fP, \f(CW^\fP),
|
||
|
the comma in a parameter list or an assignment operator.
|
||
|
In section 3.4 it is made clear that this will lead to
|
||
|
statements with ambiguous semantics.
|
||
|
.PP
|
||
|
The way these constructs are detected is rather straightforward.
|
||
|
The function which parses an expression (\f(CWlint_expr\fP)
|
||
|
returns a linked
|
||
|
list containing information telling which variables are set and
|
||
|
which variables are used.
|
||
|
A variable is indicated by its
|
||
|
.I idf
|
||
|
descriptor and an
|
||
|
.I offset.
|
||
|
This offset is needed for discriminating entries of the same
|
||
|
array and members of the same structure or union, so it is
|
||
|
possible to warn about the statement
|
||
|
.ft CW
|
||
|
a[b[0]]\ =\ b[0]++;.
|
||
|
.R
|
||
|
When \f(CWlint_expr\fP meets a commutative operator (with respect to the
|
||
|
evaluation order), it calls itself recursively with the operands
|
||
|
of the operator as expression.
|
||
|
The returned results are checked for undefined evaluation orders
|
||
|
and are put together.
|
||
|
This is done by the function \f(CWcheck_and_merge\fP.
|
||
|
.NH 3
|
||
|
Useless statements
|
||
|
.PP
|
||
|
Statements which compute a value that is not used,
|
||
|
are said to have a \fInull effect\fP.
|
||
|
Examples are \f(CWx = 2, 3;\fP, \f(CWf() + g();\fP and
|
||
|
\f(CW*p++;\fP.
|
||
|
(\f(CW*\fP and \f(CW++\fP have the same precedence and associate
|
||
|
from right to left.)
|
||
|
.PP
|
||
|
A conditional expression computes a value too.
|
||
|
If this value isn't used, it is better to use an if-else
|
||
|
statement.
|
||
|
So, if
|
||
|
.I lint
|
||
|
sees
|
||
|
.DS B
|
||
|
.ft CW
|
||
|
b ? f() : g();
|
||
|
.R
|
||
|
.DE
|
||
|
.LP
|
||
|
it warns \f(CWuse if-else construction\fP.
|
||
|
.NH 3
|
||
|
Not-reachable statements
|
||
|
.PP
|
||
|
The algorithm to detect not-reachable statements (including not
|
||
|
reachable initializations) is as follows.
|
||
|
Statements after a label and a case statement and the compound
|
||
|
statement of a function are always reachable.
|
||
|
Other statements are not-reachable after:
|
||
|
.QS
|
||
|
.RS
|
||
|
.IP - 1
|
||
|
a goto statement
|
||
|
.IP -
|
||
|
a return statement
|
||
|
.IP -
|
||
|
a break statement
|
||
|
.IP -
|
||
|
a continue statement
|
||
|
.IP -
|
||
|
a switch statement
|
||
|
.IP -
|
||
|
an endless loop (a while, do or for loop with a conditional
|
||
|
which always evaluates to true and without a break statement)
|
||
|
.IP -
|
||
|
an if-else statement of which both if part and else part
|
||
|
end up in a not-reachable state
|
||
|
.IP -
|
||
|
a switch statement of which all \f(CWcase ... break\fP parts
|
||
|
(including
|
||
|
a \f(CWdefault ... break\fP part) end up in a not-reachable state
|
||
|
.IP -
|
||
|
the pseudocomment \f(CW/*\ NOTREACHED\ */\fP
|
||
|
.RE
|
||
|
.QE
|
||
|
.PP
|
||
|
The algorithm is easily implemented using the \f(CWst_nrchd\fP selector
|
||
|
in the
|
||
|
.I state
|
||
|
descriptor.
|
||
|
The \f(CWst_warned\fP selector is used to prevent superfluous warnings.
|
||
|
To detect an endless loop, after a while (<true>), for (..;<true>;..)
|
||
|
and do part the current state of the stack entry beneath the top one
|
||
|
is set to not reached.
|
||
|
If, in the statement following, a break statement is met, this same
|
||
|
state is set to reached.
|
||
|
If the while (<cond>) part of the do statement is met, this state
|
||
|
is set to reached if <cond> doesn't evaluate to true.
|
||
|
The detection of not-reachable statements after a switch statement
|
||
|
is done in a similar way.
|
||
|
In addition it is checked if a default statement isn't met, in
|
||
|
which case the statement after the switch statement can be reached.
|
||
|
The warning \f(CWstatement not reached\fP is not given for compound
|
||
|
statements.
|
||
|
If
|
||
|
.I lint
|
||
|
did, it would warn for the compound statement in a switch statement,
|
||
|
which would be incorrect.
|
||
|
.PP
|
||
|
Not-reachable statements are still interpreted by
|
||
|
.I lint.
|
||
|
I.e. when
|
||
|
.I lint
|
||
|
warns that some statement can't be reached, it assumes this is
|
||
|
not what the programmer really wants and it ignores this fact.
|
||
|
In this way a lot of useless warnings are prevented in the case of
|
||
|
a not-reachable statement.
|
||
|
See figure 5.
|
||
|
.KF
|
||
|
.DS B
|
||
|
.ft CW
|
||
|
{
|
||
|
int i;
|
||
|
|
||
|
for (;;) {
|
||
|
/* A loop in which the programmer
|
||
|
* forgot to introduce a conditional
|
||
|
* break statement.
|
||
|
* Suppose i is not used in this part.
|
||
|
*/
|
||
|
}
|
||
|
/* some more code in which i is used */
|
||
|
}
|
||
|
/* The warning "statement not reached" highlights the bug.
|
||
|
* An additional warning "i unused in function %s" is
|
||
|
* formally correct, but doesn't provide the programmer
|
||
|
* with useful information.
|
||
|
*/
|
||
|
.DE
|
||
|
.I
|
||
|
.ce
|
||
|
figure\ 5.
|
||
|
.R
|
||
|
.KE
|
||
|
.NH 3
|
||
|
Functions returning expressions and just returning
|
||
|
.PP
|
||
|
Each time a return statement is met,
|
||
|
.I lint
|
||
|
checks if an expression is returned or not.
|
||
|
If a function has a return with expression and a return without
|
||
|
expression,
|
||
|
.I lint
|
||
|
warns
|
||
|
.ft CW
|
||
|
function %s has return(e); and return;.
|
||
|
.R
|
||
|
If the flow of control can
|
||
|
.I
|
||
|
fall through
|
||
|
.R
|
||
|
the end of the compound statement of a function, this indicates
|
||
|
an implicit return statement without an expression.
|
||
|
If the end of the compound statement of the function can be reached,
|
||
|
.I lint
|
||
|
introduces this implicit return statement without expression.
|
||
|
.PP
|
||
|
Sometimes the programmer knows for sure that all case parts inside
|
||
|
a switch statement include all possible cases, so he doesn't
|
||
|
introduce a default statement.
|
||
|
This can lead to an incorrect warning.
|
||
|
Figure 6 shows how to prevent this warning.
|
||
|
.KF
|
||
|
.DS B
|
||
|
.ft CW
|
||
|
func()
|
||
|
{
|
||
|
switch (cond) {
|
||
|
case 0: return(e0);
|
||
|
case 1: return(e1);
|
||
|
}
|
||
|
/* NOTREACHED */
|
||
|
}
|
||
|
/* no warning: "function func has return(e); and return;" */
|
||
|
.DE
|
||
|
.I
|
||
|
.ce
|
||
|
figure\ 6.
|
||
|
.R
|
||
|
.KE
|
||
|
.PP
|
||
|
The pseudocomment \f(CW/*\ NOTREACHED\ */\fP can also be used to tell
|
||
|
.I lint
|
||
|
that some function doesn't return. See figure 7.
|
||
|
.KS
|
||
|
.DS B
|
||
|
.ft CW
|
||
|
func()
|
||
|
{
|
||
|
switch (cond) {
|
||
|
case 0: return(e0);
|
||
|
case 1: return(e1);
|
||
|
default: error(); /* calls exit or abort */
|
||
|
/* NOTREACHED */
|
||
|
}
|
||
|
}
|
||
|
/* no warning: "function func has return(e); and return;" */
|
||
|
.I
|
||
|
.DE
|
||
|
.ce
|
||
|
figure\ 7.
|
||
|
.R
|
||
|
.KE
|
||
|
.NH 3
|
||
|
Output definitions for the second pass
|
||
|
.PP
|
||
|
The first pass can only process one program file.
|
||
|
To be able to process a program that spreads over more than one file,
|
||
|
the first pass outputs definitions that are processed by a second
|
||
|
pass.
|
||
|
The format of such a definition is different for different classes:
|
||
|
.PP
|
||
|
For class in {EFDF, SFDF, LFDF}
|
||
|
.DS C
|
||
|
<name>:<class>:<file>:<line>:<nr of args>:<type of args>:<returns value>:<type>
|
||
|
.DE
|
||
|
.LP
|
||
|
A negative \fInr of args\fP indicates that the function can be called with
|
||
|
a varying number of arguments.
|
||
|
.PP
|
||
|
For class = FC
|
||
|
.DS C
|
||
|
<name>:<class>:<file>:<line>:<value is used>:<type>
|
||
|
.DE
|
||
|
.LP
|
||
|
The \fIvalue is used\fP part can have three meanings:
|
||
|
the value of the function is ignored;
|
||
|
the value of the function is used;
|
||
|
the value of the function is cast to type \fIvoid\fP.
|
||
|
.PP
|
||
|
For other classes
|
||
|
.DS C
|
||
|
<name>:<class>:<file>:<line>:<type>
|
||
|
.DE
|
||
|
.LP
|
||
|
Definitions of class VU (Variable Usage) are only output for \fIused\fP
|
||
|
global variables.
|
||
|
.PP
|
||
|
Structure and union types that are output to the intermediate file
|
||
|
are simplified.
|
||
|
(The following occurrences of \fIstructure\fP should be
|
||
|
read as \fIstructure or union\fP and \fIstruct\fP as \fIstruct or
|
||
|
union\fP.)
|
||
|
Structures that are identified by a \fIstructure tag\fP are output
|
||
|
to the intermediate file as \f(CWstruct <tag>\fP.
|
||
|
Structures without a structure tag are output as
|
||
|
\f(CWstruct {<mems>}\fP with \f(CW<mems>\fP a semicolon-separated
|
||
|
list of types of the members of this structure.
|
||
|
An alternative way would be to output the complete structure definition.
|
||
|
However, this gives practical problems.
|
||
|
It is allowed to define some object of a structure type with a
|
||
|
structure tag, without this structure being defined at that place.
|
||
|
The first approach leaves errors, such as in figure 8, undetected.
|
||
|
.KF
|
||
|
.DS B
|
||
|
.ft CW
|
||
|
"a.c" "b.c"
|
||
|
|
||
|
struct str { struct str {
|
||
|
float f; int i;
|
||
|
} s; };
|
||
|
|
||
|
main() func(s)
|
||
|
{ struct str s;
|
||
|
func(s); {}
|
||
|
}
|
||
|
.I
|
||
|
.DE
|
||
|
.ce
|
||
|
figure\ 8.
|
||
|
.R
|
||
|
.KE
|
||
|
.PP
|
||
|
To be able to detect these errors, the first pass should also output
|
||
|
definitions of structure tags.
|
||
|
The example of figure 8 would then get a warning like
|
||
|
.ft CW
|
||
|
structure str defined inconsistently
|
||
|
.R
|
||
|
.PP
|
||
|
More information on these definitions can be found in sections 4.3 and 4.4.
|
||
|
.NH 3
|
||
|
Generating libraries
|
||
|
.PP
|
||
|
.I Lint
|
||
|
knows the library `-lc', `-lm' and `-lcurses'.
|
||
|
If a program uses some other library, it is possible to generate
|
||
|
a corresponding \fIlint library\fP.
|
||
|
To do this, precede all the C source files of this library by
|
||
|
the pseudocomment \f(CW/*\ LINTLIBRARY\ */\fP.
|
||
|
Then feed these files one by one to the first pass of
|
||
|
.I lint
|
||
|
collecting the standard output in a file and ignoring the warnings.
|
||
|
The resulting file contains library definitions of the functions
|
||
|
and external variables defined in the library sources, and not more
|
||
|
than that.
|
||
|
If this file is called `llib-l\fIname\fP.ln'
|
||
|
.I lint
|
||
|
can be told to search the library by passing it as argument in
|
||
|
the command line `-llib-l\fIname\fP.ln'.
|
||
|
The implementation of this feature is simple.
|
||
|
.PP
|
||
|
As soon as the pseudocomment \f(CW/*\ LINTLIBRARY\ */\fP is met,
|
||
|
only function and variable definitions are output with class LFDF
|
||
|
and LVDF respectively.
|
||
|
Other definitions, which otherwise would have been output, are
|
||
|
discarded.
|
||
|
.PP
|
||
|
Instead of generating a special lint library file, one can make a
|
||
|
file containing the library definitions and starting with
|
||
|
\f(CW/* LINTLIBRARY */\fP.
|
||
|
This file can then be passed to
|
||
|
.I lint
|
||
|
just by its name.
|
||
|
This method isn't as efficient as the first one.
|
||
|
.NH 3
|
||
|
Interpreting the pseudocomments
|
||
|
.PP
|
||
|
The interpretation of the pseudocomments is done by the lexical
|
||
|
analyzer, because this part of the program already took care of the
|
||
|
comments.
|
||
|
At first sight this seems very easy: as soon as some pseudocomment
|
||
|
is met, raise the corresponding flag.
|
||
|
Unfortunately this doesn't work.
|
||
|
The lexical analyzer is a \fIone token look ahead scanner\fP.
|
||
|
This causes the above procedure to raise the flags one token too
|
||
|
soon.
|
||
|
A solution to get the right effect is to reserve two flags per
|
||
|
pseudocomment.
|
||
|
The first is set as soon as the corresponding pseudocomment is
|
||
|
scanned.
|
||
|
At the returning of each token this flag is moved to the second flag.
|
||
|
The delay in this way achieved makes the pseudocomments have effect
|
||
|
at the correct place.
|
||
|
.NH 2
|
||
|
The second pass data structure
|
||
|
.NH 3
|
||
|
Inp_def descriptor
|
||
|
.DS B
|
||
|
.ft CW
|
||
|
struct inp_def {
|
||
|
struct inp_def *next;
|
||
|
int id_class;
|
||
|
char id_name[NAMESIZE];
|
||
|
char id_file[FNAMESIZE];
|
||
|
unsigned int id_line;
|
||
|
int id_nrargs;
|
||
|
char id_argtps[ARGSTPSSIZE];
|
||
|
int id_returns;
|
||
|
char id_type[TYPESIZE];
|
||
|
int id_called;
|
||
|
int id_used;
|
||
|
int id_ignored;
|
||
|
int id_voided;
|
||
|
};
|
||
|
.R
|
||
|
.DE
|
||
|
.PP
|
||
|
This descriptor is very similar to the \fIoutdef\fP descriptor as
|
||
|
described in 4.1.2.5.
|
||
|
There are some differences too.
|
||
|
.IP \f(CWnext\fP 15
|
||
|
As structures of this type are allocated dynamically, this field
|
||
|
is added so the same memory allocator as used in the first pass can be
|
||
|
used.
|
||
|
.LP
|
||
|
\f(CWid_called
|
||
|
.br
|
||
|
id_used
|
||
|
.br
|
||
|
id_ignored\fP
|
||
|
.IP \f(CWid_voided\fP 15
|
||
|
Some additional fields only used for function definitions. Their
|
||
|
meaning should be clear.
|
||
|
.PP
|
||
|
The other fields have the same meaning as the corresponding fields
|
||
|
in the \fIoutdef\fP descriptor.
|
||
|
Some attention should be paid to \f(CWid_argtps\fP and \f(CWid_type\fP.
|
||
|
These members have type \f(CWarray of char\fP, in contrast to
|
||
|
their counterparts in the \fIoutdef\fP descriptor.
|
||
|
The only operation performed on types is a check on equality.
|
||
|
Types are output by the first pass as a string describing the type.
|
||
|
The type of \f(CWi\fP in \f(CWint *i();\fP e.g. is output as
|
||
|
\f(CWint *()\fP.
|
||
|
Such a string is best put in an \f(CWarray of char\fP to be compared
|
||
|
easily.
|
||
|
.NH 2
|
||
|
The second pass checking mechanism
|
||
|
.PP
|
||
|
After all the definitions that are output by the first pass are
|
||
|
sorted by name, the definitions belonging to one name are ordered
|
||
|
as follows.
|
||
|
.QS
|
||
|
.RS
|
||
|
.IP - 1
|
||
|
external definitions
|
||
|
.IP -
|
||
|
static definitions
|
||
|
.IP -
|
||
|
library definitions
|
||
|
.IP -
|
||
|
declarations
|
||
|
.IP -
|
||
|
function calls
|
||
|
.IP -
|
||
|
variable usages
|
||
|
.RE
|
||
|
.QE
|
||
|
.PP
|
||
|
The main program of the second pass is easily explained.
|
||
|
For all different names, do the following.
|
||
|
First read the definitions.
|
||
|
If there is more than one definition, check for conflicts.
|
||
|
Then read the declarations, function calls and variable usages and
|
||
|
check them against the definitions.
|
||
|
After having processed all the declarations, function calls and
|
||
|
variable usages, check the definitions to see if they are used
|
||
|
correctly.
|
||
|
The next three paragraphs will explain the three most important
|
||
|
functions of the program.
|
||
|
.NH 3
|
||
|
Read_defs()
|
||
|
.PP
|
||
|
This function reads all definitions belonging to the same name.
|
||
|
Only one external definition is allowed, so if there are more, a
|
||
|
warning is given.
|
||
|
In different files it is allowed to define static functions or
|
||
|
variables with the same name.
|
||
|
So if a static function is read, \f(CWread_defs\fP checks if there isn't
|
||
|
already an external definition, and if not it puts the static
|
||
|
definition in the list of static definitions, to be used later.
|
||
|
If no external or static definitions are met, a library definition is
|
||
|
taken as definition.
|
||
|
If a function or a variable is defined with the same name as a function
|
||
|
or a variable in a library (which is allowed)
|
||
|
.I lint
|
||
|
gives a warning.
|
||
|
Of course it is also possible that there is no definition at all.
|
||
|
In that case \f(CWcheck\fP will warn.
|
||
|
.NH 3
|
||
|
Check()
|
||
|
.PP
|
||
|
\f(CWCheck\fP verifies declarations, function calls and variable
|
||
|
usages against the definitions.
|
||
|
For each of these entries the corresponding definition is looked up.
|
||
|
As there may be more than one static definition, first a static
|
||
|
definition from the same file as the entry is searched.
|
||
|
If not present, the external definition (which may be a library
|
||
|
definition) is taken as definition.
|
||
|
If no definition can be found and the current entry is an external
|
||
|
declaration,
|
||
|
.I lint
|
||
|
warns.
|
||
|
However in the case of an implicit function declaration
|
||
|
.I lint
|
||
|
will not warn, because
|
||
|
we will get a warning \f(CW%s used but not defined\fP later on.
|
||
|
Next a check is done if the declarations are consistent with their
|
||
|
definitions.
|
||
|
After the declarations, the function calls and variable usages are
|
||
|
verified against their corresponding definitions.
|
||
|
If no definition exists,
|
||
|
.I lint
|
||
|
warns.
|
||
|
Else the field \f(CWid_called\fP is set to 1.
|
||
|
(For variable definitions this should be interpreted as \fIused\fP.)
|
||
|
For variable usages this will be all.
|
||
|
If we are processing a function call we also check the number and types
|
||
|
of the arguments and we warn for function values which are used from
|
||
|
functions that don't return a value.
|
||
|
For each function call we record whether a function value is used,
|
||
|
ignored or voided.
|
||
|
.NH 3
|
||
|
Check_usage()
|
||
|
.PP
|
||
|
Checks if the external definition and static definitions are used
|
||
|
correctly.
|
||
|
If a function or variable is defined but never used,
|
||
|
.I lint
|
||
|
warns, except for library definitions.
|
||
|
Functions, which return a value but whose value is always or
|
||
|
sometimes ignored, get a warning.
|
||
|
(A function value which is voided (cast to void) is not ignored,
|
||
|
but it isn't used either.)
|
||
|
.bp
|