From c5275f3786f8c9f9e4c63bdd39401bbec4f7f9f4 Mon Sep 17 00:00:00 2001 From: ceriel Date: Mon, 30 Sep 1991 17:58:00 +0000 Subject: [PATCH] Added --- doc/lint/.distr | 15 + doc/lint/Makefile | 9 + doc/lint/abstract | 18 + doc/lint/appendix_A | 58 +++ doc/lint/appendix_B | 52 +++ doc/lint/chap1 | 34 ++ doc/lint/chap2 | 57 +++ doc/lint/chap3 | 294 +++++++++++++ doc/lint/chap4 | 979 ++++++++++++++++++++++++++++++++++++++++++++ doc/lint/chap5 | 107 +++++ doc/lint/chap6 | 107 +++++ doc/lint/chap7 | 139 +++++++ doc/lint/chap8 | 56 +++ doc/lint/chap9 | 48 +++ doc/lint/contents | 59 +++ doc/lint/frontpage | 14 + 16 files changed, 2046 insertions(+) create mode 100644 doc/lint/.distr create mode 100644 doc/lint/Makefile create mode 100644 doc/lint/abstract create mode 100644 doc/lint/appendix_A create mode 100644 doc/lint/appendix_B create mode 100644 doc/lint/chap1 create mode 100644 doc/lint/chap2 create mode 100644 doc/lint/chap3 create mode 100644 doc/lint/chap4 create mode 100644 doc/lint/chap5 create mode 100644 doc/lint/chap6 create mode 100644 doc/lint/chap7 create mode 100644 doc/lint/chap8 create mode 100644 doc/lint/chap9 create mode 100644 doc/lint/contents create mode 100644 doc/lint/frontpage diff --git a/doc/lint/.distr b/doc/lint/.distr new file mode 100644 index 000000000..4e797758b --- /dev/null +++ b/doc/lint/.distr @@ -0,0 +1,15 @@ +Makefile +abstract +appendix_A +appendix_B +chap1 +chap2 +chap3 +chap4 +chap5 +chap6 +chap7 +chap8 +chap9 +contents +frontpage diff --git a/doc/lint/Makefile b/doc/lint/Makefile new file mode 100644 index 000000000..3a17c50b3 --- /dev/null +++ b/doc/lint/Makefile @@ -0,0 +1,9 @@ +# $Header$ + +FP = frontpage + +DOC = abstract contents chap1 chap2 chap3 chap4 chap5 chap6 chap7\ + chap8 chap9 appendix_A appendix_B + +../lint.doc: $(FP) $(DOC) + cat $(FP) $(DOC) > ../lint.doc diff --git a/doc/lint/abstract b/doc/lint/abstract new file mode 100644 index 000000000..01b05e603 --- /dev/null +++ b/doc/lint/abstract @@ -0,0 +1,18 @@ 
+.TL +Lint, a C Program Checker +.AU +Frans Kunst +.AI +Vrije Universiteit +Amsterdam +.AB +This document describes an implementation of a program which +does an extensive consistency and plausibility check on a set +of C program files. +This may lead to warnings which help the programmer to debug +the program, to remove useless code and to improve his style. +The program has been used to test itself and has found +bugs in sources of some heavily used code. +.AE +.LP +.bp diff --git a/doc/lint/appendix_A b/doc/lint/appendix_A new file mode 100644 index 000000000..caaf7594d --- /dev/null +++ b/doc/lint/appendix_A @@ -0,0 +1,58 @@ +.ds +.SH +Appendix A +.LP +.SH +The warnings +.LP +.SH +Pass one warnings +.DS +.ft CW +%s may be used before set +maybe %s used before set +%s unused in function %s +%s set but not used in function %s +argument %s unused in function %s +static [variable, function] %s unused +%s declared extern but never used + +long conversion may lose accuracy +comparison of unsigned with negative constant +unsigned comparison with 0? +degenerate unsigned comparison +nonportable character comparison +possible pointer alignment problem + +%s evaluation order undefined + +null effect +constant in conditional context +use if-else construction +while (0) ? +do ... while (0) ? +[case, default] statement in strange context + +function %s has return(e); and return; +statement not reached +function %s declared %s but no value returned + +declare the VARARGS args you want checked! 
+.ft P +.DE +.SH +Pass two warnings +.DS +.ft CW +%s variable # of args +%s arg %d used inconsistently +%s multiply defined +%s value declared inconsistently +%s used but not defined +%s defined (%s(%d)) but never used +%s declared but never defined +%s value is used but none is returned +%s returns value which is [sometimes, always] ignored +%s also defined in library +.ft P +.DE diff --git a/doc/lint/appendix_B b/doc/lint/appendix_B new file mode 100644 index 000000000..362dbd2d4 --- /dev/null +++ b/doc/lint/appendix_B @@ -0,0 +1,52 @@ +.SH +Appendix B +.TL +The Ten Commandments for C Programmers +.AU +Henry Spencer +.IP 1 +Thou shalt run \fIlint\fR frequently and study its pronouncements with +care, for verily its perception and judgement oft exceed thine. +.IP 2 +Thou shalt not follow the NULL pointer, for chaos and madness await thee at +its end. +.IP 3 +Thou shalt cast all function arguments to the expected type if they are not +of that type already, even when thou art convinced that this is unnecessary, +lest they take cruel vengeance upon thee when thou least expect it. +.IP 4 +If thy header files fail to declare the return types of thy library functions, +thou shalt declare them thyself with the most meticulous care, +lest grievous harm befall thy program. +.IP 5 +Thou shalt check the array bounds of all strings (indeed, all arrays), +for surely where thou typest ``foo'' someone someday shall type +``supercalifragilisticexpialidocious''. +.IP 6 +If a function be advertised to return an error code in the event of +difficulties, thou shalt check for that code, yea, even though the checks +triple the size of thy code and produce aches in thy typing fingers, +for if thou thinkest ``it cannot happen to me'', +the gods shall surely punish thee for thy arrogance. +.IP 7 +Thou shalt study thy libraries and strive not to re-invent them without cause, +that thy code may be short and readable and thy days pleasant and productive. 
+.IP 8 +Thou shalt make thy program's purpose and structure +clear to thy fellow man by using the +One True Brace Style, +even if thou likest it not, +for thy creativity is better used in solving problems than in creating +beautiful new impediments to understanding. +.IP 9 +Thy external identifiers shall be unique in the first six characters, +though this harsh discipline be irksome and the years of its necessity +stretch before thee seemingly without end, +lest thou tear thy hair out and go mad on that fateful day when +thou desirest to make thy program run on an old system. +.IP 10 +Thou shalt foreswear, renounce, and abjure the vile heresy which claimeth +that ``All the world's a VAX'', and have no commerce with the +benighted heathens who cling to this barbarous belief, +that the days of thy program may be long even though the days of thy +current machine be short. diff --git a/doc/lint/chap1 b/doc/lint/chap1 new file mode 100644 index 000000000..580e9b6b1 --- /dev/null +++ b/doc/lint/chap1 @@ -0,0 +1,34 @@ +.NH 1 +Introduction +.PP +C [1][2] is a dangerous programming language. +The programmer is allowed to do almost anything, as long as +the syntax of the program is correct. +This has a reason. In this way it is possible to make a fast +compiler which produces fast code. +The compiler will be fast because it doesn't do much checking +at compile time. +The code is fast because the compiler doesn't generate run time +checks. +The programmer should protect himself against producing error +prone code. +One way to do that is to obey the +.I +Ten Commandments for C programmers +.R +[appendix B]. +This document describes an implementation of the +.I lint +program, as referred to in Commandment 1. +It is a common error to run +.I lint +only after a few hours of debugging and some +bug can't be found. +.I Lint +should be run when large pieces of new code are accepted by the +compiler and as soon as bugs arise. 
+Even for working programs it is useful to run +.I lint, +because it can find constructions that may lead to problems in +the future. +.bp diff --git a/doc/lint/chap2 b/doc/lint/chap2 new file mode 100644 index 000000000..aac7c33bb --- /dev/null +++ b/doc/lint/chap2 @@ -0,0 +1,57 @@ +.NH +Outline of the program +.PP +The program can be divided into three parts. A first pass, which +parses C program files and outputs definitions, a second pass which +processes the definitions and a driver, +which feeds the set of files to the first pass and +directs its output to the second pass. Both passes produce the +warnings on standard error output, which are redirected to standard +output by the driver. +.PP +The first pass is based on an existing C front end, called +.I cem +[3]. +.I Cem +is part of the Amsterdam Compiler Kit (ACK), as described in [4]. +.PP +Most of the code of +.I cem +is left unchanged. This has several reasons. A lot of work, which +is done by +.I cem +, must also be done by +.I lint. +E.g. the lexical analysis, the macro expansions, +the parsing part and the semantical analysis. +Only the code generation part is turned off. +An advantage of this approach is, that a person who understands +.I cem +will not have to spend too much time in understanding +.I lint. +.PP +All changes and extensions to +.I cem +can be turned off by not defining the compiler directive +.ft CW +LINT. +.R +Compiling should then result in the original C compiler. +.PP +The second pass is a much less complex program. +It reads simple definitions generated by the first pass and +checks their consistency. +This second pass gives warnings +about wrong usage of function arguments, their results and +about external variables, which are used and defined in more +than one file. +.PP +The driver is a shell program, to be executed by the +.UX +shell +.I sh. +It executes the two passes and lets them communicate through a +filter (sort). 
+Actually it is simplex communication: the first pass only talks to +the second pass through the filter. +.bp diff --git a/doc/lint/chap3 b/doc/lint/chap3 new file mode 100644 index 000000000..333529c16 --- /dev/null +++ b/doc/lint/chap3 @@ -0,0 +1,294 @@ +.NH +What lint checks +.NH 2 +Set, used and unused variables +.PP +We make a distinction between two classes of variables: +the class of automatic variables (including register variables) +and the other variables. +The other variables, global variables, static variables, formal +parameters et cetera, are assumed to have a defined value. +Global variables e.g., are initialized by the compiled code to +zero; formal parameters have a value which is equal to the value +of the corresponding actual parameter. +These variables can be used without explicitly initializing them. +The initial value of automatic variables is undefined (if they are +not initialized at declaration). +These variables should be set before they are used. +A variable is set by +.IP +.RS +.IP 1. +an assignment (including an initialization) +.IP 2. +taking the address +.RE +.PP +The first case is clear. The second case is plausible. +It would take too much effort (if at all possible) to check +if a variable is set through one of its aliases. +Because +.I lint +should not warn about correct constructs, it takes this conservative +approach. +Structures (and unions) can also be set by setting at +least one member. +Again a conservative approach. +An array can be set by using its name (e.g. as actual parameter +of a function call). +.I Lint +warns for usage as +.I rvalue +of automatic variables which are not set. +.PP +A variable is used if +.IP +.RS +.IP 1. +it is used as an +.I rvalue +.IP 2. +its address is taken +.IP +Arrays and structures (and unions) are also used if one entry +or one member respectively is used. +.RE +.PP +When a variable is never used in the part of the program where it is +visible, a warning is given. 
+For variables declared at the beginning of a compound statement, +a check is made at the end of this statement. +For formal parameters a check is made at the end of the function +definition. +At the end of a file this is done for global static definitions. +For external variables a warning can be given when all the files +are parsed. +.NH 2 +Flow of control +.PP +The way +.I lint +keeps track of the flow of control is best explained by means of +an example. +See the program of figure 1. +.KF +.DS B +.ft CW +if (cond) + /* a statement which is executed if cond is true, + * the if-part + */ +else + /* the else-part */ +.DE +.br +.ce +.I +figure\ 1. +.R +.KE +.PP +After evaluation of \f(CWcond\fP, two things can happen. +The if-part is executed or the else-part is executed (but not both). +Variables which are set in the if-part but not in the else-part, +need not be set after the if statement, and vice versa. +.I Lint +detects this and assumes these variables after the if statement to +be \fImaybe set\fR. +(See figure 2.) +.KF +.DS B +.ft CW +int cond; + +main() +{ + int i, j; + + if (cond) { + i = 0; + j = 0; + } + else + use(i); /* i may be used before set */ + use(j); /* maybe j used before set */ +} +.DE +.br +.ce +.I +figure 2. +.R +.KE +.PP +If both the if-part and the else-part are never left (i.e. they +contain an endless loop or a return statement), +.I lint +knows that the if statement is never left too. +Besides the if statement, +.I lint +knows the possible flows of control in while, do, for and +switch statements. +It also detects some endless loops like \f(CWwhile(1)\fP, +\f(CWdo ... while (1)\fP, \f(CWfor (;;)\fP. +.NH 2 +Functions +.PP +Most C compilers will not complain if a function is called with actual +parameters of a different type than the function expects. +Using a function in one file as a function of +type +.I A +while defining it in another file as a function of type +.I B +is also allowed by most compilers. 
+It needs no explanation that this can lead to serious trouble. +.PP +.I Lint +checks if functions are called with the correct number of arguments, +if the types of the actual parameters correspond with the types of +the formal parameters and if function values are used in a way +consistent with their declaration. +When the result of a function is used, a check is made to see if +the function returns a value. +When a function returns a value, +.I lint +checks if the values of all calls of this function are used. +.NH 2 +Undefined evaluation order +.PP +The semantics of C do not define evaluation orders for some +constructs, which, at first sight, seem well defined. +The evaluation order of the expression +.ft CW +a[i]\ =\ i++; +.R +e.g., is undefined. +It can be translated to something with the semantics of +.ft CW +a[i]\ =\ i; i++; +.R +which is what probably was meant, or +.ft CW +a[i+1]\ =\ i; i++;. +.R +An easier example to explain why, is +.ft CW +j\ =\ a[i]\ +\ i++;. +.R +`\f(CW+\fR' Is a so called +.I commutative +operator (with respect to the evaluation order) , as is `\f(CW=\fR'. +This allows the compiler to choose which term to evaluate first. +It is easy to see, that it makes a difference for the value of +.ft CW +j, +.R +which order is chosen. +The expression +.ft CW +i++ +.R +is said to have +.I +side effects. +.R +It affects the value of +.ft CW +i. +.R +Because this value is used in the other term, this gives a conflict. +.PP +A function call with reference to a variable as argument can have +side effects too. +Therefore, the evaluation order of +.ft CW +i +.R +in the expression +.ft CW +f(&i)\ +\ i +.R +is undefined. +When a function is called with an array as argument, this array +can be affected by the function, because only the address of the +array is passed to the function. +(In Pascal a copy of the array is passed to the function if the +formal parameter is not declared \fIvar\fP.) 
+So the evaluation order of +.ft CW +a +.R +in the expression +.ft CW +f(a)\ +\ a[0] +.R +is undefined. +This one is not yet detected by +.I lint. +.PP +Global variables can still cause trouble. +If function +.ft CW +f +.R +affects the global variable +.ft CW +i, +.R +the value of the expression +.ft CW +f()\ +\ i +.R +is undefined, because the evaluation order of \f(CWi\fP is undefined. +.PP +The evaluation order of the arguments of a function is not +defined, so the expression +.ft CW +f(i,\ i++) +.R +gives a warning +.ft CW +i evaluation order undefined. +.R +.NH 2 +Pointer alignment problems +.PP +For pointers to objects of different types there are different +alignment restrictions. +On some machines pointers to type char can have both odd and even +values, whereas pointers to type int should contain an even address. +.I Lint +could warn for all pointer conversions. +This is not what +.I lint +does. +.I Lint +assumes that some pointers are more restricted than others, and +that pointers of some types can safely be converted to a pointer +of a less restrictive type. +The order of restriction is as follows (`\(<=' means +`is not more restricted than') : +.PP +.ce +char \(<= short \(<= int \(<= long +.ce +float \(<= double +.NH 2 +Libraries +.PP +C is a small language. +As a matter of fact it has no i/o routines. +To make it a useful language, C is supported by libraries. +These libraries contain functions and variables that can be used by any +C program. +.I Lint +knows some libraries too. +At this moment it knows the `-\fIlc\fR', `-\fIlm\fR' and +`-\fIlcurses\fR' libraries. +The `-\fIlc\fR' library, containing definitions for functions from +chapter two and three of the \s-2UNIX\s+2 programmers manual, is default. +.I Lint +warns for definitions of functions or global variables with the +same name as a function definition in a library. 
+.bp diff --git a/doc/lint/chap4 b/doc/lint/chap4 new file mode 100644 index 000000000..009caa3c3 --- /dev/null +++ b/doc/lint/chap4 @@ -0,0 +1,979 @@ +.NH 1 +How lint checks +.NH 2 +The first pass data structure +.PP +The data structure of +.I cem +is changed a little and some structures have been added. +.NH 3 +The changes +.NH 4 +Idf descriptor +.PP +A member +.ft CW +id_line +.R +is added +to the +.I idf +selector. +This line number is used for some warnings. +.NH 4 +Def descriptor +.PP +The +.I def +selector is extended with the members +.ft CW +df_set +.R +and +\f(CWdf_line\fP. +The +.ft CW +df_used +.R +member did exist already, but was only used for code generation. +This usage is eliminated so it can be used by +.I lint. +The meaning of these members should be clear. +.NH 3 +The additions +.NH 4 +Lint_stack_entry descriptor +.DS B +.ft CW +struct lint_stack_entry { + struct lint_stack_entry *next; + struct lint_stack_entry *previous; + short ls_class; + int ls_level; + struct state *ls_current; + union { + struct state *S_if; + struct state *S_end; + struct switch_states switch_state; + } ls_states; +}; +.R +.DE +.PP +Structure to simulate a stacking mechanism. +.IP \f(CWnext\fP 15 +Pointer to the entry on top of this one. +.IP \f(CWprevious\fP +Pointer to the entry beneath this one. +.IP \f(CWls_class\fP +The class of statement this entry belongs to. +Possible classes are \f(CWIF\fP, \f(CWWHILE\fP, \f(CWDO\fP, +\f(CWFOR\fP, \f(CWSWITCH\fP and \f(CWCASE\fP. +.IP \f(CWls_level\fP +The level the corresponding statement is nested. +.IP \f(CWls_current\fP +A pointer to the state descriptor which describes the state +of the function (the state of the automatic variables, if the next +statement can be reached, et cetera) if control passes the +flow of control to the part of the program currently parsed. 
+The initialization of this state is as follows +.RS +.IP +If \f(CWls_class\fP in [\f(CWIF\fP, \f(CWSWITCH\fP] the state +after parsing the conditional expression. +.IP +If \f(CWls_class\fP in [\f(CWWHILE\fP, \f(CWFOR\fP] the state +after parsing the code between the brackets. +.IP +If \f(CWls_class\fP in [\f(CWDO\fP, \f(CWCASE\fP] the state at +entrance of the statement after the \f(CWDO\fP or \f(CWCASE\fP +token. +.RE +.IP \f(CWls_states\fP 15 +Union of pointers to state descriptors containing different information +for different values of \f(CWls_class\fP. +.RS +.IP +If \f(CWls_class\fP is \f(CWIF\fP and in case of parsing an else part, +\f(CWls_states.S_if\fP points to the state that is reached after the +if part. +.IP +If \f(CWls_class\fP in [\f(CWWHILE\fP, \f(CWFOR\fP, \f(CWDO\fP] +then \f(CWls_states.S_end\fP contains a conservative description +of the state of the program after `jumping' +to the end of the statement after the \f(CWWHILE\fP, \f(CWDO\fP +or \f(CWFOR\fP token. +I.e. the state at reaching a break (not inside a switch) or +continue statement. +.IP +If ls_class is \f(CWSWITCH\fP, \f(CWls_states\fP is used as a structure +.DS B +.ft CW +struct switch_states { + struct state *S_case; + struct state *S_break; +}; +.R +.DE +containing two pointers to state descriptors. +\f(CWls_states.switch_state.S_case\fP contains +a conservative description +of the state of the program after \f(CWcase ... case\fP +parts are parsed. +\f(CWls_states.switch_state.S_break\fP the state after parsing +all the \f(CWcase ... break\fP parts. +The reason for \f(CWls_states.switch_state.default_met\fP should be +self-explanatory. +.IP +In case \f(CWls_class\fP is \f(CWCASE\fP, \f(CWls_states\fP is not used. 
+.RE +.NH 4 +State descriptor +.DS B +.ft CW +struct state { + struct state *next; + struct auto_def *st_auto_list; + int st_nrchd; + int st_warned; +}; +.R +.DE +.IP \f(CWst_auto_list\fP 15 +Pointer to a list of definitions of the automatic variables whose +scope contains the current position in the program. +.IP \f(CWst_nrchd\fP +True if the next statement can't be reached. +.IP \f(CWst_warned\fP +True if a warning has already been given. +.NH 4 +Auto_def descriptor +.DS B +.ft CW +struct auto_def { + struct auto_def *next; + struct idf *ad_idf; + struct def *ad_def; + int ad_used; + int ad_set; + int ad_maybe_set; +}; +.R +.DE +.IP \f(CWnext\fP 15 +Points to the next auto_definition of the list. +.IP \f(CWad_idf\fP +Pointer to the idf descriptor associated with this auto_definition. +.IP \f(CWad_def\fP +Ditto for def descriptor. +.IP \f(CWad_used\fP +Indicates the state of this automatic variable. +Ditto for \f(CWad_set\fP and \f(CWad_maybe_set\fP. +Only one of \f(CWad_set\fP and \f(CWad_maybe_set\fP may be true. +.NH 4 +Expr_state descriptor +.DS B +.ft CW +struct expr_state { + struct expr_state *next; + struct idf *es_idf; + arith es_offset; + int es_used; + int es_set; +}; +.R +.DE +.PP +This structure is introduced to keep track of which variables, +array entries and structure members (union members) are set +and/or used in evaluating an expression. +.IP \f(CWnext\fP 15 +Pointer to the next descriptor of this list. +.IP \f(CWes_idf\fP +Pointer to the idf descriptor this descriptor belongs to. +.IP \f(CWes_offset\fP +In case of an array, a structure or union, this member contains +the offset the compiler would generate for locating the array +entry or structure/union member. +.IP \f(CWes_used\fP +True if the indicated memory location is used in evaluating the +expression. +.IP \f(CWes_set\fP +Ditto for set. 
+.NH 4 +Outdef descriptor +.DS B +.ft CW +struct outdef { + int od_class; + char *od_name; + char *od_file; + unsigned int od_line; + int od_nrargs; + struct tp_entry *od_entry; + int od_returns; + struct type *od_type; +}; +.DE +.R +.PP +As structures of this type are not allocated dynamically by a +storage allocator, it contains no next member. +An outdef can be given to \f(CWoutput_def()\fP to be passed to the +second pass. +Basically this forms the interface with the second pass. +.IP \f(CWod_class\fP 15 +Indicates what kind of definition it is. +Possible classes are \f(CWEFDF\fP, \f(CWEVDF\fP, \f(CWSFDF\fP, +\f(CWSVDF\fP, \f(CWLFDF\fP, \f(CWLVDF\fP, +\f(CWEFDC\fP, \f(CWEVDC\fP, \f(CWIFDC\fP, \f(CWFC\fP, \f(CWVU\fP. +([\f(CWE\fPxternal, \f(CWS\fPtatic, \f(CWL\fPibrary, \f(CWI\fPmplicit] +[\f(CWF\fPunction, \f(CWV\fPariable] +[\f(CWD\fPe\f(CWF\fPinition, \f(CWD\fPe\f(CWC\fPlaration, +\f(CWC\fPall, \f(CWU\fPsage]) +.IP \f(CWod_name\fP +The name of the function or variable. +.IP \f(CWod_file\fP +The file this definition comes from. +.IP \f(CWod_nrargs\fP +If \f(CWod_class\fP is one of \f(CWEFDF\fP, \f(CWSFDF\fP or +\f(CWLFDF\fP, this member contains the +number of arguments this function has. +If the function was preceded by the pseudocomment +\f(CW/*\ VARARGS\ */\fP, +\f(CWod_nrargs\fP gets the value \f(CW-1-n\fP. +.IP \f(CWod_entry\fP +A pointer to a list of \f(CWod_nrargs\fP cells, each containing a +pointer to the type descriptor of an argument. (\f(CW-1-od_nrargs\fP +cells if +\f(CWod_nrargs < 0\fP.) +\f(CWTp_entry\fP is defined as +.DS B +.ft CW +struct tp_entry { + struct tp_entry *next; /* pointer to next cell */ + struct type *te_type; /* an argument type */ +}; +.R +.DE +.IP \f(CWod_returns\fP 15 +For classes \f(CWEFDF\fP, \f(CWSFDF\fP and \f(CWLFDF\fP this +member tells if the function returns an expression or not. +In case \f(CWod_class\fP is \f(CWFC\fP it is true if the value +of the function is used, false otherwise. 
+For other classes this member is not used. +.IP \f(CWod_type\fP +A pointer to the type of the function or variable defined or +declared. +Not used for classes \f(CWFC\fP and \f(CWVU\fP. +.NH 2 +The first pass checking mechanism +.PP +In the description of the implementation of the pass one +warnings, it is assumed that the reader is familiar with the +\fILLgen\fP parser generator, as described in [6]. +.NH 3 +Used and/or set variables +.PP +To be able to give warnings like +.ft CW +%s used before set +.R +and +.ft CW +%s set but not used in function %s +.R +, there needs to be a way to keep track of the state of a variable. +A first approach to do this was by adding two fields to the +\fIdef\fP selector: +.ft CW +df_set +.R +and +.ft CW +df_used. +.R +While parsing the program, each time an expression was met +this expression was analyzed and the fields of each \fIdef\fP +selector were possibly set during this analysis. +This analysis was done by passing each expression to a +function +.ft CW +lint_expr +.R +, which walks the expression tree in a way similar to the function +\f(CWEVAL\fP in the file \fIeval.c\fP of the original +.I +cem +.R +compiler. +This approach has one big disadvantage: it is impossible to keep +track of the flow of control of the program. +No warning will be given for the program fragment of figure 3. +.KF +.DS B +.ft CW +func() +{ + int i; + + if (cond) + i = 0; + else + use(i); /* i may be used before set */ +} +.I +.DE +.br +.ce +figure\ 3. +.R +.KE +.PP +It is clear that it would be nice having +.I lint +warn for this construction. +.PP +This was done in the second approach. +When there was a choice between two statements, each statement +was parsed with its own copy of the state at entrance of the +.I +choosing statement. +.R +A state consisted of the state of the automatic variables +(including register variables). +In addition to the possibilities of being used and set, +a variable could be \fImaybe set\fP. 
+These states were passed between the statement parsing routines +using the \fILLgen\fP parameter mechanism. +At the end of a choosing statement, the two states were merged +into one state, which became the state after this statement. +The construction of figure 3 was now detected, but switch +statements still gave problems and continue and break statements +were not understood. +The main problem of a switch statement is, that the closing bracket +(`\f(CW)\fP') has to be followed by a \fIstatement\fP. +The syntax shows no choice of statements, as is the case with +if, while, do and for statements. +Using the \fILLgen\fP parameter mechanism, it is not a trivial +task to parse the different case parts of a switch statement +with the same initial state and to merge the results into one +state. +This observation led to the third and final approach, as described +next. +.PP +Instead of passing the state of the program through the statements +parsing routines using the \fILLgen\fP parameters, a special stack is +introduced, the +.I lint_stack. +When a choosing statement is parsed, an entry is pushed on the stack +containing the information that is needed to keep track of the +state of the program. +Each entry contains a description of the +.I current +state of the program and a field that indicates what part of the +program the parser is currently parsing. +For all the possible choosing statements I describe the actions +to be taken. +.PP +At entrance of an if statement, an entry is pushed on the stack +with the current state being a copy of the current state of the +stack element one below. +The class of this entry is \f(CWIF\fP. +At reaching the else part, the current state is moved to +another place in this stack entry (to \f(CWS_IF\fP), and a new copy +of the current state at entrance of this if statement is made. +At the end of the else part, the two states are merged into +one state, the new current state, and the \f(CWIF\fP entry is +popped from the stack. 
+If there is no else part, then the state that is reached after +parsing the if part is merged with the current state at entrance +of the if statement into the new current state. +.PP +At entrance of a while statement a \f(CWWHILE\fP entry is pushed +on the stack containing a copy of the current state. +If a continue or break statement is met in the while statement, +the state at reaching this continue or break statement is +merged with a special state in the \f(CWWHILE\fP entry, called +\f(CWS_END\fP. +(If \f(CWS_END\fP did not yet contain a state, the state is copied +to \f(CWS_END\fP.) +At the end of the while statement this \f(CWS_END\fP is merged with the +current state, which result is merged with the state at entrance +of the while statement into the new current state. +.PP +A for statement is treated similarly. +A do statement is treated the same way too, except that \f(CWS_END\fP +isn't merged with the state at entrance of the do statement, +but becomes the new current state. +.PP +For switch statements a \f(CWSWITCH\fP entry is pushed on the stack. +Apart from the current state, this entry contains two other +states, \f(CWS_BREAK\fP and \f(CWS_CASE\fP. +\f(CWS_BREAK\fP initially contains no state, \f(CWS_CASE\fP +initially contains a +copy of the current state at entrance of the switch statement. +After parsing a case label, a \f(CWCASE\fP entry is pushed on the stack, +containing a copy of the current state. +If, after zero or more statements, we meet another case label, +the state at reaching this case label is merged with \f(CWS_CASE\fP +of the \f(CWSWITCH\fP entry below and a new copy of the state +at entrance +of the switch statement is put in the \f(CWCASE\fP entry. +If we meet a break statement, we merge the current state with +\f(CWS_BREAK\fP of the \f(CWSWITCH\fP entry below and pop the +\f(CWCASE\fP entry. +In addition to this, the occurrence of a default statement +inside the switch statement is recorded in the \f(CWSWITCH\fP entry. 
+At the end of the switch statement we check if we have met a +default statement. +If not, \f(CWS_BREAK\fP is merged with the current state at entrance +of the switch statement. (Because it is possible that no case +label will be chosen.) +Next the \f(CWS_CASE\fP is `special_merged' with \f(CWS_BREAK\fP +into the new current state. +For more details about these merge functions see the sources. +.PP +With the approach described above, +.I lint +is aware of the flow +of control in the program. +There still are some doubtful constructions +.I lint +will not detect and there are some constructions (although rare) +for which +.I lint +gives an incorrect warning (see figure 4). +.KF +.DS B +.ft CW +{ + int i; + + for (;;) { + if (cond) { + i = 0; + break; + } + } + use(i); + /* lint warns: maybe i used before set + * although the fragment is correct + */ +} +.DE +.br +.I +.ce +figure\ 4. +.R +.KE +.PP +A nice advantage of the method is, that the parser stays clear, +i.e. it isn't extended with extra parameters which must pass the +states. +In this way the parser still is very readable and we have a nice +interface with +.I lint +using function calls. +.NH 3 +Undefined evaluation orders +.PP +In expressions the values of some variables are used and some +variables are set. +Of course, the same holds for subexpressions. +The compiler is allowed to choose the order of evaluation of +subexpressions involving a commutative and associative operator +(\f(CW*\fP, \f(CW+\fP, \f(CW&\fP, \f(CW|\fP, \f(CW^\fP), +the comma in a parameter list or an assignment operator. +In section 3.4 it is made clear that this will lead to +statements with ambiguous semantics. +.PP +The way these constructs are detected is rather straight forward. +The function which parses an expression (\f(CWlint_expr\fP) +returns a linked +list containing information telling which variables are set and +which variables are used. +A variable is indicated by its +.I idf +descriptor and an +.I offset. 
+This offset is needed for discriminating entries of the same +array and members of the same structure or union, so it is +possible to warn about the statement +.ft CW +a[b[0]]\ =\ b[0]++;. +.R +When \f(CWlint_expr\fP meets a commutative operator (with respect to the +evaluation order), it calls itself recursively with the operands +of the operator as expression. +The returned results are checked for undefined evaluation orders +and are put together. +This is done by the function \f(CWcheck_and_merge\fP. +.NH 3 +Useless statements +.PP +Statements which compute a value that is not used, +are said to have a \fInull effect\fP. +Examples are \f(CWx = 2, 3;\fP, \f(CWf() + g();\fP and +\f(CW*p++;\fP. +(\f(CW*\fP and \f(CW++\fP have the same precedence and associate +from right to left.) +.PP +A conditional expression computes a value too. +If this value isn't used, it is better to use an if-else +statement. +So, if +.I lint +sees +.DS B +.ft CW +b ? f() : g(); +.R +.DE +.LP +it warns \f(CWuse if-else construction\fP. +.NH 3 +Not-reachable statements +.PP +The algorithm to detect not-reachable statements (including not +reachable initializations) is as follows. +Statements after a label and a case statement and the compound +statement of a function are always reachable. +Other statements are not-reachable after: +.QS +.RS +.IP - 1 +a goto statement +.IP - +a return statement +.IP - +a break statement +.IP - +a continue statement +.IP - +a switch statement +.IP - +an endless loop (a while, do or for loop with a conditional +which always evaluates to true and without a break statement) +.IP - +an if-else statement of which both if part and else part +end up in a not-reachable state +.IP - +a switch statement of which all \f(CWcase ... break\fP parts +(including +a \f(CWdefault ... 
break\fP part) end up in a not-reachable state +.IP - +the pseudocomment \f(CW/*\ NOTREACHED\ */\fP +.RE +.QE +.PP +The algorithm is easily implemented using the \f(CWst_nrchd\fP selector +in the +.I state +descriptor. +The \f(CWst_warned\fP selector is used to prevent superfluous warnings. +To detect an endless loop, after a while (), for (..;;..) +and do part the current state of the stack entry beneath the top one +is set to not reached. +If, in the statement following, a break statement is met, this same +state is set to reached. +If the while () part of the do statement is met, this state +is set to reached if the condition doesn't evaluate to true. +The detection of not-reachable statements after a switch statement +is done in a similar way. +In addition it is checked if a default statement isn't met, in +which case the statement after the switch statement can be reached. +The warning \f(CWstatement not reached\fP is not given for compound +statements. +If +.I lint +did, it would warn for the compound statement in a switch statement, +which would be incorrect. +.PP +Not-reachable statements are still interpreted by +.I lint. +I.e. when +.I lint +warns that some statement can't be reached, it assumes this is +not what the programmer really wants and it ignores this fact. +In this way a lot of useless warnings are prevented in the case of +a not-reachable statement. +See figure 5. +.KF +.DS B +.ft CW +{ + int i; + + for (;;) { + /* A loop in which the programmer + * forgot to introduce a conditional + * break statement. + * Suppose i is not used in this part. + */ + } + /* some more code in which i is used */ +} +/* The warning "statement not reached" highlights the bug. + * An additional warning "i unused in function %s" is + * formally correct, but doesn't provide the programmer + * with useful information. + */ +.DE +.I +.ce +figure\ 5.
+.R +.KE +.NH 3 +Functions returning expressions and just returning +.PP +Each time a return statement is met, +.I lint +checks if an expression is returned or not. +If a function has a return with expression and a return without +expression, +.I lint +warns +.ft CW +function %s has return(e); and return;. +.R +If the flow of control can +.I +fall through +.R +the end of the compound statement of a function, this indicates +an implicit return statement without an expression. +If the end of the compound statement of the function can be reached, +.I lint +introduces this implicit return statement without expression. +.PP +Sometimes the programmer knows for sure that all case parts inside +a switch statement include all possible cases, so he doesn't +introduce a default statement. +This can lead to an incorrect warning. +Figure 6 shows how to prevent this warning. +.KF +.DS B +.ft CW + func() + { + switch (cond) { + case 0: return(e0); + case 1: return(e1); + } + /* NOTREACHED */ + } +/* no warning: "function func has return(e); and return;" */ +.DE +.I +.ce +figure\ 6. +.R +.KE +.PP +The pseudocomment \f(CW/*\ NOTREACHED\ */\fP can also be used to tell +.I lint +that some function doesn't return. See figure 7. +.KS +.DS B +.ft CW + func() + { + switch (cond) { + case 0: return(e0); + case 1: return(e1); + default: error(); /* calls exit or abort */ + /* NOTREACHED */ + } + } +/* no warning: "function func has return(e); and return;" */ +.I +.DE +.ce +figure\ 7. +.R +.KE +.NH 3 +Output definitions for the second pass +.PP +The first pass can only process one program file. +To be able to process a program that spreads over more than one file, +the first pass outputs definitions that are processed by a second +pass. +The format of such a definition is different for different classes: +.PP +For class in {EFDF, SFDF, LFDF} +.DS C +<name>:<class>:<file>:<line>:<nr of args>:<type of args>:<returns value>:<type> +.DE +.LP +A negative \fInr of args\fP indicates that the function can be called with +a varying number of arguments.
+.PP +For class = FC +.DS C +<name>:<class>:<file>:<line>:<value is used>:<type> +.DE +.LP +The \fIvalue is used\fP part can have three meanings: +the value of the function is ignored; +the value of the function is used; +the value of the function is cast to type \fIvoid\fP. +.PP +For other classes +.DS C +<name>:<class>:<file>:<line>:<type> +.DE +.LP +Definitions of class VU (Variable Usage) are only output for \fIused\fP +global variables. +.PP +Structure and union types that are output to the intermediate file +are simplified. +(The following occurrences of \fIstructure\fP should be +read as \fIstructure or union\fP and \fIstruct\fP as \fIstruct or +union\fP.) +Structures that are identified by a \fIstructure tag\fP are output +to the intermediate file as \f(CWstruct <tag>\fP. +Structures without a structure tag are output as +\f(CWstruct {<mems>}\fP with \f(CW<mems>\fP a semicolon-separated +list of types of the members of this structure. +An alternative way would be to output the complete structure definition. +However, this gives practical problems. +It is allowed to define some object of a structure type with a +structure tag, without this structure being defined at that place. +The first approach leaves errors, such as in figure 8, undetected. +.KF +.DS B +.ft CW + "a.c" "b.c" + +struct str { struct str { + float f; int i; +} s; }; + +main() func(s) +{ struct str s; + func(s); {} +} +.I +.DE +.ce +figure\ 8. +.R +.KE +.PP +To be able to detect these errors, the first pass should also output +definitions of structure tags. +The example of figure 8 would then get a warning like +.ft CW +structure str defined inconsistently +.R +.PP +More information on these definitions can be found in sections 4.3 and 4.4. +.NH 3 +Generating libraries +.PP +.I Lint +knows the libraries `-lc', `-lm' and `-lcurses'. +If a program uses some other library, it is possible to generate +a corresponding \fIlint library\fP. +To do this, precede all the C source files of this library by +the pseudocomment \f(CW/*\ LINTLIBRARY\ */\fP.
+Then feed these files one by one to the first pass of +.I lint +collecting the standard output in a file and ignoring the warnings. +The resulting file contains library definitions of the functions +and external variables defined in the library sources, and not more +than that. +If this file is called `llib-l\fIname\fP.ln', +.I lint +can be told to search the library by passing it as an argument on +the command line: `-llib-l\fIname\fP.ln'. +The implementation of this feature is simple. +.PP +As soon as the pseudocomment \f(CW/*\ LINTLIBRARY\ */\fP is met, +only function and variable definitions are output with class LFDF +and LVDF respectively. +Other definitions, which otherwise would have been output, are +discarded. +.PP +Instead of generating a special lint library file, one can make a +file containing the library definitions and starting with +\f(CW/* LINTLIBRARY */\fP. +This file can then be passed to +.I lint +just by its name. +This method isn't as efficient as the first one. +.NH 3 +Interpreting the pseudocomments +.PP +The interpretation of the pseudocomments is done by the lexical +analyzer, because this part of the program already took care of the +comments. +At first sight this seems very easy: as soon as some pseudocomment +is met, raise the corresponding flag. +Unfortunately this doesn't work. +The lexical analyzer is a \fIone token look ahead scanner\fP. +This causes the above procedure to raise the flags one token too +soon. +A solution to get the right effect is to reserve two flags per +pseudocomment. +The first is set as soon as the corresponding pseudocomment is +scanned. +Each time a token is returned, this flag is moved to the second flag. +The delay achieved in this way makes the pseudocomments have effect +at the correct place.
+.NH 2 +The second pass data structure +.NH 3 +Inp_def descriptor +.DS B +.ft CW +struct inp_def { + struct inp_def *next; + int id_class; + char id_name[NAMESIZE]; + char id_file[FNAMESIZE]; + unsigned int id_line; + int id_nrargs; + char argtps[ARGSTPSSIZE]; + int id_returns; + char id_type[TYPESIZE]; + int id_called; + int id_used; + int id_ignored; + int id_voided; +}; +.R +.DE +.PP +This description is very similar to the \fIoutdef\fP descriptor as +described in 4.1.2.5. +There are some differences too. +.IP \f(CWnext\fP 15 +As structures of this type are allocated dynamically, this field +is added so the same memory allocator as used in the first pass can be +used. +.LP +\f(CWid_called +.br +id_used +.br +id_ignored\fP +.IP \f(CWid_voided\fP 15 +Some additional fields only used for function definitions. +Their meaning should be clear. +.PP +The other fields have the same meaning as the corresponding fields +in the \fIoutdef\fP descriptor. +Some attention should be paid to \f(CWid_argtps\fP and \f(CWid_type\fP. +These members have type \f(CWarray of char\fP, in contrast to +their counterparts in the \fIoutdef\fP descriptor. +The only operation performed on types is a check on equality. +Types are output by the first pass as a string describing the type. +For example, the type of \f(CWi\fP in \f(CWint *i();\fP is output as +\f(CWint *()\fP. +Such a string is best put in an \f(CWarray of char\fP to be compared +easily. +.NH 2 +The second pass checking mechanism +.PP +After all the definitions that are output by the first pass are +sorted by name, the definitions belonging to one name are ordered +as follows. +.QS +.RS +.IP - 1 +external definitions +.IP - +static definitions +.IP - +library definitions +.IP - +declarations +.IP - +function calls +.IP - +variable usages +.RE +.QE +.PP +The main program of the second pass is easily explained. +For all different names, do the following. +First read the definitions.
+If there is more than one definition, check for conflicts. +Then read the declarations, function calls and variable usages and +check them against the definitions. +After having processed all the declarations, function calls and +variable usages, check the definitions to see if they are used +correctly. +The next three paragraphs will explain the three most important +functions of the program. +.NH 3 +Read_defs() +.PP +This function reads all definitions belonging to the same name. +Only one external definition is allowed, so if there are more, a +warning is given. +In different files it is allowed to define static functions or +variables with the same name. +So if a static function is read, \f(CWread_defs\fP checks if there isn't +already an external definition, and if not it puts the static +definition in the list of static definitions, to be used later. +If no external or static definitions are met, a library definition is +taken as definition. +If a function or a variable is defined with the same name as a function +or a variable in a library (which is allowed) +.I lint +gives a warning. +Of course it is also possible that there is no definition at all. +In that case \f(CWcheck\fP will warn. +.NH 3 +Check() +.PP +\f(CWCheck\fP verifies declarations, function calls and variable +usages against the definitions. +For each of these entries the corresponding definition is looked up. +As there may be more than one static definition, first a static +definition from the same file as the entry is searched. +If not present, the external definition (which may be a library +definition) is taken as definition. +If no definition can be found and the current entry is an external +declaration, +.I lint +warns. +However in the case of an implicit function declaration +.I lint +will not warn, because +we will get a warning \f(CW%s used but not defined\fP later on. +Next a check is done if the declarations are consistent with their +definitions. 
+After the declarations, the function calls and variable usages are +verified against their corresponding definitions. +If no definition exists, +.I lint +warns. +Else the field \f(CWid_called\fP is set to 1. +(For variable definitions this should be interpreted as \fIused\fP.) +For variable usages this will be all. +If we are processing a function call we also check the number and types +of the arguments and we warn for function values which are used from +functions that don't return a value. +For each function call we administrate if a function value is used, +ignored or voided. +.NH 3 +Check_usage() +.PP +Checks if the external definition and static definitions are used +correctly. +If a function or variable is defined but never used, +.I lint +warns, except for library definitions. +Functions, which return a value but whose value is always or +sometimes ignored, get a warning. +(A function value which is voided (cast to void) is not ignored, +but it isn't used either.) +.bp diff --git a/doc/lint/chap5 b/doc/lint/chap5 new file mode 100644 index 000000000..28c4f7c30 --- /dev/null +++ b/doc/lint/chap5 @@ -0,0 +1,107 @@ +.NH 1 +How to make lint shut up +.PP +It can be very annoying having +.I lint +warn about questionable constructs of which the programmer already is +aware. +There should be a mechanism to give +.I lint +some extra information in the source code. +This could be done by introducing some special keywords, which +would have a special meaning to +.I lint. +This is a bad solution, because these keywords would cause existing +C compilers not to work on these programs. +A neater solution is to invent some comments having a special +meaning to +.I lint. +We call these comments +.I pseudocomments. +The pseudocomments have no meaning to existing C compilers, so +compilers will not have to be rewritten for C programs containing +the previously proposed special keywords. +The following pseudocomments are recognized by +.I lint. 
+.LP +\f(CW/* VARARGS\fIn\fP */\fR +.br +.in 5 +The next function can be called with a variable number of arguments. +Only check the first \fIn\fP arguments. +The \fIn\fP must follow the word \f(CWVARARGS\fP immediately. +This pseudocomment is useful for functions such as printf. +(The definition of the function printf should be preceded by +\f(CW/*\ VARARGS1\ */\fP.) +.in +.LP +\f(CW/* VARARGS */\fP +.br +.in 5 +Means the same as \f(CW/* VARARGS0 */\fP. +.in +.LP +\f(CW/* ARGSUSED */\fP +.br +.in 5 +Don't complain about unused arguments in the next function. +When we are developing a program we sometimes write functions of +which we do not yet use the arguments. +Because we do want to use +.I lint +on these programs, it is nice to have this pseudocomment. +.in +.LP +\f(CW/* NOTREACHED */\fP +.br +.in 5 +.I Lint +makes no attempt to discover functions which never return, +although it \fIis\fP possible to find functions that don't return. +This would require a transitive closure with respect to the already +known \fInot-returning\fP functions; an unacceptably time-consuming +process. +To make +.I lint +aware of a function that doesn't return, a call of this function +should be followed by the pseudocomment \f(CW/*\ NOTREACHED\ */\fP. +This pseudocomment can also be used to indicate that some case part +inside a switch (especially a default part) can't be reached. +The above mentioned cases of use of this pseudocomment are +examples. +The comment can be used just to indicate that some part of the +program can't be reached. +It sometimes is necessary to introduce an extra compound statement +to get the right effect. +See figure 9. +.KF +.DS B +.ft CW + if (cond) + /* if part */ ; + else { + error(); /* doesn't return */ + /* NOTREACHED */ + } +/* Without the compound else part, lint would assume + * the statement after the if statement to be NOTREACHED, + * instead of the end of the else part. + */ +.I +.DE +.ce +figure\ 9.
+.R +.KE +.in +.LP +\f(CW/* LINTLIBRARY */\fP +.br +.in 5 +All definitions following this comment are assumed to be library +definitions. +It shuts off complaints about unused functions and variables. +See also section 4.2.7 for how to use this comment for generating +lint libraries. +.in +.bp diff --git a/doc/lint/chap6 b/doc/lint/chap6 new file mode 100644 index 000000000..6ba75b309 --- /dev/null +++ b/doc/lint/chap6 @@ -0,0 +1,107 @@ +.NH 1 +User options +.PP +.I Lint +recognizes the following command line flags. +Some of them are identical to the flags of +.I cem. +.I Lint +warns for flags it doesn't know. +.LP +\f(CW-D<name> +.br +-D<name>=<text>\fP +.br +.in 5 +Causes \f(CW<name>\fP to be defined as a macro. +The first form is equivalent to `\f(CW-D<name>=1\fP'. +The second form is equivalent to putting `\f(CW#define <name> <text>\fP' +in front of all the source files. +.in +.LP +\f(CW-U<name>\fP +.br +.in 5 +Acts as if the line `\f(CW#undef <name>\fP' is put in front of all +the source files. +.in +.LP +\f(CW-I<directory>\fP +.br +.in 5 +This puts \f(CW<directory>\fP in the include directory +list. +.in +.LP +\f(CW-R\fP +.br +.in 5 +Turn off the `strict' option. +By default +.I lint +checks the program according to the Reference Manual, because this +gives a definition of the language with which there is a better chance +of writing portable programs. +With this flag on, some constructs, otherwise not allowed, are +accepted. +.in +.LP +\f(CW-l<name> +.br +-llib-l<name>.ln +.br +-l\fP +.br +.in 5 +`\f(CW-l<name>\fP' tells +.I lint +to search the lint library +\f(CWllib-l<name>.ln\fP for missing +definitions of functions and variables. +The option `\f(CW-llib-l<name>.ln\fP' makes +.I lint +search the lint library file \f(CWllib-l<name>.ln\fP in the current +directory for missing definitions. +Default is `\f(CW-lc\fP'; this default can be suppressed by +`\f(CW-l\fP'. +.in +.LP +\f(CW-a\fP +.br +.in 5 +Warn for conversions from integer to long and vice versa. +.in +.LP +\f(CW-b\fP +.br +.in 5 +Don't report not-reachable break statements.
+This flag is useful for running +.I lint +on a \fIlex\fP- or \fIyacc\fP-generated source file. +.in +.LP +\f(CW-h\fP +.br +.in 5 +Check for useless statements and possible pointer alignment problems. +.in +.LP +\f(CW-n\fP +.br +.in 5 +Don't complain about unused and undefined functions and variables. +.in +.LP +\f(CW-v\fP +.br +.in 5 +Don't warn about unused arguments of functions. +.in +.LP +\f(CW-x\fP +.br +.in 5 +Complain about unused external variables. +.in +.bp diff --git a/doc/lint/chap7 b/doc/lint/chap7 new file mode 100644 index 000000000..d224f1fdb --- /dev/null +++ b/doc/lint/chap7 @@ -0,0 +1,139 @@ +.NH +Ideas for further development +.PP +Although the program in its current state is a useful program, +there are still a lot of features that should be implemented +in following versions. +I'll summarize them in this section. +.IP \(bu +Actually the program consists of three passes. +The filter +.I sort +is a complete pass, just as the first and the second pass. +I think we can speed up the program by removing the filter and making +the second pass accept an unsorted file. +The sorting process can be done in parallel to the first pass if +both processes communicate through a pipe. +In addition to this sorting, the second pass can already generate +some warnings. +(Warnings like \f(CW%s defined but never used\fP can only be +generated after having processed all the input.) +These warnings, generated in parallel to the warnings of the first pass, +should be sent to an intermediate file, otherwise the warnings would +get messed up. +Such an improvement will have best effect on a multiprocessing +machine, but even on single processing machines this will give a better +performance. (On a single processing machine the pipe should be +replaced by an intermediate file.) +.IP \(bu +Expressions could be classified so +.I lint +can warn for some classes of expressions in strange contexts. +Suppose we have a class <boolean>. +\f(CWb\fP will be of class <boolean> if e.g.
\f(CWb\fP is assigned to +the expression \f(CW || \fP. +The following expression should then give a warning +.DS B +.ft CW +b + i; /* weird expression */ +.R +.DE +.IP \(bu +A mechanism to check printf like routines. +This mechanism should verify the format string against the following +arguments. +There is a public domain program that can be used to do this job. +It is called printfck and should be used as a filter between the +source files and +.I lint. +.IP \(bu +Raise warnings for incomplete initializer lists like +.DS B +.ft CW +int a[10] = {0, 1, 2}; +/* initializer list not complete */ +.R +.DE +.IP \(bu +Warnings for constructs like +.DS B +.ft CW +for (i = 0; i < 10; i++) { + . . . . + i--; + /* loop control variable affected */ + . . . . +} +.R +.DE +and +.DS B +.ft CW +while (var) { + /* statements in which the value + * of var is never changed + */ +} +/* loop control variable not updated */ +.R +.DE +.IP \(bu +A warning \f(CWbad layout\fP for program fragments like +.DS B +.ft CW +if (cond1) + if (cond2) + statement(); +else /* bad layout */ + statement(); +.R +.DE +.IP \(bu +A warning \f(CWassignment in conditional context\fP in case of +.DS B +.ft CW +if (a = b) +.R +.DE +.IP +The programmer probably meant \f(CWif (a == b)\fP. +No warning should be given for \f(CWif ((a = b) != c)\fP, +nor for \f(CWif ((a = b))\fP. +.IP \(bu +Warnings for empty statements in strange contexts, like +.DS B +.ft CW +if (cond); /* mistake */ + statement(); +.R +.DE +.IP +(This mistake would also be detected by a warning \f(CWbad layout\fP.) +.IP \(bu +A mechanism to prevent the warning \f(CWpossible pointer alignment +problem\fP for functions of which the programmer already knows that +no problem will arise. +E.g. for functions like malloc and family. +.IP \(bu +The current version of +.I lint +warns for conversions from long to int (if -a flag is +on). +It even warns if the programmer used the proper cast, as e.g. 
+.DS B +.ft CW +int i; +long l = 0L; + +i = (int)l; +.R +.DE +.IP +In this case I think +.I lint +need not warn. +The explicit cast indicates that the programmer knows what he is +doing. +This feature is not implemented because the expression tree doesn't +show if the cast was implicit or explicit. +.bp diff --git a/doc/lint/chap8 b/doc/lint/chap8 new file mode 100644 index 000000000..eac0ea6b3 --- /dev/null +++ b/doc/lint/chap8 @@ -0,0 +1,56 @@ +.NH 1 +Testing the program +.PP +There is no test-suite for testing +.I lint. +I have written a lot of small files that each test one +particular property of the program. +At this moment there are about 220 test programs. +.PP +It would take a lot of time and effort to run these tests by hand. +To ease this work I wrote a program that runs these tests +automatically. +The test program (the program that runs the tests) needs, associated +with each .c file, a .w file, containing from each expected warning +a substring. E.g. when the following warnings should be given by +.I lint: +.DS B +.ft CW + file t.c, line 3, i evaluation order undefined + file t.c, line 6, a set but not used in function main +.R +.DE +it is sufficient to write a file \f(CWt.w\fP containing +.DS B +.ft CW + a set but not used in function main + i evaluation order undefined +.R +.DE +The test program is called with all the .c files to be tested +as arguments. +.PP +Sometimes it is necessary to test +.I lint +on two files. +The test program runs +.I lint +on two files when two consecutive +arguments are of the form \fIname\fPa.c and \fIname\fPb.c. +It then compares the output of +.I lint +with the file \fIname\fP.w. +.PP +.I Lint +is also tested by running it on existing programs. +.I Lint +has been run on some \s-2UNIX\s+2 utility programs in +/usr/src/cmd, on Unipress Emacs (consisting of more than 30,000 +lines of code) and the program itself. +Bugs have been found in e.g. /usr/src/cmd/cat.c and +/usr/src/cmd/ld.c. 
+To test the robustness of the program, it was run on the +password file /etc/passwd and on `mixed' C program files. +These mixed C program files are C program files that were +broken in chunks and then put together in a different order. +.bp diff --git a/doc/lint/chap9 b/doc/lint/chap9 new file mode 100644 index 000000000..fca2bb637 --- /dev/null +++ b/doc/lint/chap9 @@ -0,0 +1,48 @@ +.NH 1 +References +.IP [1] +Dennis M. Ritchie, +.I +C Reference Manual, +.R +Bell Laboratories, +Murray Hill, +New Jersey, +1978. +.IP [2] +B.W. Kernighan and D.M. Ritchie, +.I +The C Programming Language, +.R +Prentice Hall, +1978. +.IP [3] +Eric H. Baalbergen, Dick Grune, Maarten Waage, +.I +The CEM Compiler, +.R +Manual IM-4, Vrije Universiteit, Amsterdam, +1985. +.IP [4] +Andrew S. Tanenbaum et al., +.I +A practical tool kit for making portable compilers, +.R +Comm. ACM, +Sep. 1983. +.IP [5] +S. C. Johnson, +.I +Lint, a C program verifier, +.R +Bell Laboratories, +Murray Hill, +New Jersey, +1978. +.IP [6] +Dick Grune, Ceriel J. H. Jacobs, +.I +A Programmer-friendly LL(1) Parser Generator, +.R +IR 127, Vrije Universiteit, Amsterdam, +1987. diff --git a/doc/lint/contents b/doc/lint/contents new file mode 100644 index 000000000..93538fe71 --- /dev/null +++ b/doc/lint/contents @@ -0,0 +1,59 @@ +.DS + + + + + + +.DE +.SH +Contents +.R +.sp 1 +.IP 1. +Introduction +.IP 2. +Outline of the program +.IP 3. +What lint checks +.RS +.IP 3.1 +Set, used and unused variables +.IP 3.2 +Flow of control +.IP 3.3 +Functions +.IP 3.4 +Undefined evaluation order +.IP 3.5 +Pointer alignment problems +.IP 3.6 +Libraries +.RE +.IP 4. +How lint checks +.RS +.IP 4.1 +The first pass data structure +.IP 4.2 +The first pass checking mechanism +.IP 4.3 +The second pass data structure +.IP 4.4 +The second pass checking mechanism +.RE +.IP 5. +How to make lint shut up +.IP 6. +User options +.IP 7. +Ideas for further development +.IP 8. +Testing the program +.IP 9. 
+References +.LP +Appendix A \- The warnings +.br +Appendix B \- The Ten Commandments for C programmers +.bp diff --git a/doc/lint/frontpage b/doc/lint/frontpage new file mode 100644 index 000000000..7ac226657 --- /dev/null +++ b/doc/lint/frontpage @@ -0,0 +1,14 @@ +.TL +.sp 5 +Lint, a C Program Checker +.AU +Frans Kunst +.AI +Vrije Universiteit +Amsterdam +.LP +.sp 8 +.ce +Afstudeer verslag +.ce +18 mei 1988