bug fix: reparent() sometimes deadlocked

bug fix: exit() sometimes released a different parent lock than it acquired
usertests
This commit is contained in:
Robert Morris 2019-09-23 06:50:25 -04:00
parent 843ce77765
commit d175beadf5
2 changed files with 71 additions and 31 deletions

View file

@ -286,11 +286,11 @@ fork(void)
}
// Pass p's abandoned children to init.
// Caller must hold p->lock and parent->lock.
// Caller must hold p->lock.
void
reparent(struct proc *p, struct proc *parent) {
reparent(struct proc *p)
{
struct proc *pp;
int child_of_init = (p->parent == initproc);
for(pp = proc; pp < &proc[NPROC]; pp++){
// this code uses pp->parent without holding pp->lock.
@ -302,13 +302,10 @@ reparent(struct proc *p, struct proc *parent) {
// because only the parent changes it, and we're the parent.
acquire(&pp->lock);
pp->parent = initproc;
if(pp->state == ZOMBIE) {
if(!child_of_init)
acquire(&initproc->lock);
wakeup1(initproc);
if(!child_of_init)
release(&initproc->lock);
}
// we should wake up init here, but that would require
// initproc->lock, which would be a deadlock, since we hold
// the lock on one of init's children (pp). this is why
// exit() always wakes init (before acquiring any locks).
release(&pp->lock);
}
}
@ -339,20 +336,38 @@ exit(int status)
end_op();
p->cwd = 0;
acquire(&p->parent->lock);
// we might re-parent a child to init. we can't be precise
// about waking up init, since we can't acquire its lock once
// we've acquired any other proc lock. so wake up init whether
// that's necessary or not. init may miss this wakeup,
// that that seems harmless.
acquire(&initproc->lock);
wakeup1(initproc);
release(&initproc->lock);
// grab a copy of p->parent, to ensure that we unlock the
// same parent we locked. in case our parent gives us away
// to init while we're waiting for the parent lock.
acquire(&p->lock);
struct proc *original_parent = p->parent;
release(&p->lock);
// we need the parent's lock in order to wake it up from wait().
// the parent-then-child rule says we have to lock it first.
acquire(&original_parent->lock);
acquire(&p->lock);
// Give any children to init.
reparent(p, p->parent);
reparent(p);
// Parent might be sleeping in wait().
wakeup1(p->parent);
wakeup1(original_parent);
p->xstate = status;
p->state = ZOMBIE;
release(&p->parent->lock);
release(&original_parent->lock);
// Jump into the scheduler, never to return.
sched();
@ -564,6 +579,8 @@ wakeup(void *chan)
static void
wakeup1(struct proc *p)
{
if(!holding(&p->lock))
panic("wakeup1");
if(p->chan == p && p->state == SLEEPING) {
p->state = RUNNABLE;
}

View file

@ -598,6 +598,31 @@ forkforkfork(char *s)
sleep(10); // one second
}
// regression test. does reparent() violate the parent-then-child
// locking order when giving away a child to init, so that exit()
// deadlocks against init's wait()? also used to trigger a "panic:
// release" due to exit() releasing a different p->parent->lock than
// it acquired.
void
reparent2(char *s)
{
for(int i = 0; i < 800; i++){
int pid1 = fork();
if(pid1 < 0){
printf("fork failed\n");
exit(1);
}
if(pid1 == 0){
fork();
fork();
exit(0);
}
wait(0);
}
exit(0);
}
// allocate all mem, free it, and allocate again
void
mem(char *s)
@ -1937,9 +1962,9 @@ stacktest(char *s)
exit(xstatus);
}
// copyin(), copyout(), and copyinstr() used to cast the virtual page
// address to uint, which (with certain wild system call arguments)
// resulted in a kernel page faults.
// regression test. copyin(), copyout(), and copyinstr() used to cast
// the virtual page address to uint, which (with certain wild system
// call arguments) resulted in a kernel page faults.
void
pgbug(char *s)
{
@ -1952,9 +1977,9 @@ pgbug(char *s)
exit(0);
}
// does the kernel panic if a process sbrk()s its size to be less than
// a page, or zero, or reduces the break by an amount too small to
// cause a page to be freed?
// regression test. does the kernel panic if a process sbrk()s its
// size to be less than a page, or zero, or reduces the break by an
// amount too small to cause a page to be freed?
void
sbrkbugs(char *s)
{
@ -2010,13 +2035,11 @@ sbrkbugs(char *s)
exit(0);
}
// does write() with an invalid buffer pointer cause
// a block to be allocated for a file that is then
// not freed when the file is deleted? if the kernel
// has this bug, it will panic: balloc: out of blocks.
// assumed_free may need to be raised to be
// more than the number of free blocks.
// this test takes a long time.
// regression test. does write() with an invalid buffer pointer cause
// a block to be allocated for a file that is then not freed when the
// file is deleted? if the kernel has this bug, it will panic: balloc:
// out of blocks. assumed_free may need to be raised to be more than
// the number of free blocks. this test takes a long time.
void
badwrite(char *s)
{
@ -2049,9 +2072,8 @@ badwrite(char *s)
exit(0);
}
// test whether exec() leaks memory if one of the
// arguments is invalid. the test passes if
// the kernel doesn't panic.
// regression test. test whether exec() leaks memory if one of the
// arguments is invalid. the test passes if the kernel doesn't panic.
void
badarg(char *s)
{
@ -2102,6 +2124,7 @@ main(int argc, char *argv[])
void (*f)(char *);
char *s;
} tests[] = {
{reparent2, "reparent2"},
{pgbug, "pgbug" },
{sbrkbugs, "sbrkbugs" },
// {badwrite, "badwrite" },