avoid deadlock by disk intr acking interrupt first, then processing ring

This commit is contained in:
Robert Morris 2020-10-04 09:21:03 -04:00
parent e3672e018a
commit 792d60e912
2 changed files with 37 additions and 21 deletions

View file

@ -130,10 +130,13 @@ static void
free_desc(int i) free_desc(int i)
{ {
if(i >= NUM) if(i >= NUM)
panic("virtio_disk_intr 1"); panic("free_desc 1");
if(disk.free[i]) if(disk.free[i])
panic("virtio_disk_intr 2"); panic("free_desc 2");
disk.desc[i].addr = 0; disk.desc[i].addr = 0;
disk.desc[i].len = 0;
disk.desc[i].flags = 0;
disk.desc[i].next = 0;
disk.free[i] = 1; disk.free[i] = 1;
wakeup(&disk.free[0]); wakeup(&disk.free[0]);
} }
@ -143,9 +146,11 @@ static void
free_chain(int i) free_chain(int i)
{ {
while(1){ while(1){
int flag = disk.desc[i].flags;
int nxt = disk.desc[i].next;
free_desc(i); free_desc(i);
if(disk.desc[i].flags & VRING_DESC_F_NEXT) if(flag & VRING_DESC_F_NEXT)
i = disk.desc[i].next; i = nxt;
else else
break; break;
} }
@ -217,7 +222,7 @@ virtio_disk_rw(struct buf *b, int write)
disk.desc[idx[1]].flags |= VRING_DESC_F_NEXT; disk.desc[idx[1]].flags |= VRING_DESC_F_NEXT;
disk.desc[idx[1]].next = idx[2]; disk.desc[idx[1]].next = idx[2];
disk.info[idx[0]].status = 0; disk.info[idx[0]].status = 0xff; // device writes 0 on success
disk.desc[idx[2]].addr = (uint64) &disk.info[idx[0]].status; disk.desc[idx[2]].addr = (uint64) &disk.info[idx[0]].status;
disk.desc[idx[2]].len = 1; disk.desc[idx[2]].len = 1;
disk.desc[idx[2]].flags = VRING_DESC_F_WRITE; // device writes the status disk.desc[idx[2]].flags = VRING_DESC_F_WRITE; // device writes the status
@ -227,13 +232,17 @@ virtio_disk_rw(struct buf *b, int write)
b->disk = 1; b->disk = 1;
disk.info[idx[0]].b = b; disk.info[idx[0]].b = b;
// avail[0] is flags // avail[0] is flags (always zero)
// avail[1] tells the device how far to look in avail[2...]. // avail[1] is an index into avail[2...] telling where we'll write next
// avail[2...] are desc[] indices the device should process. // avail[2...] is a ring of NUM indices the device should process
// we only tell device the first index in our chain of descriptors. // we only tell device the first index in our chain of descriptors.
disk.avail[2 + (disk.avail[1] % NUM)] = idx[0]; disk.avail[2 + (disk.avail[1] % NUM)] = idx[0];
__sync_synchronize();
disk.avail[1] = disk.avail[1] + 1; // not % NUM ...
__sync_synchronize(); __sync_synchronize();
disk.avail[1] = disk.avail[1] + 1;
*R(VIRTIO_MMIO_QUEUE_NOTIFY) = 0; // value is queue number *R(VIRTIO_MMIO_QUEUE_NOTIFY) = 0; // value is queue number
@ -253,18 +262,26 @@ virtio_disk_intr()
{ {
acquire(&disk.vdisk_lock); acquire(&disk.vdisk_lock);
while((disk.used_idx % NUM) != (disk.used->id % NUM)){ // this ack may race with the device writing new notifications to
int id = disk.used->elems[disk.used_idx].id; // the "used" ring, in which case we may get an interrupt we don't
// need, which is harmless.
*R(VIRTIO_MMIO_INTERRUPT_ACK) = *R(VIRTIO_MMIO_INTERRUPT_STATUS) & 0x3;
__sync_synchronize();
while(disk.used_idx != disk.used->id){
__sync_synchronize();
int id = disk.used->elems[disk.used_idx % NUM].id;
if(disk.info[id].status != 0) if(disk.info[id].status != 0)
panic("virtio_disk_intr status"); panic("virtio_disk_intr status");
disk.info[id].b->disk = 0; // disk is done with buf struct buf *b = disk.info[id].b;
wakeup(disk.info[id].b); b->disk = 0; // disk is done with buf
wakeup(b);
disk.used_idx = (disk.used_idx + 1) % NUM; disk.used_idx += 1;
} }
*R(VIRTIO_MMIO_INTERRUPT_ACK) = *R(VIRTIO_MMIO_INTERRUPT_STATUS) & 0x3;
release(&disk.vdisk_lock); release(&disk.vdisk_lock);
} }

View file

@ -1734,6 +1734,7 @@ void
manywrites(char *s) manywrites(char *s)
{ {
int nchildren = 4; int nchildren = 4;
int howmany = 30; // increase to look for deadlock
for(int ci = 0; ci < nchildren; ci++){ for(int ci = 0; ci < nchildren; ci++){
int pid = fork(); int pid = fork();
@ -1749,7 +1750,7 @@ manywrites(char *s)
name[2] = '\0'; name[2] = '\0';
unlink(name); unlink(name);
for(int iters = 0; iters < 500000; iters++){ for(int iters = 0; iters < howmany; iters++){
for(int i = 0; i < ci+1; i++){ for(int i = 0; i < ci+1; i++){
int fd = open(name, O_CREATE | O_RDWR); int fd = open(name, O_CREATE | O_RDWR);
if(fd < 0){ if(fd < 0){
@ -1765,8 +1766,6 @@ manywrites(char *s)
close(fd); close(fd);
} }
unlink(name); unlink(name);
if((iters % 50) == ci)
write(1, ".", 1);
} }
unlink(name); unlink(name);
@ -2737,7 +2736,7 @@ main(int argc, char *argv[])
void (*f)(char *); void (*f)(char *);
char *s; char *s;
} tests[] = { } tests[] = {
// {manywrites, "manywrites"}, {manywrites, "manywrites"},
{execout, "execout"}, {execout, "execout"},
{copyin, "copyin"}, {copyin, "copyin"},
{copyout, "copyout"}, {copyout, "copyout"},