Avoid deadlock by having the disk interrupt handler acknowledge the interrupt first, then process the used ring

This commit is contained in:
Robert Morris 2020-10-04 09:21:03 -04:00 committed by Frans Kaashoek
parent 3c70be9070
commit aeaf610c67
2 changed files with 37 additions and 21 deletions

View file

@ -130,10 +130,13 @@ static void
free_desc(int i)
{
// Mark descriptor i as free: clear its fields, set disk.free[i], and wake
// any process sleeping on &disk.free[0].
// NOTE(review): this text looks like a diff view with the +/- markers
// stripped, so each panic() below appears twice -- presumably the old
// "virtio_disk_intr N" message followed by its "free_desc N" replacement.
// Confirm against the real file before treating both lines as live code.

// i must be a valid descriptor index.
if(i >= NUM)
panic("virtio_disk_intr 1");
panic("free_desc 1");
// freeing a descriptor that is already marked free is a bug.
if(disk.free[i])
panic("virtio_disk_intr 2");
panic("free_desc 2");
// zero every field of the descriptor so no stale address, length,
// flags or chain link is left behind.
disk.desc[i].addr = 0;
disk.desc[i].len = 0;
disk.desc[i].flags = 0;
disk.desc[i].next = 0;
disk.free[i] = 1;
// wake on the same channel (&disk.free[0]) that virtio_disk_rw sleeps on.
wakeup(&disk.free[0]);
}
@ -143,9 +146,11 @@ static void
free_chain(int i)
{
while(1){
int flag = disk.desc[i].flags;
int nxt = disk.desc[i].next;
free_desc(i);
if(disk.desc[i].flags & VRING_DESC_F_NEXT)
i = disk.desc[i].next;
if(flag & VRING_DESC_F_NEXT)
i = nxt;
else
break;
}
@ -184,7 +189,7 @@ virtio_disk_rw(struct buf *b, int write)
}
sleep(&disk.free[0], &disk.vdisk_lock);
}
// format the three descriptors.
// qemu's virtio-blk.c reads them.
@ -217,7 +222,7 @@ virtio_disk_rw(struct buf *b, int write)
disk.desc[idx[1]].flags |= VRING_DESC_F_NEXT;
disk.desc[idx[1]].next = idx[2];
disk.info[idx[0]].status = 0;
disk.info[idx[0]].status = 0xff; // device writes 0 on success
disk.desc[idx[2]].addr = (uint64) &disk.info[idx[0]].status;
disk.desc[idx[2]].len = 1;
disk.desc[idx[2]].flags = VRING_DESC_F_WRITE; // device writes the status
@ -227,13 +232,17 @@ virtio_disk_rw(struct buf *b, int write)
b->disk = 1;
disk.info[idx[0]].b = b;
// avail[0] is flags
// avail[1] tells the device how far to look in avail[2...].
// avail[2...] are desc[] indices the device should process.
// avail[0] is flags (always zero)
// avail[1] is an index into avail[2...] telling where we'll write next
// avail[2...] is a ring of NUM indices the device should process
// we only tell device the first index in our chain of descriptors.
disk.avail[2 + (disk.avail[1] % NUM)] = idx[0];
__sync_synchronize();
disk.avail[1] = disk.avail[1] + 1; // not % NUM ...
__sync_synchronize();
disk.avail[1] = disk.avail[1] + 1;
*R(VIRTIO_MMIO_QUEUE_NOTIFY) = 0; // value is queue number
@ -253,18 +262,26 @@ virtio_disk_intr()
{
// Disk interrupt handler body: while holding disk.vdisk_lock, acknowledge
// the interrupt and walk the entries of the used ring, marking each
// buf as no longer owned by the disk and waking whoever sleeps on it.
// NOTE(review): this text looks like a diff view with the +/- markers
// stripped, so pre-change and post-change lines are interleaved below
// (e.g. two while-headers, two declarations of id). Confirm against the
// real file which of each pair is live.
acquire(&disk.vdisk_lock);
// NOTE(review): presumably the pre-change loop header and ring read, which
// compared and indexed with used_idx kept modulo NUM.
while((disk.used_idx % NUM) != (disk.used->id % NUM)){
int id = disk.used->elems[disk.used_idx].id;
// this ack may race with the device writing new notifications to
// the "used" ring, in which case we may get an interrupt we don't
// need, which is harmless.
// the ack writes the low two bits of the interrupt status register
// to the interrupt ack register.
*R(VIRTIO_MMIO_INTERRUPT_ACK) = *R(VIRTIO_MMIO_INTERRUPT_STATUS) & 0x3;
__sync_synchronize();
// used_idx is compared with disk.used->id without wrapping; it is reduced
// modulo NUM only when used to index elems[].
while(disk.used_idx != disk.used->id){
__sync_synchronize();
int id = disk.used->elems[disk.used_idx % NUM].id;
// virtio_disk_rw presets status to 0xff and expects the device to
// write 0 on success, so any non-zero value here is fatal.
if(disk.info[id].status != 0)
panic("virtio_disk_intr status");
// NOTE(review): presumably the pre-change form of the three statements
// that follow it (clear b->disk, wake the waiter, advance used_idx).
disk.info[id].b->disk = 0; // disk is done with buf
wakeup(disk.info[id].b);
disk.used_idx = (disk.used_idx + 1) % NUM;
struct buf *b = disk.info[id].b;
b->disk = 0; // disk is done with buf
wakeup(b);
disk.used_idx += 1;
}
// NOTE(review): presumably the pre-change position of the ack (after the
// ring was processed); the commit title says the ack now happens first.
*R(VIRTIO_MMIO_INTERRUPT_ACK) = *R(VIRTIO_MMIO_INTERRUPT_STATUS) & 0x3;
release(&disk.vdisk_lock);
}

View file

@ -1734,6 +1734,7 @@ void
manywrites(char *s)
{
int nchildren = 4;
int howmany = 30; // increase to look for deadlock
for(int ci = 0; ci < nchildren; ci++){
int pid = fork();
@ -1749,7 +1750,7 @@ manywrites(char *s)
name[2] = '\0';
unlink(name);
for(int iters = 0; iters < 500000; iters++){
for(int iters = 0; iters < howmany; iters++){
for(int i = 0; i < ci+1; i++){
int fd = open(name, O_CREATE | O_RDWR);
if(fd < 0){
@ -1765,8 +1766,6 @@ manywrites(char *s)
close(fd);
}
unlink(name);
if((iters % 50) == ci)
write(1, ".", 1);
}
unlink(name);
@ -2737,7 +2736,7 @@ main(int argc, char *argv[])
void (*f)(char *);
char *s;
} tests[] = {
// {manywrites, "manywrites"},
{manywrites, "manywrites"},
{execout, "execout"},
{copyin, "copyin"},
{copyout, "copyout"},