avoid deadlock by disk intr acking interrupt first, then processing ring
This commit is contained in:
parent
3c70be9070
commit
aeaf610c67
|
@ -130,10 +130,13 @@ static void
|
|||
free_desc(int i)
|
||||
{
|
||||
if(i >= NUM)
|
||||
panic("virtio_disk_intr 1");
|
||||
panic("free_desc 1");
|
||||
if(disk.free[i])
|
||||
panic("virtio_disk_intr 2");
|
||||
panic("free_desc 2");
|
||||
disk.desc[i].addr = 0;
|
||||
disk.desc[i].len = 0;
|
||||
disk.desc[i].flags = 0;
|
||||
disk.desc[i].next = 0;
|
||||
disk.free[i] = 1;
|
||||
wakeup(&disk.free[0]);
|
||||
}
|
||||
|
@ -143,9 +146,11 @@ static void
|
|||
free_chain(int i)
|
||||
{
|
||||
while(1){
|
||||
int flag = disk.desc[i].flags;
|
||||
int nxt = disk.desc[i].next;
|
||||
free_desc(i);
|
||||
if(disk.desc[i].flags & VRING_DESC_F_NEXT)
|
||||
i = disk.desc[i].next;
|
||||
if(flag & VRING_DESC_F_NEXT)
|
||||
i = nxt;
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
@ -184,7 +189,7 @@ virtio_disk_rw(struct buf *b, int write)
|
|||
}
|
||||
sleep(&disk.free[0], &disk.vdisk_lock);
|
||||
}
|
||||
|
||||
|
||||
// format the three descriptors.
|
||||
// qemu's virtio-blk.c reads them.
|
||||
|
||||
|
@ -217,7 +222,7 @@ virtio_disk_rw(struct buf *b, int write)
|
|||
disk.desc[idx[1]].flags |= VRING_DESC_F_NEXT;
|
||||
disk.desc[idx[1]].next = idx[2];
|
||||
|
||||
disk.info[idx[0]].status = 0;
|
||||
disk.info[idx[0]].status = 0xff; // device writes 0 on success
|
||||
disk.desc[idx[2]].addr = (uint64) &disk.info[idx[0]].status;
|
||||
disk.desc[idx[2]].len = 1;
|
||||
disk.desc[idx[2]].flags = VRING_DESC_F_WRITE; // device writes the status
|
||||
|
@ -227,13 +232,17 @@ virtio_disk_rw(struct buf *b, int write)
|
|||
b->disk = 1;
|
||||
disk.info[idx[0]].b = b;
|
||||
|
||||
// avail[0] is flags
|
||||
// avail[1] tells the device how far to look in avail[2...].
|
||||
// avail[2...] are desc[] indices the device should process.
|
||||
// avail[0] is flags (always zero)
|
||||
// avail[1] is an index into avail[2...] telling where we'll write next
|
||||
// avail[2...] is a ring of NUM indices the device should process
|
||||
// we only tell device the first index in our chain of descriptors.
|
||||
disk.avail[2 + (disk.avail[1] % NUM)] = idx[0];
|
||||
|
||||
__sync_synchronize();
|
||||
|
||||
disk.avail[1] = disk.avail[1] + 1; // not % NUM ...
|
||||
|
||||
__sync_synchronize();
|
||||
disk.avail[1] = disk.avail[1] + 1;
|
||||
|
||||
*R(VIRTIO_MMIO_QUEUE_NOTIFY) = 0; // value is queue number
|
||||
|
||||
|
@ -253,18 +262,26 @@ virtio_disk_intr()
|
|||
{
|
||||
acquire(&disk.vdisk_lock);
|
||||
|
||||
while((disk.used_idx % NUM) != (disk.used->id % NUM)){
|
||||
int id = disk.used->elems[disk.used_idx].id;
|
||||
// this ack may race with the device writing new notifications to
|
||||
// the "used" ring, in which case we may get an interrupt we don't
|
||||
// need, which is harmless.
|
||||
*R(VIRTIO_MMIO_INTERRUPT_ACK) = *R(VIRTIO_MMIO_INTERRUPT_STATUS) & 0x3;
|
||||
|
||||
__sync_synchronize();
|
||||
|
||||
while(disk.used_idx != disk.used->id){
|
||||
__sync_synchronize();
|
||||
int id = disk.used->elems[disk.used_idx % NUM].id;
|
||||
|
||||
if(disk.info[id].status != 0)
|
||||
panic("virtio_disk_intr status");
|
||||
|
||||
disk.info[id].b->disk = 0; // disk is done with buf
|
||||
wakeup(disk.info[id].b);
|
||||
|
||||
disk.used_idx = (disk.used_idx + 1) % NUM;
|
||||
struct buf *b = disk.info[id].b;
|
||||
b->disk = 0; // disk is done with buf
|
||||
wakeup(b);
|
||||
|
||||
disk.used_idx += 1;
|
||||
}
|
||||
*R(VIRTIO_MMIO_INTERRUPT_ACK) = *R(VIRTIO_MMIO_INTERRUPT_STATUS) & 0x3;
|
||||
|
||||
release(&disk.vdisk_lock);
|
||||
}
|
||||
|
|
|
@ -1734,6 +1734,7 @@ void
|
|||
manywrites(char *s)
|
||||
{
|
||||
int nchildren = 4;
|
||||
int howmany = 30; // increase to look for deadlock
|
||||
|
||||
for(int ci = 0; ci < nchildren; ci++){
|
||||
int pid = fork();
|
||||
|
@ -1749,7 +1750,7 @@ manywrites(char *s)
|
|||
name[2] = '\0';
|
||||
unlink(name);
|
||||
|
||||
for(int iters = 0; iters < 500000; iters++){
|
||||
for(int iters = 0; iters < howmany; iters++){
|
||||
for(int i = 0; i < ci+1; i++){
|
||||
int fd = open(name, O_CREATE | O_RDWR);
|
||||
if(fd < 0){
|
||||
|
@ -1765,8 +1766,6 @@ manywrites(char *s)
|
|||
close(fd);
|
||||
}
|
||||
unlink(name);
|
||||
if((iters % 50) == ci)
|
||||
write(1, ".", 1);
|
||||
}
|
||||
|
||||
unlink(name);
|
||||
|
@ -2737,7 +2736,7 @@ main(int argc, char *argv[])
|
|||
void (*f)(char *);
|
||||
char *s;
|
||||
} tests[] = {
|
||||
// {manywrites, "manywrites"},
|
||||
{manywrites, "manywrites"},
|
||||
{execout, "execout"},
|
||||
{copyin, "copyin"},
|
||||
{copyout, "copyout"},
|
||||
|
|
Loading…
Reference in a new issue