improve virtio_disk comments; bring it closer to wording in the spec

This commit is contained in:
Robert Morris 2020-10-05 06:26:58 -04:00 committed by Frans Kaashoek
parent 548ffc97e1
commit 3092fe2c9e
2 changed files with 85 additions and 37 deletions

View file

@ -47,7 +47,8 @@
// must be a power of two.
#define NUM 8
struct VRingDesc {
// a single descriptor, from the spec.
struct virtq_desc {
uint64 addr;
uint32 len;
uint16 flags;
@ -56,17 +57,38 @@ struct VRingDesc {
#define VRING_DESC_F_NEXT 1 // chained with another descriptor
#define VRING_DESC_F_WRITE 2 // device writes (vs read)
struct VRingUsedElem {
// the (entire) avail ring, from the spec.
struct virtq_avail {
uint16 flags; // always zero
uint16 idx; // driver will write ring[idx] next
uint16 ring[NUM]; // descriptor numbers of chain heads
uint16 unused;
};
// one entry in the "used" ring, with which the
// device tells the driver about completed requests.
struct virtq_used_elem {
uint32 id; // index of start of completed descriptor chain
uint32 len;
};
// for disk ops
struct virtq_used {
uint16 flags; // always zero
uint16 idx; // device increments when it adds a ring[] entry
struct virtq_used_elem ring[NUM];
};
// these are specific to virtio block devices, e.g. disks,
// described in Section 5.2 of the spec.
#define VIRTIO_BLK_T_IN 0 // read the disk
#define VIRTIO_BLK_T_OUT 1 // write the disk
struct UsedArea {
uint16 flags;
uint16 id;
struct VRingUsedElem elems[NUM];
// the format of the first descriptor in a disk request.
// to be followed by two more descriptors containing
// the block, and a one-byte status.
struct virtio_blk_req {
uint32 type; // VIRTIO_BLK_T_IN or ..._OUT
uint32 reserved;
uint64 sector;
};

View file

@ -21,14 +21,37 @@
#define R(r) ((volatile uint32 *)(VIRTIO0 + (r)))
static struct disk {
// memory for virtio descriptors &c for queue 0.
// this is a global instead of allocated because it must
// be multiple contiguous pages, which kalloc()
// doesn't support, and page aligned.
// the virtio driver and device mostly communicate through a set of
// structures in RAM. pages[] allocates that memory. pages[] is a
// global (instead of calls to kalloc()) because it must consist of
// two contiguous pages of page-aligned physical memory.
char pages[2*PGSIZE];
struct VRingDesc *desc;
uint16 *avail;
struct UsedArea *used;
// pages[] is divided into three regions (descriptors, avail, and
// used), as explained in Section 2.6 of the virtio specification
// for the legacy interface.
// https://docs.oasis-open.org/virtio/virtio/v1.1/virtio-v1.1.pdf
// the first region of pages[] is a set (not a ring) of DMA
// descriptors, with which the driver tells the device where to read
// and write individual disk operations. there are NUM descriptors.
// most commands consist of a "chain" (a linked list) of a couple of
// these descriptors.
// points into pages[].
struct virtq_desc *desc;
// next is a ring in which the driver writes descriptor numbers
// that the driver would like the device to process. it only
// includes the head descriptor of each chain. the ring has
// NUM elements.
// points into pages[].
struct virtq_avail *avail;
// finally a ring in which the device writes descriptor numbers that
// the device has finished processing (just the head of each chain).
// there are NUM used ring entries.
// points into pages[].
struct virtq_used *used;
// our own book-keeping.
char free[NUM]; // is a descriptor free?
@ -98,14 +121,15 @@ virtio_disk_init(void)
memset(disk.pages, 0, sizeof(disk.pages));
*R(VIRTIO_MMIO_QUEUE_PFN) = ((uint64)disk.pages) >> PGSHIFT;
// desc = pages -- num * VRingDesc
// desc = pages -- num * virtq_desc
// avail = pages + 0x40 -- 2 * uint16, then num * uint16
// used = pages + 4096 -- 2 * uint16, then num * vRingUsedElem
disk.desc = (struct VRingDesc *) disk.pages;
disk.avail = (uint16*)(((char*)disk.desc) + NUM*sizeof(struct VRingDesc));
disk.used = (struct UsedArea *) (disk.pages + PGSIZE);
disk.desc = (struct virtq_desc *) disk.pages;
disk.avail = (struct virtq_avail *)(disk.pages + NUM*sizeof(struct virtq_desc));
disk.used = (struct virtq_used *) (disk.pages + PGSIZE);
// all NUM descriptors start out unused.
for(int i = 0; i < NUM; i++)
disk.free[i] = 1;
@ -156,6 +180,8 @@ free_chain(int i)
}
}
// allocate three descriptors (they need not be contiguous).
// disk transfers always use three descriptors.
static int
alloc3_desc(int *idx)
{
@ -177,9 +203,9 @@ virtio_disk_rw(struct buf *b, int write)
acquire(&disk.vdisk_lock);
// the spec says that legacy block operations use three
// descriptors: one for type/reserved/sector, one for
// the data, one for a 1-byte status result.
// the spec's Section 5.2 says that legacy block operations use
// three descriptors: one for type/reserved/sector, one for the
// data, one for a 1-byte status result.
// allocate the three descriptors.
int idx[3];
@ -193,11 +219,7 @@ virtio_disk_rw(struct buf *b, int write)
// format the three descriptors.
// qemu's virtio-blk.c reads them.
struct virtio_blk_outhdr {
uint32 type;
uint32 reserved;
uint64 sector;
} buf0;
struct virtio_blk_req buf0;
if(write)
buf0.type = VIRTIO_BLK_T_OUT; // write the disk
@ -232,15 +254,13 @@ virtio_disk_rw(struct buf *b, int write)
b->disk = 1;
disk.info[idx[0]].b = b;
// avail[0] is flags (always zero)
// avail[1] is an index into avail[2...] telling where we'll write next
// avail[2...] is a ring of NUM indices the device should process
// we only tell device the first index in our chain of descriptors.
disk.avail[2 + (disk.avail[1] % NUM)] = idx[0];
// tell the device the first index in our chain of descriptors.
disk.avail->ring[disk.avail->idx % NUM] = idx[0];
__sync_synchronize();
disk.avail[1] = disk.avail[1] + 1; // not % NUM ...
// tell the device another avail ring entry is available.
disk.avail->idx += 1; // not % NUM ...
__sync_synchronize();
@ -262,16 +282,22 @@ virtio_disk_intr()
{
acquire(&disk.vdisk_lock);
// this ack may race with the device writing new notifications to
// the "used" ring, in which case we may get an interrupt we don't
// need, which is harmless.
// the device won't raise another interrupt until we tell it
// we've seen this interrupt, which the following line does.
// this may race with the device writing new entries to
// the "used" ring, in which case we may process the new
// completion entries in this interrupt, and have nothing to do
// in the next interrupt, which is harmless.
*R(VIRTIO_MMIO_INTERRUPT_ACK) = *R(VIRTIO_MMIO_INTERRUPT_STATUS) & 0x3;
__sync_synchronize();
while(disk.used_idx != disk.used->id){
// the device increments disk.used->idx when it
// adds an entry to the used ring.
while(disk.used_idx != disk.used->idx){
__sync_synchronize();
int id = disk.used->elems[disk.used_idx % NUM].id;
int id = disk.used->ring[disk.used_idx % NUM].id;
if(disk.info[id].status != 0)
panic("virtio_disk_intr status");