316 lines
8.5 KiB
C
316 lines
8.5 KiB
C
//
// driver for qemu's virtio disk device.
// uses qemu's mmio interface to virtio.
// qemu presents a "legacy" virtio interface.
//
// qemu ... -drive file=fs.img,if=none,format=raw,id=x0 -device virtio-blk-device,drive=x0,bus=virtio-mmio-bus.0
//
|
|
|
|
#include "types.h"
|
|
#include "riscv.h"
|
|
#include "defs.h"
|
|
#include "param.h"
|
|
#include "memlayout.h"
|
|
#include "spinlock.h"
|
|
#include "sleeplock.h"
|
|
#include "fs.h"
|
|
#include "buf.h"
|
|
#include "virtio.h"
|
|
|
|
// R(reg) evaluates to a pointer to the 32-bit virtio mmio register at
// offset reg from VIRTIO0. the pointer is volatile so that every
// read or write through it is actually performed.
#define R(reg) ((volatile uint32 *)(VIRTIO0 + (reg)))
|
|
|
|
static struct disk {
|
|
// the virtio driver and device mostly communicate through a set of
|
|
// structures in RAM. pages[] allocates that memory. pages[] is a
|
|
// global (instead of calls to kalloc()) because it must consist of
|
|
// two contiguous pages of page-aligned physical memory.
|
|
char pages[2*PGSIZE];
|
|
|
|
// pages[] is divided into three regions (descriptors, avail, and
|
|
// used), as explained in Section 2.6 of the virtio specification
|
|
// for the legacy interface.
|
|
// https://docs.oasis-open.org/virtio/virtio/v1.1/virtio-v1.1.pdf
|
|
|
|
// the first region of pages[] is a set (not a ring) of DMA
|
|
// descriptors, with which the driver tells the device where to read
|
|
// and write individual disk operations. there are NUM descriptors.
|
|
// most commands consist of a "chain" (a linked list) of a couple of
|
|
// these descriptors.
|
|
// points into pages[].
|
|
struct virtq_desc *desc;
|
|
|
|
// next is a ring in which the driver writes descriptor numbers
|
|
// that the driver would like the device to process. it only
|
|
// includes the head descriptor of each chain. the ring has
|
|
// NUM elements.
|
|
// points into pages[].
|
|
struct virtq_avail *avail;
|
|
|
|
// finally a ring in which the device writes descriptor numbers that
|
|
// the device has finished processing (just the head of each chain).
|
|
// there are NUM used ring entries.
|
|
// points into pages[].
|
|
struct virtq_used *used;
|
|
|
|
// our own book-keeping.
|
|
char free[NUM]; // is a descriptor free?
|
|
uint16 used_idx; // we've looked this far in used[2..NUM].
|
|
|
|
// track info about in-flight operations,
|
|
// for use when completion interrupt arrives.
|
|
// indexed by first descriptor index of chain.
|
|
struct {
|
|
struct buf *b;
|
|
char status;
|
|
} info[NUM];
|
|
|
|
// disk command headers.
|
|
// one-for-one with descriptors, for convenience.
|
|
struct virtio_blk_req ops[NUM];
|
|
|
|
struct spinlock vdisk_lock;
|
|
|
|
} __attribute__ ((aligned (PGSIZE))) disk;
|
|
|
|
void
|
|
virtio_disk_init(void)
|
|
{
|
|
uint32 status = 0;
|
|
|
|
initlock(&disk.vdisk_lock, "virtio_disk");
|
|
|
|
if(*R(VIRTIO_MMIO_MAGIC_VALUE) != 0x74726976 ||
|
|
*R(VIRTIO_MMIO_VERSION) != 1 ||
|
|
*R(VIRTIO_MMIO_DEVICE_ID) != 2 ||
|
|
*R(VIRTIO_MMIO_VENDOR_ID) != 0x554d4551){
|
|
panic("could not find virtio disk");
|
|
}
|
|
|
|
status |= VIRTIO_CONFIG_S_ACKNOWLEDGE;
|
|
*R(VIRTIO_MMIO_STATUS) = status;
|
|
|
|
status |= VIRTIO_CONFIG_S_DRIVER;
|
|
*R(VIRTIO_MMIO_STATUS) = status;
|
|
|
|
// negotiate features
|
|
uint64 features = *R(VIRTIO_MMIO_DEVICE_FEATURES);
|
|
features &= ~(1 << VIRTIO_BLK_F_RO);
|
|
features &= ~(1 << VIRTIO_BLK_F_SCSI);
|
|
features &= ~(1 << VIRTIO_BLK_F_CONFIG_WCE);
|
|
features &= ~(1 << VIRTIO_BLK_F_MQ);
|
|
features &= ~(1 << VIRTIO_F_ANY_LAYOUT);
|
|
features &= ~(1 << VIRTIO_RING_F_EVENT_IDX);
|
|
features &= ~(1 << VIRTIO_RING_F_INDIRECT_DESC);
|
|
*R(VIRTIO_MMIO_DRIVER_FEATURES) = features;
|
|
|
|
// tell device that feature negotiation is complete.
|
|
status |= VIRTIO_CONFIG_S_FEATURES_OK;
|
|
*R(VIRTIO_MMIO_STATUS) = status;
|
|
|
|
// tell device we're completely ready.
|
|
status |= VIRTIO_CONFIG_S_DRIVER_OK;
|
|
*R(VIRTIO_MMIO_STATUS) = status;
|
|
|
|
*R(VIRTIO_MMIO_GUEST_PAGE_SIZE) = PGSIZE;
|
|
|
|
// initialize queue 0.
|
|
*R(VIRTIO_MMIO_QUEUE_SEL) = 0;
|
|
uint32 max = *R(VIRTIO_MMIO_QUEUE_NUM_MAX);
|
|
if(max == 0)
|
|
panic("virtio disk has no queue 0");
|
|
if(max < NUM)
|
|
panic("virtio disk max queue too short");
|
|
*R(VIRTIO_MMIO_QUEUE_NUM) = NUM;
|
|
memset(disk.pages, 0, sizeof(disk.pages));
|
|
*R(VIRTIO_MMIO_QUEUE_PFN) = ((uint64)disk.pages) >> PGSHIFT;
|
|
|
|
// desc = pages -- num * virtq_desc
|
|
// avail = pages + 0x40 -- 2 * uint16, then num * uint16
|
|
// used = pages + 4096 -- 2 * uint16, then num * vRingUsedElem
|
|
|
|
disk.desc = (struct virtq_desc *) disk.pages;
|
|
disk.avail = (struct virtq_avail *)(disk.pages + NUM*sizeof(struct virtq_desc));
|
|
disk.used = (struct virtq_used *) (disk.pages + PGSIZE);
|
|
|
|
// all NUM descriptors start out unused.
|
|
for(int i = 0; i < NUM; i++)
|
|
disk.free[i] = 1;
|
|
|
|
// plic.c and trap.c arrange for interrupts from VIRTIO0_IRQ.
|
|
}
|
|
|
|
// find a free descriptor, mark it non-free, return its index.
|
|
static int
|
|
alloc_desc()
|
|
{
|
|
for(int i = 0; i < NUM; i++){
|
|
if(disk.free[i]){
|
|
disk.free[i] = 0;
|
|
return i;
|
|
}
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
// mark a descriptor as free.
|
|
static void
|
|
free_desc(int i)
|
|
{
|
|
if(i >= NUM)
|
|
panic("free_desc 1");
|
|
if(disk.free[i])
|
|
panic("free_desc 2");
|
|
disk.desc[i].addr = 0;
|
|
disk.desc[i].len = 0;
|
|
disk.desc[i].flags = 0;
|
|
disk.desc[i].next = 0;
|
|
disk.free[i] = 1;
|
|
wakeup(&disk.free[0]);
|
|
}
|
|
|
|
// free a chain of descriptors.
|
|
static void
|
|
free_chain(int i)
|
|
{
|
|
while(1){
|
|
int flag = disk.desc[i].flags;
|
|
int nxt = disk.desc[i].next;
|
|
free_desc(i);
|
|
if(flag & VRING_DESC_F_NEXT)
|
|
i = nxt;
|
|
else
|
|
break;
|
|
}
|
|
}
|
|
|
|
// grab the three descriptors that every disk transfer needs and
// store their indices in idx[0..2]; they need not be contiguous.
// returns 0 on success. if fewer than three are available, gives
// back whatever was obtained and returns -1.
static int
alloc3_desc(int *idx)
{
  int got;

  for(got = 0; got < 3; got++){
    idx[got] = alloc_desc();
    if(idx[got] < 0)
      break;
  }

  if(got == 3)
    return 0;

  // ran out part-way through: undo the allocations already made.
  for(int k = 0; k < got; k++)
    free_desc(idx[k]);
  return -1;
}
|
|
|
|
void
|
|
virtio_disk_rw(struct buf *b, int write)
|
|
{
|
|
uint64 sector = b->blockno * (BSIZE / 512);
|
|
|
|
acquire(&disk.vdisk_lock);
|
|
|
|
// the spec's Section 5.2 says that legacy block operations use
|
|
// three descriptors: one for type/reserved/sector, one for the
|
|
// data, one for a 1-byte status result.
|
|
|
|
// allocate the three descriptors.
|
|
int idx[3];
|
|
while(1){
|
|
if(alloc3_desc(idx) == 0) {
|
|
break;
|
|
}
|
|
sleep(&disk.free[0], &disk.vdisk_lock);
|
|
}
|
|
|
|
// format the three descriptors.
|
|
// qemu's virtio-blk.c reads them.
|
|
|
|
struct virtio_blk_req *buf0 = &disk.ops[idx[0]];
|
|
|
|
if(write)
|
|
buf0->type = VIRTIO_BLK_T_OUT; // write the disk
|
|
else
|
|
buf0->type = VIRTIO_BLK_T_IN; // read the disk
|
|
buf0->reserved = 0;
|
|
buf0->sector = sector;
|
|
|
|
disk.desc[idx[0]].addr = (uint64) buf0;
|
|
disk.desc[idx[0]].len = sizeof(struct virtio_blk_req);
|
|
disk.desc[idx[0]].flags = VRING_DESC_F_NEXT;
|
|
disk.desc[idx[0]].next = idx[1];
|
|
|
|
disk.desc[idx[1]].addr = (uint64) b->data;
|
|
disk.desc[idx[1]].len = BSIZE;
|
|
if(write)
|
|
disk.desc[idx[1]].flags = 0; // device reads b->data
|
|
else
|
|
disk.desc[idx[1]].flags = VRING_DESC_F_WRITE; // device writes b->data
|
|
disk.desc[idx[1]].flags |= VRING_DESC_F_NEXT;
|
|
disk.desc[idx[1]].next = idx[2];
|
|
|
|
disk.info[idx[0]].status = 0xff; // device writes 0 on success
|
|
disk.desc[idx[2]].addr = (uint64) &disk.info[idx[0]].status;
|
|
disk.desc[idx[2]].len = 1;
|
|
disk.desc[idx[2]].flags = VRING_DESC_F_WRITE; // device writes the status
|
|
disk.desc[idx[2]].next = 0;
|
|
|
|
// record struct buf for virtio_disk_intr().
|
|
b->disk = 1;
|
|
disk.info[idx[0]].b = b;
|
|
|
|
// tell the device the first index in our chain of descriptors.
|
|
disk.avail->ring[disk.avail->idx % NUM] = idx[0];
|
|
|
|
__sync_synchronize();
|
|
|
|
// tell the device another avail ring entry is available.
|
|
disk.avail->idx += 1; // not % NUM ...
|
|
|
|
__sync_synchronize();
|
|
|
|
*R(VIRTIO_MMIO_QUEUE_NOTIFY) = 0; // value is queue number
|
|
|
|
// Wait for virtio_disk_intr() to say request has finished.
|
|
while(b->disk == 1) {
|
|
sleep(b, &disk.vdisk_lock);
|
|
}
|
|
|
|
disk.info[idx[0]].b = 0;
|
|
free_chain(idx[0]);
|
|
|
|
release(&disk.vdisk_lock);
|
|
}
|
|
|
|
void
|
|
virtio_disk_intr()
|
|
{
|
|
acquire(&disk.vdisk_lock);
|
|
|
|
// the device won't raise another interrupt until we tell it
|
|
// we've seen this interrupt, which the following line does.
|
|
// this may race with the device writing new entries to
|
|
// the "used" ring, in which case we may process the new
|
|
// completion entries in this interrupt, and have nothing to do
|
|
// in the next interrupt, which is harmless.
|
|
*R(VIRTIO_MMIO_INTERRUPT_ACK) = *R(VIRTIO_MMIO_INTERRUPT_STATUS) & 0x3;
|
|
|
|
__sync_synchronize();
|
|
|
|
// the device increments disk.used->idx when it
|
|
// adds an entry to the used ring.
|
|
|
|
while(disk.used_idx != disk.used->idx){
|
|
__sync_synchronize();
|
|
int id = disk.used->ring[disk.used_idx % NUM].id;
|
|
|
|
if(disk.info[id].status != 0)
|
|
panic("virtio_disk_intr status");
|
|
|
|
struct buf *b = disk.info[id].b;
|
|
b->disk = 0; // disk is done with buf
|
|
wakeup(b);
|
|
|
|
disk.used_idx += 1;
|
|
}
|
|
|
|
release(&disk.vdisk_lock);
|
|
}
|