From de9d72c9086ec935d5b2b889f50ff611135f80fa Mon Sep 17 00:00:00 2001 From: Robert Morris Date: Thu, 13 Jun 2019 06:49:02 -0400 Subject: [PATCH] virtio disk driver --- Makefile | 4 +- kernel/bio.c | 6 +- kernel/defs.h | 5 + kernel/kalloc.c | 1 + kernel/kernel.ld | 1 + kernel/main.c | 3 +- kernel/memlayout.h | 6 +- kernel/plic.c | 7 +- kernel/trap.c | 2 + kernel/virtio.h | 59 ++++++++++ kernel/virtio_disk.c | 268 +++++++++++++++++++++++++++++++++++++++++++ kernel/vm.c | 4 + 12 files changed, 357 insertions(+), 9 deletions(-) create mode 100644 kernel/virtio.h create mode 100644 kernel/virtio_disk.c diff --git a/Makefile b/Makefile index 545f28c..7580ad5 100644 --- a/Makefile +++ b/Makefile @@ -27,7 +27,8 @@ OBJS = \ $K/exec.o \ $K/sysfile.o \ $K/kernelvec.o \ - $K/plic.o + $K/plic.o \ + $K/virtio_disk.o # riscv64-unknown-elf- or riscv64-linux-gnu- # perhaps in /opt/riscv/bin @@ -163,6 +164,7 @@ CPUS := 3 endif QEMUOPTS = -machine virt -kernel $K/kernel -m 3G -smp $(CPUS) -nographic QEMUOPTS += -initrd fs.img +QEMUOPTS += -drive file=fs.img,if=none,format=raw,id=x0 -device virtio-blk-device,drive=x0,bus=virtio-mmio-bus.0 qemu: $K/kernel fs.img $(QEMU) $(QEMUOPTS) diff --git a/kernel/bio.c b/kernel/bio.c index 90f9af9..07ea030 100644 --- a/kernel/bio.c +++ b/kernel/bio.c @@ -101,7 +101,8 @@ bread(uint dev, uint blockno) b = bget(dev, blockno); if((b->flags & B_VALID) == 0) { - ramdiskrw(b); + //ramdiskrw(b); + virtio_disk_rw(b); } return b; } @@ -113,7 +114,8 @@ bwrite(struct buf *b) if(!holdingsleep(&b->lock)) panic("bwrite"); b->flags |= B_DIRTY; - ramdiskrw(b); + //ramdiskrw(b); + virtio_disk_rw(b); } // Release a locked buffer. 
diff --git a/kernel/defs.h b/kernel/defs.h index 597e5b6..1b397fe 100644 --- a/kernel/defs.h +++ b/kernel/defs.h @@ -201,5 +201,10 @@ uint64 plic_pending(void); int plic_claim(void); void plic_complete(int); +// virtio_disk.c +void virtio_disk_init(void); +void virtio_disk_rw(struct buf *); +void virtio_disk_intr(); + // number of elements in fixed-size array #define NELEM(x) (sizeof(x)/sizeof((x)[0])) diff --git a/kernel/kalloc.c b/kernel/kalloc.c index 1ed1c49..afadb02 100644 --- a/kernel/kalloc.c +++ b/kernel/kalloc.c @@ -35,6 +35,7 @@ freerange(void *pa_start, void *pa_end) { char *p; p = (char*)PGROUNDUP((uint64)pa_start); + p += 4096; // XXX I can't get kernel.ld to place end beyond the last bss symbol. for(; p + PGSIZE <= (char*)pa_end; p += PGSIZE) kfree(p); } diff --git a/kernel/kernel.ld b/kernel/kernel.ld index 53c9b90..dec8e4f 100644 --- a/kernel/kernel.ld +++ b/kernel/kernel.ld @@ -28,4 +28,5 @@ SECTIONS *(.bss) PROVIDE(end = .); } + } diff --git a/kernel/main.c b/kernel/main.c index 2168b9f..d44c82c 100644 --- a/kernel/main.c +++ b/kernel/main.c @@ -26,7 +26,8 @@ main() plicinithart(); // ask PLIC for device interrupts binit(); // buffer cache fileinit(); // file table - ramdiskinit(); // disk + virtio_disk_init(); // emulated hard disk + ramdiskinit(); // in-memory disk userinit(); // first user process started = 1; } else { diff --git a/kernel/memlayout.h b/kernel/memlayout.h index 462986c..6d86166 100644 --- a/kernel/memlayout.h +++ b/kernel/memlayout.h @@ -6,7 +6,8 @@ // 00001000 -- boot ROM, provided by qemu // 02000000 -- CLINT // 0C000000 -- PLIC -// 10000000 -- uart0 registers +// 10000000 -- uart0 +// 10001000 -- virtio disk // 80000000 -- boot ROM jumps here in machine mode // -kernel loads the kernel here // 88000000 -- -initrd fs.img ramdisk image. 
@@ -21,6 +22,9 @@ #define UART0 0x10000000L #define UART0_IRQ 10 +#define VIRTIO 0x10001000 +#define VIRTIO_IRQ 1 // really the first of 8 units + // local interrupt controller, which contains the timer. #define CLINT 0x2000000L #define CLINT_MTIMECMP(hartid) (CLINT + 0x4000 + 8*(hartid)) diff --git a/kernel/plic.c b/kernel/plic.c index 0f19ab0..cc9a97e 100644 --- a/kernel/plic.c +++ b/kernel/plic.c @@ -11,8 +11,9 @@ void plicinit(void) { - // set uart's priority to be non-zero (otherwise disabled). + // set desired IRQ priorities non-zero (otherwise disabled). *(uint32*)(PLIC + UART0_IRQ*4) = 1; + *(uint32*)(PLIC + VIRTIO_IRQ*4) = 1; } void @@ -21,11 +22,9 @@ plicinithart(void) int hart = cpuid(); // set uart's enable bit for this hart's S-mode. - //*(uint32*)(PLIC + 0x2080)= (1 << UART0_IRQ); - *(uint32*)PLIC_SENABLE(hart)= (1 << UART0_IRQ); + *(uint32*)PLIC_SENABLE(hart)= (1 << UART0_IRQ) | (1 << VIRTIO_IRQ); // set this hart's S-mode priority threshold to 0. - //*(uint32*)(PLIC + 0x201000) = 0; *(uint32*)PLIC_SPRIORITY(hart) = 0; } diff --git a/kernel/trap.c b/kernel/trap.c index 050a94d..13ad362 100644 --- a/kernel/trap.c +++ b/kernel/trap.c @@ -159,6 +159,8 @@ devintr() if(irq == UART0_IRQ){ uartintr(); + } else if(irq == VIRTIO_IRQ){ + virtio_disk_intr(); } plic_complete(irq); diff --git a/kernel/virtio.h b/kernel/virtio.h new file mode 100644 index 0000000..258d107 --- /dev/null +++ b/kernel/virtio.h @@ -0,0 +1,59 @@ +// +// virtio device definitions. +// for both the mmio interface, and virtio descriptors. +// only tested with qemu. +// this is the "legacy" virtio interface. +// + +// virtio mmio control registers, mapped starting at 0x10001000. 
+// from qemu virtio_mmio.h
+#define VIRTIO_MMIO_MAGIC_VALUE      0x000 // 0x74726976
+#define VIRTIO_MMIO_VERSION          0x004 // 1 -- version, 1 is legacy
+#define VIRTIO_MMIO_DEVICE_ID        0x008 // 2 -- block device type
+#define VIRTIO_MMIO_VENDOR_ID        0x00c // 0x554d4551
+#define VIRTIO_MMIO_DEVICE_FEATURES  0x010
+#define VIRTIO_MMIO_DRIVER_FEATURES  0x020
+#define VIRTIO_MMIO_GUEST_PAGE_SIZE  0x028 // page size for PFN, write-only
+#define VIRTIO_MMIO_QUEUE_SEL        0x030 // select queue, write-only
+#define VIRTIO_MMIO_QUEUE_NUM_MAX    0x034 // max size of current queue, read-only
+#define VIRTIO_MMIO_QUEUE_NUM        0x038 // size of current queue, write-only
+#define VIRTIO_MMIO_QUEUE_ALIGN      0x03c // used ring alignment, write-only
+#define VIRTIO_MMIO_QUEUE_PFN        0x040 // physical page number for queue, read/write
+#define VIRTIO_MMIO_QUEUE_READY      0x044 // ready bit
+#define VIRTIO_MMIO_QUEUE_NOTIFY     0x050 // write-only
+#define VIRTIO_MMIO_INTERRUPT_STATUS 0x060 // read-only
+#define VIRTIO_MMIO_INTERRUPT_ACK    0x064 // write-only
+#define VIRTIO_MMIO_STATUS           0x070 // read/write
+
+// status register bits, from qemu virtio_config.h
+#define VIRTIO_CONFIG_S_ACKNOWLEDGE 1
+#define VIRTIO_CONFIG_S_DRIVER      2
+#define VIRTIO_CONFIG_S_DRIVER_OK   4
+#define VIRTIO_CONFIG_S_FEATURES_OK 8
+
+// device feature bits
+#define VIRTIO_BLK_F_RO             5  /* Disk is read-only */
+#define VIRTIO_BLK_F_SCSI           7  /* Supports scsi command passthru */
+#define VIRTIO_BLK_F_CONFIG_WCE     11 /* Writeback mode available in config */
+#define VIRTIO_BLK_F_MQ             12 /* support more than one vq */
+#define VIRTIO_F_ANY_LAYOUT         27
+#define VIRTIO_RING_F_INDIRECT_DESC 28
+#define VIRTIO_RING_F_EVENT_IDX     29
+
+struct VRingDesc {
+  uint64 addr;
+  uint32 len;
+  uint16 flags;
+  uint16 next;
+};
+#define VRING_DESC_F_NEXT  1
+#define VRING_DESC_F_WRITE 2 // device writes (vs read)
+
+struct VRingUsedElem {
+  uint32 id; // index of start of completed descriptor chain
+  uint32 len;
+};
+
+// for disk ops
+#define VIRTIO_BLK_T_IN  0
+#define VIRTIO_BLK_T_OUT 1
diff --git a/kernel/virtio_disk.c b/kernel/virtio_disk.c
new file mode 100644
index 0000000..558d3b0
--- /dev/null
+++ b/kernel/virtio_disk.c
@@ -0,0 +1,292 @@
+//
+// driver for qemu's virtio disk device.
+// uses qemu's mmio interface to virtio.
+// qemu presents a "legacy" virtio interface.
+//
+// qemu ... -drive file=fs.img,if=none,format=raw,id=x0 -device virtio-blk-device,drive=x0,bus=virtio-mmio-bus.0
+//
+
+#include "types.h"
+#include "riscv.h"
+#include "defs.h"
+#include "param.h"
+#include "memlayout.h"
+#include "spinlock.h"
+#include "sleeplock.h"
+#include "fs.h"
+#include "buf.h"
+#include "virtio.h"
+
+// the address of a virtio mmio register.
+#define R(off) ((volatile uint32 *)(VIRTIO + (off)))
+
+// held by virtio_disk_rw() and virtio_disk_intr() while they
+// touch the rings and the book-keeping below.
+struct spinlock virtio_disk_lock;
+
+// this many virtio descriptors.
+// must be a power of two.
+#define NUM 8
+
+// memory for virtio descriptors &c for queue 0.
+// this is a global instead of allocated because it has
+// to be multiple contiguous pages, which kalloc()
+// doesn't support.
+__attribute__ ((aligned (PGSIZE)))
+static char pages[2*PGSIZE];
+static struct VRingDesc *desc;
+static uint16 *avail;
+static char *used;
+
+// our own book-keeping.
+static char free[NUM];  // is a descriptor free?
+static uint16 used_idx; // we've looked this far in used[2..NUM].
+
+// track info about in-flight operations,
+// for use when completion interrupt arrives.
+// indexed by first descriptor index of chain.
+static struct {
+  struct buf *b;
+} info[NUM];
+
+// find the virtio disk at VIRTIO, negotiate features, and set up
+// queue 0. panics if the device is absent or its queue is too small.
+void
+virtio_disk_init(void)
+{
+  uint32 status = 0;
+
+  initlock(&virtio_disk_lock, "virtio_disk");
+
+  // qemu's virtio-mmio.c
+
+  if(*R(VIRTIO_MMIO_MAGIC_VALUE) != 0x74726976 ||
+     *R(VIRTIO_MMIO_VERSION) != 1 ||
+     *R(VIRTIO_MMIO_DEVICE_ID) != 2 ||
+     *R(VIRTIO_MMIO_VENDOR_ID) != 0x554d4551){
+    panic("could not find virtio disk");
+  }
+
+  status |= VIRTIO_CONFIG_S_ACKNOWLEDGE;
+  *R(VIRTIO_MMIO_STATUS) = status;
+
+  status |= VIRTIO_CONFIG_S_DRIVER;
+  *R(VIRTIO_MMIO_STATUS) = status;
+
+  // negotiate features: clear every optional feature bit,
+  // since this driver implements none of them.
+  uint64 features = *R(VIRTIO_MMIO_DEVICE_FEATURES);
+  features &= ~(1 << VIRTIO_BLK_F_RO);
+  features &= ~(1 << VIRTIO_BLK_F_SCSI);
+  features &= ~(1 << VIRTIO_BLK_F_CONFIG_WCE);
+  features &= ~(1 << VIRTIO_BLK_F_MQ);
+  features &= ~(1 << VIRTIO_F_ANY_LAYOUT);
+  features &= ~(1 << VIRTIO_RING_F_EVENT_IDX);
+  features &= ~(1 << VIRTIO_RING_F_INDIRECT_DESC);
+  *R(VIRTIO_MMIO_DRIVER_FEATURES) = features;
+
+  // tell device that feature negotiation is complete.
+  status |= VIRTIO_CONFIG_S_FEATURES_OK;
+  *R(VIRTIO_MMIO_STATUS) = status;
+
+  *R(VIRTIO_MMIO_GUEST_PAGE_SIZE) = PGSIZE;
+
+  // qemu's hw/virtio/virtio.c
+
+  // initialize queue 0
+  *R(VIRTIO_MMIO_QUEUE_SEL) = 0;
+  uint32 max = *R(VIRTIO_MMIO_QUEUE_NUM_MAX);
+  if(max == 0)
+    panic("virtio disk has no queue 0");
+  if(max < NUM)
+    panic("virtio disk max queue too short");
+  *R(VIRTIO_MMIO_QUEUE_NUM) = NUM;
+  memset(pages, 0, sizeof(pages));
+  *R(VIRTIO_MMIO_QUEUE_PFN) = ((uint64)pages) >> PGSHIFT;
+
+  // desc = pages -- num * VRingDesc
+  // avail = pages + 0x80 -- 2 * uint16, then num * uint16
+  // used = pages + 4096 -- 2 * uint16, then num * vRingUsedElem
+
+  desc = (struct VRingDesc *) pages;
+  avail = (uint16*)(((char*)desc) + NUM*sizeof(struct VRingDesc));
+  used = pages + PGSIZE;
+
+  for(int i = 0; i < NUM; i++)
+    free[i] = 1;
+
+  // tell device we're completely ready. this is done last because
+  // the virtio spec wants the queue set up before DRIVER_OK.
+  status |= VIRTIO_CONFIG_S_DRIVER_OK;
+  *R(VIRTIO_MMIO_STATUS) = status;
+}
+
+// find a free descriptor, mark it non-free, return its index.
+// returns -1 if all NUM descriptors are in use.
+static int
+alloc_desc()
+{
+  for(int i = 0; i < NUM; i++){
+    if(free[i]){
+      free[i] = 0;
+      return i;
+    }
+  }
+  return -1;
+}
+
+// mark descriptor i as free.
+// panics if i is out of range or already free.
+static void
+free_desc(int i)
+{
+  if(i < 0 || i >= NUM)
+    panic("free_desc 1");
+  if(free[i])
+    panic("free_desc 2");
+  free[i] = 1;
+}
+
+// read or write the disk block held in b, sleeping until the
+// device is done. if B_DIRTY is set in b->flags, b->data is
+// written to disk; otherwise the block is read into b->data.
+void
+virtio_disk_rw(struct buf *b)
+{
+  uint64 sector = b->blockno * (BSIZE / 512);
+
+  acquire(&virtio_disk_lock);
+
+  // the spec says that legacy block operations always use three
+  // descriptors: one for type/reserved/sector, one for
+  // the data, one for a 1-byte status result.
+
+  // allocate the three descriptors.
+  int idx[3];
+  while(1){
+    int done = 1;
+    for(int i = 0; i < 3; i++){
+      idx[i] = alloc_desc();
+      if(idx[i] < 0){
+        // too few free descriptors: give back what we took,
+        // then sleep until virtio_disk_intr() frees some.
+        for(int j = 0; j < i; j++)
+          free_desc(idx[j]);
+        wakeup(&free[0]);
+        done = 0;
+        break;
+      }
+    }
+    if(done)
+      break;
+    sleep(&free[0], &virtio_disk_lock);
+  }
+
+  // format the three descriptors.
+  // qemu's virtio-blk.c reads them.
+
+  struct virtio_blk_outhdr {
+    uint32 type;
+    uint32 reserved;
+    uint64 sector;
+  } buf0;
+
+  if(b->flags & B_DIRTY)
+    buf0.type = VIRTIO_BLK_T_OUT; // write the disk
+  else
+    buf0.type = VIRTIO_BLK_T_IN; // read the disk
+  buf0.reserved = 0;
+  buf0.sector = sector;
+
+  desc[idx[0]].addr = (uint64) &buf0;
+  desc[idx[0]].len = sizeof(buf0);
+  desc[idx[0]].flags = VRING_DESC_F_NEXT;
+  desc[idx[0]].next = idx[1];
+
+  desc[idx[1]].addr = (uint64) b->data;
+  desc[idx[1]].len = BSIZE;
+  if(b->flags & B_DIRTY)
+    desc[idx[1]].flags = 0; // device reads b->data
+  else
+    desc[idx[1]].flags = VRING_DESC_F_WRITE; // device writes b->data
+  desc[idx[1]].flags |= VRING_DESC_F_NEXT;
+  desc[idx[1]].next = idx[2];
+
+  // the device stores the request's result in this byte.
+  // volatile so that the check after sleep() re-reads memory.
+  volatile char status = 0;
+  desc[idx[2]].addr = (uint64) &status;
+  desc[idx[2]].len = 1;
+  desc[idx[2]].flags = VRING_DESC_F_WRITE; // device writes the status
+  desc[idx[2]].next = 0;
+
+  // record struct buf for virtio_disk_intr().
+  info[idx[0]].b = b;
+
+  // avail[0] is flags
+  // avail[1] tells the device how far to look in avail[2...].
+  // avail[2...] are desc[] indices the device should process.
+  // we only tell device the first index in our chain of descriptors.
+  avail[2 + (avail[1] % NUM)] = idx[0];
+  __sync_synchronize(); // publish the ring entry before the index.
+  avail[1] = avail[1] + 1;
+  __sync_synchronize(); // publish the index before notifying.
+
+  *R(VIRTIO_MMIO_QUEUE_NOTIFY) = 0; // value is queue number
+
+  // Wait for virtio_disk_intr() to say request has finished.
+  while((b->flags & (B_VALID|B_DIRTY)) != B_VALID){
+    sleep(b, &virtio_disk_lock);
+  }
+
+  // the virtio block spec defines 0 as success; any other
+  // status means b->data cannot be trusted.
+  if(status != 0)
+    panic("virtio_disk_rw status");
+
+  release(&virtio_disk_lock);
+}
+
+// handle a completion interrupt: for each new entry in the used
+// ring, mark the buf valid, wake its sleeper, and free its descriptors.
+void
+virtio_disk_intr()
+{
+  // the used area is:
+  // uint16 flags
+  // uint16 idx
+  // array of VRingUsedElem
+
+  acquire(&virtio_disk_lock);
+
+  // acknowledge the interrupt, as the spec requires: write the
+  // bits read from INTERRUPT_STATUS back to INTERRUPT_ACK.
+  *R(VIRTIO_MMIO_INTERRUPT_ACK) = *R(VIRTIO_MMIO_INTERRUPT_STATUS) & 0x3;
+  __sync_synchronize();
+
+  while((used_idx % NUM) != (*(volatile uint16 *)(used+2) % NUM)){
+    struct VRingUsedElem *ue = (struct VRingUsedElem *) (used + 4 + 8*used_idx);
+
+    info[ue->id].b->flags |= B_VALID;
+    info[ue->id].b->flags &= ~B_DIRTY;
+
+    wakeup(info[ue->id].b);
+
+    info[ue->id].b = 0;
+
+    uint i = ue->id;
+    while(1){
+      desc[i].addr = 0;
+      free_desc(i);
+      if(desc[i].flags & VRING_DESC_F_NEXT)
+        i = desc[i].next;
+      else
+        break;
+    }
+    wakeup(&free[0]);
+
+    used_idx = (used_idx + 1) % NUM;
+  }
+
+  release(&virtio_disk_lock);
+}
diff --git a/kernel/vm.c b/kernel/vm.c
index 0ea6bca..0d0a9d9 100644
--- a/kernel/vm.c
+++ b/kernel/vm.c
@@ -30,6 +30,10 @@ kvminit()
   mappages(kernel_pagetable, UART0, PGSIZE,
            UART0, PTE_R | PTE_W);
 
+  // virtio disk interface
+  mappages(kernel_pagetable, VIRTIO, PGSIZE,
+           VIRTIO, PTE_R | PTE_W);
+
   // CLINT
   mappages(kernel_pagetable, CLINT, 0x10000,
            CLINT, PTE_R | PTE_W);