virtio disk driver

This commit is contained in:
Robert Morris 2019-06-13 06:49:02 -04:00
parent 5753553213
commit de9d72c908
12 changed files with 357 additions and 9 deletions

View file

@ -27,7 +27,8 @@ OBJS = \
$K/exec.o \ $K/exec.o \
$K/sysfile.o \ $K/sysfile.o \
$K/kernelvec.o \ $K/kernelvec.o \
$K/plic.o $K/plic.o \
$K/virtio_disk.o
# riscv64-unknown-elf- or riscv64-linux-gnu- # riscv64-unknown-elf- or riscv64-linux-gnu-
# perhaps in /opt/riscv/bin # perhaps in /opt/riscv/bin
@ -163,6 +164,7 @@ CPUS := 3
endif endif
QEMUOPTS = -machine virt -kernel $K/kernel -m 3G -smp $(CPUS) -nographic QEMUOPTS = -machine virt -kernel $K/kernel -m 3G -smp $(CPUS) -nographic
QEMUOPTS += -initrd fs.img QEMUOPTS += -initrd fs.img
QEMUOPTS += -drive file=fs.img,if=none,format=raw,id=x0 -device virtio-blk-device,drive=x0,bus=virtio-mmio-bus.0
qemu: $K/kernel fs.img qemu: $K/kernel fs.img
$(QEMU) $(QEMUOPTS) $(QEMU) $(QEMUOPTS)

View file

@ -101,7 +101,8 @@ bread(uint dev, uint blockno)
b = bget(dev, blockno); b = bget(dev, blockno);
if((b->flags & B_VALID) == 0) { if((b->flags & B_VALID) == 0) {
ramdiskrw(b); //ramdiskrw(b);
virtio_disk_rw(b);
} }
return b; return b;
} }
@ -113,7 +114,8 @@ bwrite(struct buf *b)
if(!holdingsleep(&b->lock)) if(!holdingsleep(&b->lock))
panic("bwrite"); panic("bwrite");
b->flags |= B_DIRTY; b->flags |= B_DIRTY;
ramdiskrw(b); //ramdiskrw(b);
virtio_disk_rw(b);
} }
// Release a locked buffer. // Release a locked buffer.

View file

@ -201,5 +201,10 @@ uint64 plic_pending(void);
int plic_claim(void); int plic_claim(void);
void plic_complete(int); void plic_complete(int);
// virtio_disk.c
void virtio_disk_init(void);
void virtio_disk_rw(struct buf *);
void virtio_disk_intr();
// number of elements in fixed-size array // number of elements in fixed-size array
#define NELEM(x) (sizeof(x)/sizeof((x)[0])) #define NELEM(x) (sizeof(x)/sizeof((x)[0]))

View file

@ -35,6 +35,7 @@ freerange(void *pa_start, void *pa_end)
{ {
char *p; char *p;
p = (char*)PGROUNDUP((uint64)pa_start); p = (char*)PGROUNDUP((uint64)pa_start);
p += 4096; // XXX I can't get kernel.ld to place end beyond the last bss symbol.
for(; p + PGSIZE <= (char*)pa_end; p += PGSIZE) for(; p + PGSIZE <= (char*)pa_end; p += PGSIZE)
kfree(p); kfree(p);
} }

View file

@ -28,4 +28,5 @@ SECTIONS
*(.bss) *(.bss)
PROVIDE(end = .); PROVIDE(end = .);
} }
} }

View file

@ -26,7 +26,8 @@ main()
plicinithart(); // ask PLIC for device interrupts plicinithart(); // ask PLIC for device interrupts
binit(); // buffer cache binit(); // buffer cache
fileinit(); // file table fileinit(); // file table
ramdiskinit(); // disk virtio_disk_init(); // emulated hard disk
ramdiskinit(); // in-memory disk
userinit(); // first user process userinit(); // first user process
started = 1; started = 1;
} else { } else {

View file

@ -6,7 +6,8 @@
// 00001000 -- boot ROM, provided by qemu // 00001000 -- boot ROM, provided by qemu
// 02000000 -- CLINT // 02000000 -- CLINT
// 0C000000 -- PLIC // 0C000000 -- PLIC
// 10000000 -- uart0 registers // 10000000 -- uart0
// 10001000 -- virtio disk
// 80000000 -- boot ROM jumps here in machine mode // 80000000 -- boot ROM jumps here in machine mode
// -kernel loads the kernel here // -kernel loads the kernel here
// 88000000 -- -initrd fs.img ramdisk image. // 88000000 -- -initrd fs.img ramdisk image.
@ -21,6 +22,9 @@
#define UART0 0x10000000L #define UART0 0x10000000L
#define UART0_IRQ 10 #define UART0_IRQ 10
#define VIRTIO 0x10001000
#define VIRTIO_IRQ 1 // really the first of 8 units
// local interrupt controller, which contains the timer. // local interrupt controller, which contains the timer.
#define CLINT 0x2000000L #define CLINT 0x2000000L
#define CLINT_MTIMECMP(hartid) (CLINT + 0x4000 + 8*(hartid)) #define CLINT_MTIMECMP(hartid) (CLINT + 0x4000 + 8*(hartid))

View file

@ -11,8 +11,9 @@
void void
plicinit(void) plicinit(void)
{ {
// set uart's priority to be non-zero (otherwise disabled). // set desired IRQ priorities non-zero (otherwise disabled).
*(uint32*)(PLIC + UART0_IRQ*4) = 1; *(uint32*)(PLIC + UART0_IRQ*4) = 1;
*(uint32*)(PLIC + VIRTIO_IRQ*4) = 1;
} }
void void
@ -21,11 +22,9 @@ plicinithart(void)
int hart = cpuid(); int hart = cpuid();
// set uart's enable bit for this hart's S-mode. // set uart's enable bit for this hart's S-mode.
//*(uint32*)(PLIC + 0x2080)= (1 << UART0_IRQ); *(uint32*)PLIC_SENABLE(hart)= (1 << UART0_IRQ) | (1 << VIRTIO_IRQ);
*(uint32*)PLIC_SENABLE(hart)= (1 << UART0_IRQ);
// set this hart's S-mode priority threshold to 0. // set this hart's S-mode priority threshold to 0.
//*(uint32*)(PLIC + 0x201000) = 0;
*(uint32*)PLIC_SPRIORITY(hart) = 0; *(uint32*)PLIC_SPRIORITY(hart) = 0;
} }

View file

@ -159,6 +159,8 @@ devintr()
if(irq == UART0_IRQ){ if(irq == UART0_IRQ){
uartintr(); uartintr();
} else if(irq == VIRTIO_IRQ){
virtio_disk_intr();
} }
plic_complete(irq); plic_complete(irq);

59
kernel/virtio.h Normal file
View file

@ -0,0 +1,59 @@
//
// virtio device definitions.
// for both the mmio interface, and virtio descriptors.
// only tested with qemu.
// this is the "legacy" virtio interface.
//
// virtio mmio control registers, mapped starting at 0x10001000.
// from qemu virtio_mmio.h
// each value is a byte offset from the start of the device's register
// window; virtio_disk.c turns an offset into a pointer with its R() macro
// and accesses every register as a 32-bit word.
#define VIRTIO_MMIO_MAGIC_VALUE 0x000 // 0x74726976
#define VIRTIO_MMIO_VERSION 0x004 // 1 -- version, 1 is legacy
#define VIRTIO_MMIO_DEVICE_ID 0x008 // 2 -- block device type
#define VIRTIO_MMIO_VENDOR_ID 0x00c // 0x554d4551
#define VIRTIO_MMIO_DEVICE_FEATURES 0x010 // read by the driver at the start of feature negotiation
#define VIRTIO_MMIO_DRIVER_FEATURES 0x020 // written by the driver with the feature bits it keeps
#define VIRTIO_MMIO_GUEST_PAGE_SIZE 0x028 // page size for PFN, write-only
#define VIRTIO_MMIO_QUEUE_SEL 0x030 // select queue, write-only
#define VIRTIO_MMIO_QUEUE_NUM_MAX 0x034 // max size of current queue, read-only
#define VIRTIO_MMIO_QUEUE_NUM 0x038 // size of current queue, write-only
#define VIRTIO_MMIO_QUEUE_ALIGN 0x03c // used ring alignment, write-only (never written by virtio_disk.c)
#define VIRTIO_MMIO_QUEUE_PFN 0x040 // physical page number for queue, read/write
#define VIRTIO_MMIO_QUEUE_READY 0x044 // ready bit (not referenced by virtio_disk.c)
#define VIRTIO_MMIO_QUEUE_NOTIFY 0x050 // write-only
#define VIRTIO_MMIO_INTERRUPT_STATUS 0x060 // read-only
#define VIRTIO_MMIO_INTERRUPT_ACK 0x064 // write-only
#define VIRTIO_MMIO_STATUS 0x070 // read/write
// status register bits, from qemu virtio_config.h
// these are masks: virtio_disk_init() ORs them one at a time into a
// running value and rewrites VIRTIO_MMIO_STATUS after each step, in the
// order ACKNOWLEDGE, DRIVER, FEATURES_OK, DRIVER_OK.
#define VIRTIO_CONFIG_S_ACKNOWLEDGE 1
#define VIRTIO_CONFIG_S_DRIVER 2
#define VIRTIO_CONFIG_S_DRIVER_OK 4
#define VIRTIO_CONFIG_S_FEATURES_OK 8
// device feature bits
// these are bit positions, not masks: use (1 << VIRTIO_..._F_...) to
// test or clear one. virtio_disk_init() clears every bit listed here
// from the value it read from the device before writing it back.
#define VIRTIO_BLK_F_RO 5 /* Disk is read-only */
#define VIRTIO_BLK_F_SCSI 7 /* Supports scsi command passthru */
#define VIRTIO_BLK_F_CONFIG_WCE 11 /* Writeback mode available in config */
#define VIRTIO_BLK_F_MQ 12 /* support more than one vq */
#define VIRTIO_F_ANY_LAYOUT 27
#define VIRTIO_RING_F_INDIRECT_DESC 28
#define VIRTIO_RING_F_EVENT_IDX 29
// one entry of the descriptor table shared with the device.
// a request is a chain of these entries linked through `next`.
struct VRingDesc {
uint64 addr; // address of the buffer this descriptor refers to
uint32 len; // length of that buffer in bytes
uint16 flags; // VRING_DESC_F_* bits below
uint16 next; // index of the next descriptor; followed only if VRING_DESC_F_NEXT is set
};
#define VRING_DESC_F_NEXT 1 // the chain continues at desc[next]
#define VRING_DESC_F_WRITE 2 // device writes (vs read)
// one entry of the "used" ring, which virtio_disk_intr() reads to learn
// which descriptor chains have completed.
struct VRingUsedElem {
uint32 id; // index of start of completed descriptor chain
uint32 len; // never read by virtio_disk.c; presumably bytes written by the device -- confirm against the spec
};
// for disk ops
// value of the `type` field in the request header that virtio_disk_rw()
// places in the first descriptor of each chain.
#define VIRTIO_BLK_T_IN 0 // read the disk
#define VIRTIO_BLK_T_OUT 1 // write the disk

268
kernel/virtio_disk.c Normal file
View file

@ -0,0 +1,268 @@
//
// driver for qemu's virtio disk device.
// uses qemu's mmio interface to virtio.
// qemu presents a "legacy" virtio interface.
//
// qemu ... -drive file=fs.img,if=none,format=raw,id=x0 -device virtio-blk-device,drive=x0,bus=virtio-mmio-bus.0
//
#include "types.h"
#include "riscv.h"
#include "defs.h"
#include "param.h"
#include "memlayout.h"
#include "spinlock.h"
#include "sleeplock.h"
#include "fs.h"
#include "buf.h"
#include "virtio.h"
// the address of a virtio mmio register.
// `off` is one of the VIRTIO_MMIO_* byte offsets; the result is a
// volatile 32-bit pointer, so every use is a real load or store.
#define R(off) ((volatile uint32 *)(VIRTIO + (off)))
// protects all of the driver state below; held by virtio_disk_rw()
// and virtio_disk_intr() while they touch it.
struct spinlock virtio_disk_lock;
// this many virtio descriptors.
// must be a power of two.
#define NUM 8
// memory for virtio descriptors &c for queue 0.
// this is a global instead of allocated because it has
// to be multiple contiguous pages, which kalloc()
// doesn't support.
// page-aligned because virtio_disk_init() hands the device
// the page number (address >> PGSHIFT) of this array.
__attribute__ ((aligned (PGSIZE)))
static char pages[2*PGSIZE];
// pointers into pages[], set up by virtio_disk_init():
static struct VRingDesc *desc; // descriptor table, at the start of pages[]
static uint16 *avail; // available ring, directly after the NUM descriptors
static char *used; // used ring, at the start of the second page
// our own book-keeping.
static char free[NUM]; // is a descriptor free?
static uint16 used_idx; // we've looked this far in used[2..NUM].
// track info about in-flight operations,
// for use when completion interrupt arrives.
// indexed by first descriptor index of chain.
static struct {
struct buf *b; // buffer to mark done and wake up; 0 when the slot is idle
} info[NUM];
// find and initialize the virtio disk: check that the expected device
// is present, negotiate features, give the device the memory for
// queue 0, and mark every descriptor free.
// panics if the device is missing or its queue 0 is unusable.
void
virtio_disk_init(void)
{
// running copy of the status register; bits are only ever added.
uint32 status = 0;
initlock(&virtio_disk_lock, "virtio_disk");
// qemu's virtio-mmio.c
// identify the device: magic value, legacy version 1,
// device id 2 (block device), and the expected vendor id.
if(*R(VIRTIO_MMIO_MAGIC_VALUE) != 0x74726976 ||
*R(VIRTIO_MMIO_VERSION) != 1 ||
*R(VIRTIO_MMIO_DEVICE_ID) != 2 ||
*R(VIRTIO_MMIO_VENDOR_ID) != 0x554d4551){
panic("could not find virtio disk");
}
status |= VIRTIO_CONFIG_S_ACKNOWLEDGE;
*R(VIRTIO_MMIO_STATUS) = status;
status |= VIRTIO_CONFIG_S_DRIVER;
*R(VIRTIO_MMIO_STATUS) = status;
// negotiate features
// start from what the device offers, clear the bits listed
// below, and write the remainder back as the driver's choice.
uint64 features = *R(VIRTIO_MMIO_DEVICE_FEATURES);
features &= ~(1 << VIRTIO_BLK_F_RO);
features &= ~(1 << VIRTIO_BLK_F_SCSI);
features &= ~(1 << VIRTIO_BLK_F_CONFIG_WCE);
features &= ~(1 << VIRTIO_BLK_F_MQ);
features &= ~(1 << VIRTIO_F_ANY_LAYOUT);
features &= ~(1 << VIRTIO_RING_F_EVENT_IDX);
features &= ~(1 << VIRTIO_RING_F_INDIRECT_DESC);
*R(VIRTIO_MMIO_DRIVER_FEATURES) = features;
// tell device that feature negotiation is complete.
status |= VIRTIO_CONFIG_S_FEATURES_OK;
*R(VIRTIO_MMIO_STATUS) = status;
// tell device we're completely ready.
// NOTE(review): DRIVER_OK is written here, before the queue is
// configured below; the virtio initialization sequence appears to
// place DRIVER_OK after queue setup -- confirm against the spec.
status |= VIRTIO_CONFIG_S_DRIVER_OK;
*R(VIRTIO_MMIO_STATUS) = status;
// the device is given page numbers (see QUEUE_PFN below), so it
// must be told how big a page is.
*R(VIRTIO_MMIO_GUEST_PAGE_SIZE) = PGSIZE;
// qemu's hw/virtio/virtio.c
// initialize queue 0
*R(VIRTIO_MMIO_QUEUE_SEL) = 0;
uint32 max = *R(VIRTIO_MMIO_QUEUE_NUM_MAX);
if(max == 0)
panic("virtio disk has no queue 0");
if(max < NUM)
panic("virtio disk max queue too short");
*R(VIRTIO_MMIO_QUEUE_NUM) = NUM;
memset(pages, 0, sizeof(pages));
*R(VIRTIO_MMIO_QUEUE_PFN) = ((uint64)pages) >> PGSHIFT;
// layout of the queue memory, with NUM == 8 and 16-byte descriptors:
// desc = pages -- num * VRingDesc
// avail = pages + 0x80 -- 2 * uint16, then num * uint16
// used = pages + 4096 -- 2 * uint16, then num * VRingUsedElem
// NOTE(review): placing `used` on the second page assumes the device
// aligns the used ring to PGSIZE; VIRTIO_MMIO_QUEUE_ALIGN is never
// written here -- confirm the device's default.
desc = (struct VRingDesc *) pages;
avail = (uint16*)(((char*)desc) + NUM*sizeof(struct VRingDesc));
used = pages + PGSIZE;
// nothing is in flight yet, so every descriptor starts out free.
for(int i = 0; i < NUM; i++)
free[i] = 1;
}
// claim the lowest-numbered unused descriptor.
// returns its index, or -1 if all NUM descriptors are in use.
static int
alloc_desc()
{
  int slot = 0;

  // skip over descriptors that are already taken.
  while(slot < NUM && !free[slot])
    slot++;

  if(slot == NUM)
    return -1;

  free[slot] = 0;
  return slot;
}
// mark descriptor i as free again.
// panics if i is not a valid descriptor index (negative or >= NUM),
// or if the descriptor is already free.
void
free_desc(int i)
{
  // reject negative indices too; otherwise free[i] below would be
  // written outside the array.
  if(i < 0 || i >= NUM)
    panic("virtio_disk_intr 1");
  if(free[i])
    panic("virtio_disk_intr 2");
  free[i] = 1;
}
void
virtio_disk_rw(struct buf *b)
{
uint64 sector = b->blockno * (BSIZE / 512);
acquire(&virtio_disk_lock);
// the spec says that legacy block operations always use three
// descriptors: one for type/reserved/sector, one for
// the data, one for a 1-byte status result.
// allocate the three descriptors.
int idx[3];
while(1){
int done = 1;
for(int i = 0; i < 3; i++){
idx[i] = alloc_desc();
if(idx[i] < 0){
for(int j = 0; j < i; j++)
free_desc(idx[j]);
wakeup(&free[0]);
done = 0;
break;
}
}
if(done)
break;
sleep(&free[0], &virtio_disk_lock);
}
// format the three descriptors.
// qemu's virtio-blk.c reads them.
struct virtio_blk_outhdr {
uint32 type;
uint32 reserved;
uint64 sector;
} buf0;
if(b->flags & B_DIRTY)
buf0.type = VIRTIO_BLK_T_OUT; // write the disk
else
buf0.type = VIRTIO_BLK_T_IN; // read the disk
buf0.reserved = 0;
buf0.sector = sector;
desc[idx[0]].addr = (uint64) &buf0;
desc[idx[0]].len = sizeof(buf0);
desc[idx[0]].flags = VRING_DESC_F_NEXT;
desc[idx[0]].next = idx[1];
desc[idx[1]].addr = (uint64) b->data;
desc[idx[1]].len = BSIZE;
if(b->flags & B_DIRTY)
desc[idx[1]].flags = 0; // device reads b->data
else
desc[idx[1]].flags = VRING_DESC_F_WRITE; // device writes b->data
desc[idx[1]].flags |= VRING_DESC_F_NEXT;
desc[idx[1]].next = idx[2];
char status = 0;
desc[idx[2]].addr = (uint64) &status;
desc[idx[2]].len = 1;
desc[idx[2]].flags = VRING_DESC_F_WRITE; // device writes the status
desc[idx[2]].next = 0;
// record struct buf for virtio_disk_intr().
info[idx[0]].b = b;
// avail[0] is flags
// avail[1] tells the device how far to look in avail[2...].
// avail[2...] are desc[] indices the device should process.
// we only tell device the first index in our chain of descriptors.
avail[2 + (avail[1] % NUM)] = idx[0];
__sync_synchronize();
avail[1] = avail[1] + 1;
*R(VIRTIO_MMIO_QUEUE_NOTIFY) = 0; // value is queue number
// Wait for virtio_disk_intr() to say request has finished.
while((b->flags & (B_VALID|B_DIRTY)) != B_VALID){
sleep(b, &virtio_disk_lock);
}
release(&virtio_disk_lock);
}
void
virtio_disk_intr()
{
// the used area is:
// uint16 flags
// uint16 idx
// array of VRingUsedElem
// XXX spec says to read INTERRUPT_STATUS and
// write INTERRUPT_ACK
acquire(&virtio_disk_lock);
while((used_idx % NUM) != (*(volatile uint16 *)(used+2) % NUM)){
struct VRingUsedElem *ue = (struct VRingUsedElem *) (used + 4 + 8*used_idx);
// XXX check the one-byte status in the 3rd descriptor.
info[ue->id].b->flags |= B_VALID;
info[ue->id].b->flags &= ~B_DIRTY;
wakeup(info[ue->id].b);
info[ue->id].b = 0;
uint i = ue->id;
while(1){
desc[i].addr = 0;
free_desc(i);
if(desc[i].flags & VRING_DESC_F_NEXT)
i = desc[i].next;
else
break;
}
wakeup(&free[0]);
used_idx = (used_idx + 1) % NUM;
}
release(&virtio_disk_lock);
}

View file

@ -30,6 +30,10 @@ kvminit()
mappages(kernel_pagetable, UART0, PGSIZE, mappages(kernel_pagetable, UART0, PGSIZE,
UART0, PTE_R | PTE_W); UART0, PTE_R | PTE_W);
// virtio disk interface
mappages(kernel_pagetable, VIRTIO, PGSIZE,
VIRTIO, PTE_R | PTE_W);
// CLINT // CLINT
mappages(kernel_pagetable, CLINT, 0x10000, mappages(kernel_pagetable, CLINT, 0x10000,
CLINT, PTE_R | PTE_W); CLINT, PTE_R | PTE_W);