virtio disk driver
This commit is contained in:
parent
5753553213
commit
de9d72c908
4
Makefile
4
Makefile
|
@ -27,7 +27,8 @@ OBJS = \
|
|||
$K/exec.o \
|
||||
$K/sysfile.o \
|
||||
$K/kernelvec.o \
|
||||
$K/plic.o
|
||||
$K/plic.o \
|
||||
$K/virtio_disk.o
|
||||
|
||||
# riscv64-unknown-elf- or riscv64-linux-gnu-
|
||||
# perhaps in /opt/riscv/bin
|
||||
|
@ -163,6 +164,7 @@ CPUS := 3
|
|||
endif
|
||||
QEMUOPTS = -machine virt -kernel $K/kernel -m 3G -smp $(CPUS) -nographic
|
||||
QEMUOPTS += -initrd fs.img
|
||||
QEMUOPTS += -drive file=fs.img,if=none,format=raw,id=x0 -device virtio-blk-device,drive=x0,bus=virtio-mmio-bus.0
|
||||
|
||||
qemu: $K/kernel fs.img
|
||||
$(QEMU) $(QEMUOPTS)
|
||||
|
|
|
@ -101,7 +101,8 @@ bread(uint dev, uint blockno)
|
|||
|
||||
b = bget(dev, blockno);
|
||||
if((b->flags & B_VALID) == 0) {
|
||||
ramdiskrw(b);
|
||||
//ramdiskrw(b);
|
||||
virtio_disk_rw(b);
|
||||
}
|
||||
return b;
|
||||
}
|
||||
|
@ -113,7 +114,8 @@ bwrite(struct buf *b)
|
|||
if(!holdingsleep(&b->lock))
|
||||
panic("bwrite");
|
||||
b->flags |= B_DIRTY;
|
||||
ramdiskrw(b);
|
||||
//ramdiskrw(b);
|
||||
virtio_disk_rw(b);
|
||||
}
|
||||
|
||||
// Release a locked buffer.
|
||||
|
|
|
@ -201,5 +201,10 @@ uint64 plic_pending(void);
|
|||
int plic_claim(void);
|
||||
void plic_complete(int);
|
||||
|
||||
// virtio_disk.c
|
||||
void virtio_disk_init(void);
|
||||
void virtio_disk_rw(struct buf *);
|
||||
void virtio_disk_intr();
|
||||
|
||||
// number of elements in fixed-size array
|
||||
#define NELEM(x) (sizeof(x)/sizeof((x)[0]))
|
||||
|
|
|
@ -35,6 +35,7 @@ freerange(void *pa_start, void *pa_end)
|
|||
{
|
||||
char *p;
|
||||
p = (char*)PGROUNDUP((uint64)pa_start);
|
||||
p += 4096; // XXX I can't get kernel.ld to place end beyond the last bss symbol.
|
||||
for(; p + PGSIZE <= (char*)pa_end; p += PGSIZE)
|
||||
kfree(p);
|
||||
}
|
||||
|
|
|
@ -28,4 +28,5 @@ SECTIONS
|
|||
*(.bss)
|
||||
PROVIDE(end = .);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -26,7 +26,8 @@ main()
|
|||
plicinithart(); // ask PLIC for device interrupts
|
||||
binit(); // buffer cache
|
||||
fileinit(); // file table
|
||||
ramdiskinit(); // disk
|
||||
virtio_disk_init(); // emulated hard disk
|
||||
ramdiskinit(); // in-memory disk
|
||||
userinit(); // first user process
|
||||
started = 1;
|
||||
} else {
|
||||
|
|
|
@ -6,7 +6,8 @@
|
|||
// 00001000 -- boot ROM, provided by qemu
|
||||
// 02000000 -- CLINT
|
||||
// 0C000000 -- PLIC
|
||||
// 10000000 -- uart0 registers
|
||||
// 10000000 -- uart0
|
||||
// 10001000 -- virtio disk
|
||||
// 80000000 -- boot ROM jumps here in machine mode
|
||||
// -kernel loads the kernel here
|
||||
// 88000000 -- -initrd fs.img ramdisk image.
|
||||
|
@ -21,6 +22,9 @@
|
|||
#define UART0 0x10000000L
|
||||
#define UART0_IRQ 10
|
||||
|
||||
#define VIRTIO 0x10001000
|
||||
#define VIRTIO_IRQ 1 // really the first of 8 units
|
||||
|
||||
// local interrupt controller, which contains the timer.
|
||||
#define CLINT 0x2000000L
|
||||
#define CLINT_MTIMECMP(hartid) (CLINT + 0x4000 + 8*(hartid))
|
||||
|
|
|
@ -11,8 +11,9 @@
|
|||
void
|
||||
plicinit(void)
|
||||
{
|
||||
// set uart's priority to be non-zero (otherwise disabled).
|
||||
// set desired IRQ priorities non-zero (otherwise disabled).
|
||||
*(uint32*)(PLIC + UART0_IRQ*4) = 1;
|
||||
*(uint32*)(PLIC + VIRTIO_IRQ*4) = 1;
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -21,11 +22,9 @@ plicinithart(void)
|
|||
int hart = cpuid();
|
||||
|
||||
// set uart's enable bit for this hart's S-mode.
|
||||
//*(uint32*)(PLIC + 0x2080)= (1 << UART0_IRQ);
|
||||
*(uint32*)PLIC_SENABLE(hart)= (1 << UART0_IRQ);
|
||||
*(uint32*)PLIC_SENABLE(hart)= (1 << UART0_IRQ) | (1 << VIRTIO_IRQ);
|
||||
|
||||
// set this hart's S-mode priority threshold to 0.
|
||||
//*(uint32*)(PLIC + 0x201000) = 0;
|
||||
*(uint32*)PLIC_SPRIORITY(hart) = 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -159,6 +159,8 @@ devintr()
|
|||
|
||||
if(irq == UART0_IRQ){
|
||||
uartintr();
|
||||
} else if(irq == VIRTIO_IRQ){
|
||||
virtio_disk_intr();
|
||||
}
|
||||
|
||||
plic_complete(irq);
|
||||
|
|
59
kernel/virtio.h
Normal file
59
kernel/virtio.h
Normal file
|
@ -0,0 +1,59 @@
|
|||
//
|
||||
// virtio device definitions.
|
||||
// for both the mmio interface, and virtio descriptors.
|
||||
// only tested with qemu.
|
||||
// this is the "legacy" virtio interface.
|
||||
//
|
||||
|
||||
// virtio mmio control registers, mapped starting at 0x10001000.
// from qemu virtio_mmio.h
// each value is a byte offset that gets added to the VIRTIO base address.
#define VIRTIO_MMIO_MAGIC_VALUE      0x000 // 0x74726976
#define VIRTIO_MMIO_VERSION          0x004 // 1 -- version, 1 is legacy
#define VIRTIO_MMIO_DEVICE_ID        0x008 // 2 -- block device type
#define VIRTIO_MMIO_VENDOR_ID        0x00c // 0x554d4551
#define VIRTIO_MMIO_DEVICE_FEATURES  0x010
#define VIRTIO_MMIO_DRIVER_FEATURES  0x020
#define VIRTIO_MMIO_GUEST_PAGE_SIZE  0x028 // page size for PFN, write-only
#define VIRTIO_MMIO_QUEUE_SEL        0x030 // select queue, write-only
#define VIRTIO_MMIO_QUEUE_NUM_MAX    0x034 // max size of current queue, read-only
#define VIRTIO_MMIO_QUEUE_NUM        0x038 // size of current queue, write-only
#define VIRTIO_MMIO_QUEUE_ALIGN      0x03c // used ring alignment, write-only
#define VIRTIO_MMIO_QUEUE_PFN        0x040 // physical page number for queue, read/write
#define VIRTIO_MMIO_QUEUE_READY      0x044 // ready bit
#define VIRTIO_MMIO_QUEUE_NOTIFY     0x050 // write-only
#define VIRTIO_MMIO_INTERRUPT_STATUS 0x060 // read-only
#define VIRTIO_MMIO_INTERRUPT_ACK    0x064 // write-only
#define VIRTIO_MMIO_STATUS           0x070 // read/write

// status register bits, from qemu virtio_config.h
// these are masks; the driver ORs them together as initialization proceeds.
#define VIRTIO_CONFIG_S_ACKNOWLEDGE  1
#define VIRTIO_CONFIG_S_DRIVER       2
#define VIRTIO_CONFIG_S_DRIVER_OK    4
#define VIRTIO_CONFIG_S_FEATURES_OK  8

// device feature bits
// these are bit numbers (use as 1 << N), not masks.
#define VIRTIO_BLK_F_RO              5  /* Disk is read-only */
#define VIRTIO_BLK_F_SCSI            7  /* Supports scsi command passthru */
#define VIRTIO_BLK_F_CONFIG_WCE     11  /* Writeback mode available in config */
#define VIRTIO_BLK_F_MQ             12  /* support more than one vq */
#define VIRTIO_F_ANY_LAYOUT         27
#define VIRTIO_RING_F_INDIRECT_DESC 28
#define VIRTIO_RING_F_EVENT_IDX     29
|
||||
|
||||
struct VRingDesc {
|
||||
uint64 addr;
|
||||
uint32 len;
|
||||
uint16 flags;
|
||||
uint16 next;
|
||||
};
|
||||
#define VRING_DESC_F_NEXT 1
|
||||
#define VRING_DESC_F_WRITE 2 // device writes (vs read)
|
||||
|
||||
struct VRingUsedElem {
|
||||
uint32 id; // index of start of completed descriptor chain
|
||||
uint32 len;
|
||||
};
|
||||
|
||||
// for disk ops: request type stored in the first descriptor's header.
#define VIRTIO_BLK_T_IN  0 // read the disk
#define VIRTIO_BLK_T_OUT 1 // write the disk
|
268
kernel/virtio_disk.c
Normal file
268
kernel/virtio_disk.c
Normal file
|
@ -0,0 +1,268 @@
|
|||
//
|
||||
// driver for qemu's virtio disk device.
|
||||
// uses qemu's mmio interface to virtio.
|
||||
// qemu presents a "legacy" virtio interface.
|
||||
//
|
||||
// qemu ... -drive file=fs.img,if=none,format=raw,id=x0 -device virtio-blk-device,drive=x0,bus=virtio-mmio-bus.0
|
||||
//
|
||||
|
||||
#include "types.h"
|
||||
#include "riscv.h"
|
||||
#include "defs.h"
|
||||
#include "param.h"
|
||||
#include "memlayout.h"
|
||||
#include "spinlock.h"
|
||||
#include "sleeplock.h"
|
||||
#include "fs.h"
|
||||
#include "buf.h"
|
||||
#include "virtio.h"
|
||||
|
||||
// the address of a virtio mmio register.
|
||||
#define R(off) ((volatile uint32 *)(VIRTIO + (off)))
|
||||
|
||||
struct spinlock virtio_disk_lock;
|
||||
|
||||
// this many virtio descriptors.
|
||||
// must be a power of two.
|
||||
#define NUM 8
|
||||
|
||||
// memory for virtio descriptors &c for queue 0.
|
||||
// this is a global instead of allocated because it has
|
||||
// to be multiple contiguous pages, which kalloc()
|
||||
// doesn't support.
|
||||
__attribute__ ((aligned (PGSIZE)))
|
||||
static char pages[2*PGSIZE];
|
||||
static struct VRingDesc *desc;
|
||||
static uint16 *avail;
|
||||
static char *used;
|
||||
|
||||
// our own book-keeping.
|
||||
static char free[NUM]; // is a descriptor free?
|
||||
static uint16 used_idx; // we've looked this far in used[2..NUM].
|
||||
|
||||
// track info about in-flight operations,
|
||||
// for use when completion interrupt arrives.
|
||||
// indexed by first descriptor index of chain.
|
||||
static struct {
|
||||
struct buf *b;
|
||||
} info[NUM];
|
||||
|
||||
void
|
||||
virtio_disk_init(void)
|
||||
{
|
||||
uint32 status = 0;
|
||||
|
||||
initlock(&virtio_disk_lock, "virtio_disk");
|
||||
|
||||
// qemu's virtio-mmio.c
|
||||
|
||||
if(*R(VIRTIO_MMIO_MAGIC_VALUE) != 0x74726976 ||
|
||||
*R(VIRTIO_MMIO_VERSION) != 1 ||
|
||||
*R(VIRTIO_MMIO_DEVICE_ID) != 2 ||
|
||||
*R(VIRTIO_MMIO_VENDOR_ID) != 0x554d4551){
|
||||
panic("could not find virtio disk");
|
||||
}
|
||||
|
||||
status |= VIRTIO_CONFIG_S_ACKNOWLEDGE;
|
||||
*R(VIRTIO_MMIO_STATUS) = status;
|
||||
|
||||
status |= VIRTIO_CONFIG_S_DRIVER;
|
||||
*R(VIRTIO_MMIO_STATUS) = status;
|
||||
|
||||
// negotiate features
|
||||
uint64 features = *R(VIRTIO_MMIO_DEVICE_FEATURES);
|
||||
features &= ~(1 << VIRTIO_BLK_F_RO);
|
||||
features &= ~(1 << VIRTIO_BLK_F_SCSI);
|
||||
features &= ~(1 << VIRTIO_BLK_F_CONFIG_WCE);
|
||||
features &= ~(1 << VIRTIO_BLK_F_MQ);
|
||||
features &= ~(1 << VIRTIO_F_ANY_LAYOUT);
|
||||
features &= ~(1 << VIRTIO_RING_F_EVENT_IDX);
|
||||
features &= ~(1 << VIRTIO_RING_F_INDIRECT_DESC);
|
||||
*R(VIRTIO_MMIO_DRIVER_FEATURES) = features;
|
||||
|
||||
// tell device that feature negotiation is complete.
|
||||
status |= VIRTIO_CONFIG_S_FEATURES_OK;
|
||||
*R(VIRTIO_MMIO_STATUS) = status;
|
||||
|
||||
// tell device we're completely ready.
|
||||
status |= VIRTIO_CONFIG_S_DRIVER_OK;
|
||||
*R(VIRTIO_MMIO_STATUS) = status;
|
||||
|
||||
*R(VIRTIO_MMIO_GUEST_PAGE_SIZE) = PGSIZE;
|
||||
|
||||
// qemu's hw/virtio/virtio.c
|
||||
|
||||
// initialize queue 0
|
||||
*R(VIRTIO_MMIO_QUEUE_SEL) = 0;
|
||||
uint32 max = *R(VIRTIO_MMIO_QUEUE_NUM_MAX);
|
||||
if(max == 0)
|
||||
panic("virtio disk has no queue 0");
|
||||
if(max < NUM)
|
||||
panic("virtio disk max queue too short");
|
||||
*R(VIRTIO_MMIO_QUEUE_NUM) = NUM;
|
||||
memset(pages, 0, sizeof(pages));
|
||||
*R(VIRTIO_MMIO_QUEUE_PFN) = ((uint64)pages) >> PGSHIFT;
|
||||
|
||||
// desc = pages -- num * VRingDesc
|
||||
// avail = pages + 0x40 -- 2 * uint16, then num * uint16
|
||||
// used = pages + 4096 -- 2 * uint16, then num * vRingUsedElem
|
||||
|
||||
desc = (struct VRingDesc *) pages;
|
||||
avail = (uint16*)(((char*)desc) + NUM*sizeof(struct VRingDesc));
|
||||
used = pages + PGSIZE;
|
||||
|
||||
for(int i = 0; i < NUM; i++)
|
||||
free[i] = 1;
|
||||
}
|
||||
|
||||
// find a free descriptor, mark it non-free, return its index.
|
||||
static int
|
||||
alloc_desc()
|
||||
{
|
||||
for(int i = 0; i < NUM; i++){
|
||||
if(free[i]){
|
||||
free[i] = 0;
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
void
|
||||
free_desc(int i)
|
||||
{
|
||||
if(i >= NUM)
|
||||
panic("virtio_disk_intr 1");
|
||||
if(free[i])
|
||||
panic("virtio_disk_intr 2");
|
||||
free[i] = 1;
|
||||
}
|
||||
|
||||
void
|
||||
virtio_disk_rw(struct buf *b)
|
||||
{
|
||||
uint64 sector = b->blockno * (BSIZE / 512);
|
||||
|
||||
acquire(&virtio_disk_lock);
|
||||
|
||||
// the spec says that legacy block operations always use three
|
||||
// descriptors: one for type/reserved/sector, one for
|
||||
// the data, one for a 1-byte status result.
|
||||
|
||||
// allocate the three descriptors.
|
||||
int idx[3];
|
||||
while(1){
|
||||
int done = 1;
|
||||
for(int i = 0; i < 3; i++){
|
||||
idx[i] = alloc_desc();
|
||||
if(idx[i] < 0){
|
||||
for(int j = 0; j < i; j++)
|
||||
free_desc(idx[j]);
|
||||
wakeup(&free[0]);
|
||||
done = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if(done)
|
||||
break;
|
||||
sleep(&free[0], &virtio_disk_lock);
|
||||
}
|
||||
|
||||
// format the three descriptors.
|
||||
// qemu's virtio-blk.c reads them.
|
||||
|
||||
struct virtio_blk_outhdr {
|
||||
uint32 type;
|
||||
uint32 reserved;
|
||||
uint64 sector;
|
||||
} buf0;
|
||||
|
||||
if(b->flags & B_DIRTY)
|
||||
buf0.type = VIRTIO_BLK_T_OUT; // write the disk
|
||||
else
|
||||
buf0.type = VIRTIO_BLK_T_IN; // read the disk
|
||||
buf0.reserved = 0;
|
||||
buf0.sector = sector;
|
||||
|
||||
desc[idx[0]].addr = (uint64) &buf0;
|
||||
desc[idx[0]].len = sizeof(buf0);
|
||||
desc[idx[0]].flags = VRING_DESC_F_NEXT;
|
||||
desc[idx[0]].next = idx[1];
|
||||
|
||||
desc[idx[1]].addr = (uint64) b->data;
|
||||
desc[idx[1]].len = BSIZE;
|
||||
if(b->flags & B_DIRTY)
|
||||
desc[idx[1]].flags = 0; // device reads b->data
|
||||
else
|
||||
desc[idx[1]].flags = VRING_DESC_F_WRITE; // device writes b->data
|
||||
desc[idx[1]].flags |= VRING_DESC_F_NEXT;
|
||||
desc[idx[1]].next = idx[2];
|
||||
|
||||
char status = 0;
|
||||
desc[idx[2]].addr = (uint64) &status;
|
||||
desc[idx[2]].len = 1;
|
||||
desc[idx[2]].flags = VRING_DESC_F_WRITE; // device writes the status
|
||||
desc[idx[2]].next = 0;
|
||||
|
||||
// record struct buf for virtio_disk_intr().
|
||||
info[idx[0]].b = b;
|
||||
|
||||
// avail[0] is flags
|
||||
// avail[1] tells the device how far to look in avail[2...].
|
||||
// avail[2...] are desc[] indices the device should process.
|
||||
// we only tell device the first index in our chain of descriptors.
|
||||
avail[2 + (avail[1] % NUM)] = idx[0];
|
||||
__sync_synchronize();
|
||||
avail[1] = avail[1] + 1;
|
||||
|
||||
*R(VIRTIO_MMIO_QUEUE_NOTIFY) = 0; // value is queue number
|
||||
|
||||
// Wait for virtio_disk_intr() to say request has finished.
|
||||
while((b->flags & (B_VALID|B_DIRTY)) != B_VALID){
|
||||
sleep(b, &virtio_disk_lock);
|
||||
}
|
||||
|
||||
release(&virtio_disk_lock);
|
||||
}
|
||||
|
||||
void
|
||||
virtio_disk_intr()
|
||||
{
|
||||
// the used area is:
|
||||
// uint16 flags
|
||||
// uint16 idx
|
||||
// array of VRingUsedElem
|
||||
|
||||
// XXX spec says to read INTERRUPT_STATUS and
|
||||
// write INTERRUPT_ACK
|
||||
|
||||
acquire(&virtio_disk_lock);
|
||||
|
||||
while((used_idx % NUM) != (*(volatile uint16 *)(used+2) % NUM)){
|
||||
struct VRingUsedElem *ue = (struct VRingUsedElem *) (used + 4 + 8*used_idx);
|
||||
|
||||
// XXX check the one-byte status in the 3rd descriptor.
|
||||
|
||||
info[ue->id].b->flags |= B_VALID;
|
||||
info[ue->id].b->flags &= ~B_DIRTY;
|
||||
|
||||
wakeup(info[ue->id].b);
|
||||
|
||||
info[ue->id].b = 0;
|
||||
|
||||
uint i = ue->id;
|
||||
while(1){
|
||||
desc[i].addr = 0;
|
||||
free_desc(i);
|
||||
if(desc[i].flags & VRING_DESC_F_NEXT)
|
||||
i = desc[i].next;
|
||||
else
|
||||
break;
|
||||
}
|
||||
wakeup(&free[0]);
|
||||
|
||||
used_idx = (used_idx + 1) % NUM;
|
||||
}
|
||||
|
||||
release(&virtio_disk_lock);
|
||||
}
|
|
@ -30,6 +30,10 @@ kvminit()
|
|||
mappages(kernel_pagetable, UART0, PGSIZE,
|
||||
UART0, PTE_R | PTE_W);
|
||||
|
||||
// virtio disk interface
|
||||
mappages(kernel_pagetable, VIRTIO, PGSIZE,
|
||||
VIRTIO, PTE_R | PTE_W);
|
||||
|
||||
// CLINT
|
||||
mappages(kernel_pagetable, CLINT, 0x10000,
|
||||
CLINT, PTE_R | PTE_W);
|
||||
|
|
Loading…
Reference in a new issue