fork/wait/exit work

This commit is contained in:
Robert Morris 2019-05-31 09:45:59 -04:00
parent 0f90388c89
commit 2ec1959fd1
30 changed files with 1098 additions and 1863 deletions

3
.gdbinit.tmpl-riscv Normal file
View file

@ -0,0 +1,3 @@
set architecture riscv
target remote 127.0.0.1:1234
symbol-file kernel

101
Makefile
View file

@ -1,4 +1,20 @@
OBJS = \ OBJS = \
start.o \
console.o \
uart.o \
kalloc.o \
spinlock.o \
string.o \
main.o \
vm.o \
proc.o \
swtch.o \
trampoline.o \
trap.o \
syscall.o \
sysproc.o
XXXOBJS = \
bio.o\ bio.o\
console.o\ console.o\
exec.o\ exec.o\
@ -28,48 +44,23 @@ OBJS = \
vectors.o\ vectors.o\
vm.o\ vm.o\
# Cross-compiling (e.g., on Mac OS X) # riscv64-unknown-elf- or riscv64-linux-gnu-
# TOOLPREFIX = i386-jos-elf # perhaps in /opt/riscv/bin
# Using native tools (e.g., on X86 Linux)
#TOOLPREFIX = #TOOLPREFIX =
# Try to infer the correct TOOLPREFIX if not set # Try to infer the correct TOOLPREFIX if not set
ifndef TOOLPREFIX ifndef TOOLPREFIX
TOOLPREFIX := $(shell if i386-jos-elf-objdump -i 2>&1 | grep '^elf32-i386$$' >/dev/null 2>&1; \ TOOLPREFIX := $(shell if riscv64-unknown-elf-objdump -i 2>&1 | grep 'elf64-big' >/dev/null 2>&1; \
then echo 'i386-jos-elf-'; \ then echo 'riscv64-unknown-elf-'; \
elif objdump -i 2>&1 | grep 'elf32-i386' >/dev/null 2>&1; \ elif riscv64-linux-gnu-objdump -i 2>&1 | grep 'elf64-big' >/dev/null 2>&1; \
then echo ''; \ then echo 'riscv64-linux-gnu-'; \
else echo "***" 1>&2; \ else echo "***" 1>&2; \
echo "*** Error: Couldn't find an i386-*-elf version of GCC/binutils." 1>&2; \ echo "*** Error: Couldn't find an riscv64 version of GCC/binutils." 1>&2; \
echo "*** Is the directory with i386-jos-elf-gcc in your PATH?" 1>&2; \
echo "*** If your i386-*-elf toolchain is installed with a command" 1>&2; \
echo "*** prefix other than 'i386-jos-elf-', set your TOOLPREFIX" 1>&2; \
echo "*** environment variable to that prefix and run 'make' again." 1>&2; \
echo "*** To turn off this error, run 'gmake TOOLPREFIX= ...'." 1>&2; \ echo "*** To turn off this error, run 'gmake TOOLPREFIX= ...'." 1>&2; \
echo "***" 1>&2; exit 1; fi) echo "***" 1>&2; exit 1; fi)
endif endif
# If the makefile can't find QEMU, specify its path here QEMU = qemu-system-riscv64
QEMU = qemu-system-x86_64
# Try to infer the correct QEMU
ifndef QEMU
QEMU = $(shell if which qemu > /dev/null; \
then echo qemu; exit; \
elif which qemu-system-i386 > /dev/null; \
then echo qemu-system-i386; exit; \
elif which qemu-system-x86_64 > /dev/null; \
then echo qemu-system-x86_64; exit; \
else \
qemu=/Applications/Q.app/Contents/MacOS/i386-softmmu.app/Contents/MacOS/i386-softmmu; \
if test -x $$qemu; then echo $$qemu; exit; fi; fi; \
echo "***" 1>&2; \
echo "*** Error: Couldn't find a working QEMU executable." 1>&2; \
echo "*** Is the directory containing the qemu binary in your PATH" 1>&2; \
echo "*** or have you tried setting the QEMU variable in Makefile?" 1>&2; \
echo "***" 1>&2; exit 1)
endif
CC = $(TOOLPREFIX)gcc CC = $(TOOLPREFIX)gcc
AS = $(TOOLPREFIX)gas AS = $(TOOLPREFIX)gas
@ -77,15 +68,10 @@ LD = $(TOOLPREFIX)ld
OBJCOPY = $(TOOLPREFIX)objcopy OBJCOPY = $(TOOLPREFIX)objcopy
OBJDUMP = $(TOOLPREFIX)objdump OBJDUMP = $(TOOLPREFIX)objdump
XFLAGS = -m64 -mcmodel=large -ggdb CFLAGS = -fno-pic -static -fno-builtin -fno-strict-aliasing -Wall -MD -ggdb -Werror -fno-omit-frame-pointer -O
# CFLAGS = -fno-pic -static -fno-builtin -fno-strict-aliasing -O2 -Wall -MD -ggdb -Werror -fno-omit-frame-pointer CFLAGS = -mcmodel=medany
CFLAGS = -fno-pic -static -fno-builtin -fno-strict-aliasing -Wall -MD -ggdb -Werror -fno-omit-frame-pointer CFLAGS += -ffreestanding -fno-common -nostdlib -mno-relax
CFLAGS += -ffreestanding -fno-common -nostdlib $(XFLAGS)
CFLAGS += $(shell $(CC) -fno-stack-protector -E -x c /dev/null >/dev/null 2>&1 && echo -fno-stack-protector) CFLAGS += $(shell $(CC) -fno-stack-protector -E -x c /dev/null >/dev/null 2>&1 && echo -fno-stack-protector)
ASFLAGS = -gdwarf-2 -Wa,-divide $(XFLAGS)
# FreeBSD ld wants ``elf_i386_fbsd''
LDFLAGS += -m $(shell $(LD) -V | grep elf_x86_64 2>/dev/null | head -n 1)
LDFLAGS += -z max-page-size=4096
# Disable PIE when possible (for Ubuntu 16.10 toolchain) # Disable PIE when possible (for Ubuntu 16.10 toolchain)
ifneq ($(shell $(CC) -dumpspecs 2>/dev/null | grep -e '[^f]no-pie'),) ifneq ($(shell $(CC) -dumpspecs 2>/dev/null | grep -e '[^f]no-pie'),)
@ -95,21 +81,17 @@ ifneq ($(shell $(CC) -dumpspecs 2>/dev/null | grep -e '[^f]nopie'),)
CFLAGS += -fno-pie -nopie CFLAGS += -fno-pie -nopie
endif endif
kernel: $(OBJS) entry.o entryother initcode kernel.ld LDFLAGS = -z max-page-size=4096
$(LD) $(LDFLAGS) -T kernel.ld -o kernel entry.o $(OBJS) -b binary initcode entryother
kernel: $(OBJS) entry.o kernel.ld
$(LD) $(LDFLAGS) -T kernel.ld -o kernel entry.o $(OBJS)
$(OBJDUMP) -S kernel > kernel.asm $(OBJDUMP) -S kernel > kernel.asm
$(OBJDUMP) -t kernel | sed '1,/SYMBOL TABLE/d; s/ .* / /; /^$$/d' > kernel.sym $(OBJDUMP) -t kernel | sed '1,/SYMBOL TABLE/d; s/ .* / /; /^$$/d' > kernel.sym
entryother: entryother.S
$(CC) $(CFLAGS) -fno-pic -nostdinc -I. -c entryother.S
$(LD) $(LDFLAGS) -N -e start -Ttext 0x7000 -o bootblockother.o entryother.o
$(OBJCOPY) -S -O binary -j .text bootblockother.o entryother
$(OBJDUMP) -S bootblockother.o > entryother.asm
initcode: initcode.S initcode: initcode.S
$(CC) $(CFLAGS) -nostdinc -I. -c initcode.S $(CC) $(CFLAGS) -nostdinc -I. -c initcode.S
$(LD) $(LDFLAGS) -N -e start -Ttext 0 -o initcode.out initcode.o #$(LD) $(LDFLAGS) -N -e start -Ttext 0 -o initcode.out initcode.o
$(OBJCOPY) -S -O binary initcode.out initcode #$(OBJCOPY) -S -O binary initcode.out initcode
$(OBJDUMP) -S initcode.o > initcode.asm $(OBJDUMP) -S initcode.o > initcode.asm
tags: $(OBJS) entryother.S _init tags: $(OBJS) entryother.S _init
@ -186,19 +168,18 @@ QEMUGDB = $(shell if $(QEMU) -help | grep -q '^-gdb'; \
then echo "-gdb tcp::$(GDBPORT)"; \ then echo "-gdb tcp::$(GDBPORT)"; \
else echo "-s -p $(GDBPORT)"; fi) else echo "-s -p $(GDBPORT)"; fi)
ifndef CPUS ifndef CPUS
CPUS := 2 CPUS := 1
endif endif
QEMUOPTS = -kernel kernel -drive file=fs.img,index=1,media=disk,format=raw -smp $(CPUS) -m 512 $(QEMUEXTRA) QEMUOPTS = -machine virt -kernel kernel -m 3G -smp $(CPUS) -nographic
qemu: fs.img #QEMUOPTS += -initrd fs.img
$(QEMU) -serial mon:stdio $(QEMUOPTS)
qemu-nox: fs.img kernel qemu: kernel
$(QEMU) -nographic $(QEMUOPTS) $(QEMU) $(QEMUOPTS)
.gdbinit: .gdbinit.tmpl-x64 .gdbinit: .gdbinit.tmpl-riscv
sed "s/localhost:1234/localhost:$(GDBPORT)/" < $^ > $@ sed "s/:1234/:$(GDBPORT)/" < $^ > $@
qemu-gdb: fs.img kernel .gdbinit qemu-gdb: kernel .gdbinit
@echo "*** Now run 'gdb'." 1>&2 @echo "*** Now run 'gdb'." 1>&2
$(QEMU) $(QEMUOPTS) -S $(QEMUGDB) $(QEMU) $(QEMUOPTS) -S $(QEMUGDB)

189
console.c
View file

@ -5,17 +5,14 @@
#include <stdarg.h> #include <stdarg.h>
#include "types.h" #include "types.h"
#include "defs.h"
#include "param.h" #include "param.h"
#include "traps.h"
#include "spinlock.h" #include "spinlock.h"
#include "sleeplock.h" #include "sleeplock.h"
#include "fs.h" #include "fs.h"
#include "file.h" #include "file.h"
#include "memlayout.h" #include "memlayout.h"
#include "mmu.h" #include "riscv.h"
#include "proc.h" #include "defs.h"
#include "x86.h"
static void consputc(int); static void consputc(int);
@ -28,6 +25,12 @@ static struct {
static char digits[] = "0123456789abcdef"; static char digits[] = "0123456789abcdef";
void
consoleinit(void)
{
initlock(&cons.lock, "console");
}
static void static void
printint(int xx, int base, int sign) printint(int xx, int base, int sign)
{ {
@ -66,7 +69,7 @@ printptr(uint64 x) {
// Print to the console. only understands %d, %x, %p, %s. // Print to the console. only understands %d, %x, %p, %s.
void void
cprintf(char *fmt, ...) printf(char *fmt, ...)
{ {
va_list ap; va_list ap;
int i, c, locking; int i, c, locking;
@ -122,67 +125,20 @@ cprintf(char *fmt, ...)
void void
panic(char *s) panic(char *s)
{ {
int i; printf("panic: ");
uint64 pcs[10]; printf(s);
printf("\n");
cli();
cons.locking = 0;
// use lapiccpunum so that we can call panic from mycpu()
cprintf("lapicid %d: panic: ", lapicid());
cprintf(s);
cprintf("\n");
getcallerpcs(&s, pcs);
for(i=0; i<10; i++)
cprintf(" %p", pcs[i]);
panicked = 1; // freeze other CPU panicked = 1; // freeze other CPU
for(;;) for(;;)
; ;
} }
//PAGEBREAK: 50
#define BACKSPACE 0x100 #define BACKSPACE 0x100
#define CRTPORT 0x3d4
static ushort *crt = (ushort*)P2V(0xb8000); // CGA memory
static void
cgaputc(int c)
{
int pos;
// Cursor position: col + 80*row.
outb(CRTPORT, 14);
pos = inb(CRTPORT+1) << 8;
outb(CRTPORT, 15);
pos |= inb(CRTPORT+1);
if(c == '\n')
pos += 80 - pos%80;
else if(c == BACKSPACE){
if(pos > 0) --pos;
} else
crt[pos++] = (c&0xff) | 0x0700; // black on white
if(pos < 0 || pos > 25*80)
panic("pos under/overflow");
if((pos/80) >= 24){ // Scroll up.
memmove(crt, crt+80, sizeof(crt[0])*23*80);
pos -= 80;
memset(crt+pos, 0, sizeof(crt[0])*(24*80 - pos));
}
outb(CRTPORT, 14);
outb(CRTPORT+1, pos>>8);
outb(CRTPORT, 15);
outb(CRTPORT+1, pos);
crt[pos] = ' ' | 0x0700;
}
void void
consputc(int c) consputc(int c)
{ {
if(panicked){ if(panicked){
cli();
for(;;) for(;;)
; ;
} }
@ -191,125 +147,4 @@ consputc(int c)
uartputc('\b'); uartputc(' '); uartputc('\b'); uartputc('\b'); uartputc(' '); uartputc('\b');
} else } else
uartputc(c); uartputc(c);
cgaputc(c);
} }
#define INPUT_BUF 128
struct {
char buf[INPUT_BUF];
uint r; // Read index
uint w; // Write index
uint e; // Edit index
} input;
#define C(x) ((x)-'@') // Control-x
void
consoleintr(int (*getc)(void))
{
int c, doprocdump = 0;
acquire(&cons.lock);
while((c = getc()) >= 0){
switch(c){
case C('P'): // Process listing.
// procdump() locks cons.lock indirectly; invoke later
doprocdump = 1;
break;
case C('U'): // Kill line.
while(input.e != input.w &&
input.buf[(input.e-1) % INPUT_BUF] != '\n'){
input.e--;
consputc(BACKSPACE);
}
break;
case C('H'): case '\x7f': // Backspace
if(input.e != input.w){
input.e--;
consputc(BACKSPACE);
}
break;
default:
if(c != 0 && input.e-input.r < INPUT_BUF){
c = (c == '\r') ? '\n' : c;
input.buf[input.e++ % INPUT_BUF] = c;
consputc(c);
if(c == '\n' || c == C('D') || input.e == input.r+INPUT_BUF){
input.w = input.e;
wakeup(&input.r);
}
}
break;
}
}
release(&cons.lock);
if(doprocdump) {
procdump(); // now call procdump() wo. cons.lock held
}
}
int
consoleread(struct inode *ip, char *dst, int n)
{
uint target;
int c;
iunlock(ip);
target = n;
acquire(&cons.lock);
while(n > 0){
while(input.r == input.w){
if(myproc()->killed){
release(&cons.lock);
ilock(ip);
return -1;
}
sleep(&input.r, &cons.lock);
}
c = input.buf[input.r++ % INPUT_BUF];
if(c == C('D')){ // EOF
if(n < target){
// Save ^D for next time, to make sure
// caller gets a 0-byte result.
input.r--;
}
break;
}
*dst++ = c;
--n;
if(c == '\n')
break;
}
release(&cons.lock);
ilock(ip);
return target - n;
}
int
consolewrite(struct inode *ip, char *buf, int n)
{
int i;
iunlock(ip);
acquire(&cons.lock);
for(i = 0; i < n; i++)
consputc(buf[i] & 0xff);
release(&cons.lock);
ilock(ip);
return n;
}
void
consoleinit(void)
{
initlock(&cons.lock, "console");
devsw[CONSOLE].write = consolewrite;
devsw[CONSOLE].read = consoleread;
cons.locking = 1;
ioapicenable(IRQ_KBD, 0);
}

42
defs.h
View file

@ -19,7 +19,7 @@ void bwrite(struct buf*);
// console.c // console.c
void consoleinit(void); void consoleinit(void);
void cprintf(char*, ...); void printf(char*, ...);
void consoleintr(int(*)(void)); void consoleintr(int(*)(void));
void panic(char*) __attribute__((noreturn)); void panic(char*) __attribute__((noreturn));
@ -65,10 +65,9 @@ extern uchar ioapicid;
void ioapicinit(void); void ioapicinit(void);
// kalloc.c // kalloc.c
char* kalloc(void); void* kalloc(void);
void kfree(char*); void kfree(void *);
void kinit1(void*, void*); void kinit();
void kinit2(void*, void*);
// kbd.c // kbd.c
void kbdintr(void); void kbdintr(void);
@ -112,7 +111,7 @@ int kill(int);
struct cpu* mycpu(void); struct cpu* mycpu(void);
struct cpu* getmycpu(void); struct cpu* getmycpu(void);
struct proc* myproc(); struct proc* myproc();
void pinit(void); void procinit(void);
void procdump(void); void procdump(void);
void scheduler(void) __attribute__((noreturn)); void scheduler(void) __attribute__((noreturn));
void sched(void); void sched(void);
@ -124,7 +123,7 @@ void wakeup(void*);
void yield(void); void yield(void);
// swtch.S // swtch.S
void swtch(struct context**, struct context*); void swtch(struct context*, struct context*);
// spinlock.c // spinlock.c
void acquire(struct spinlock*); void acquire(struct spinlock*);
@ -158,16 +157,16 @@ int argaddr(int, uint64 *);
int fetchint(uint64, int*); int fetchint(uint64, int*);
int fetchstr(uint64, char**); int fetchstr(uint64, char**);
int fetchaddr(uint64, uint64*); int fetchaddr(uint64, uint64*);
void syscall(struct sysframe*); void syscall();
// timer.c // timer.c
void timerinit(void); void timerinit(void);
// trap.c // trap.c
void idtinit(void);
extern uint ticks; extern uint ticks;
void tvinit(void); void trapinit(void);
extern struct spinlock tickslock; extern struct spinlock tickslock;
void usertrapret(void);
// uart.c // uart.c
void uartinit(void); void uartinit(void);
@ -175,20 +174,15 @@ void uartintr(void);
void uartputc(int); void uartputc(int);
// vm.c // vm.c
void seginit(void); void kvminit(void);
void kvmalloc(void); void kvmswitch(void);
pde_t* setupkvm(void); pagetable_t uvmcreate(void);
char* uva2ka(pde_t*, char*); void uvminit(pagetable_t, char *, uint);
int allocuvm(pde_t*, uint, uint); int uvmdealloc(pagetable_t, uint64, uint64);
int deallocuvm(pde_t*, uint64, uint64); void uvmcopy(pagetable_t, pagetable_t, uint64);
void freevm(pde_t*, uint64); void uvmfree(pagetable_t, uint64);
void inituvm(pde_t*, char*, uint); void mappages(pagetable_t, uint64, uint64, uint64, int);
int loaduvm(pde_t*, char*, struct inode*, uint, uint); void unmappages(pagetable_t, uint64, uint64, int);
pde_t* copyuvm(pde_t*, uint);
void switchuvm(struct proc*);
void switchkvm(void);
int copyout(pde_t*, uint, void*, uint);
void clearpteu(pde_t *pgdir, char *uva);
// number of elements in fixed-size array // number of elements in fixed-size array
#define NELEM(x) (sizeof(x)/sizeof((x)[0])) #define NELEM(x) (sizeof(x)/sizeof((x)[0]))

245
entry.S
View file

@ -1,223 +1,22 @@
# x86-64 bootstrap, assuming load by MultiBoot-compliant loader. # qemu -kernel starts at 0x1000. the instructions
# The MutliBoot specification is at: # there seem to be provided by qemu, as if it
# http://www.gnu.org/software/grub/manual/multiboot/multiboot.html # were a ROM. the code at 0x1000 jumps to
# GRUB is a MultiBoot loader, as is qemu's -kernel option. # 0x8000000, the _start function here,
# in machine mode.
#include "mmu.h" .section .data
#include "memlayout.h" .globl stack0
.section .text
# STACK is the size of the bootstrap stack. .globl mstart
#define STACK 8192 .section .text
.globl _entry
# MultiBoot header. _entry:
# http://www.gnu.org/software/grub/manual/multiboot/multiboot.html#Header-layout # set up a stack for C; stack0 is declared in start.
.align 4 la sp, stack0
.text addi sp, sp, 1024
.globl multiboot_header addi sp, sp, 1024
multiboot_header: addi sp, sp, 1024
#define magic 0x1badb002 addi sp, sp, 1024
#define flags (1<<16 | 1<<0) # jump to mstart() in start.c
.long magic call mstart
.long flags junk:
.long (- magic - flags) # checksum j junk
.long V2P_WO(multiboot_header) # header address
.long V2P_WO(multiboot_header) # load address
.long V2P_WO(edata) # load end address
.long V2P_WO(end) # bss end address
.long V2P_WO(start) # entry address
# Entry point jumped to by boot loader. Running in 32-bit mode.
# http://www.gnu.org/software/grub/manual/multiboot/multiboot.html#Machine-state
#
# EAX = 0x2badb002
# EBX = address of multiboot information structure
# CS = 32-bit read/execute code segment with identity map
# DS, ES, FS, GS, SS = 32-bit read/write data segment with identity map
# A20 gate = enabled
# CR0 = PE set, PG clear
# EFLAGS = VM clear, IF clear
#
.code32
.globl start
start:
# Tell BIOS to do "warm reboot" when we shut down.
movw $0x1234, 0x472
# Set up multiboot arguments for main.
movl %eax, %edi
movl %ebx, %esi
# Initialize stack.
movl $V2P_WO(stack+STACK), %esp
# Zero bss. QEMU's MultiBoot seems not to.
# It's possible that the header above is not right, but it looks right.
# %edi is holding multiboot argument, so save in another register.
# (The stack is in the bss.)
movl %edi, %edx
movl $V2P_WO(edata), %edi
movl $V2P_WO(end), %ecx
subl $V2P_WO(edata), %ecx
movl $0, %eax
cld
rep stosb
movl %edx, %edi
call loadgdt
# Enter new 32-bit code segment (already in 32-bit mode).
ljmp $SEG_KCODE32, $V2P_WO(start32) // code32 segment selector
start32:
# Initialize page table.
call initpagetables
call init32e
movl $V2P_WO(start64), %eax
# Enter 64-bit mode.
ljmp $SEG_KCODE, $V2P_WO(tramp64) // code64 segment selector
.code64
start64:
# Load VA of stack
movabsq $(stack+STACK), %rsp
# Clear frame pointer for stack walks
movl $0, %ebp
# Call into C code.
call main
# should not return from main
jmp .
.code32
.global apstart
apstart:
call loadgdt
ljmp $SEG_KCODE32, $V2P_WO(apstart32) // code32 segment selector
apstart32:
call init32e
movl $V2P_WO(apstart64), %eax
ljmp $SEG_KCODE, $V2P_WO(tramp64) // code64 segment selector
.code64
apstart64:
# Remember (from bootothers), that our kernel stack pointer is
# at the top of our temporary stack.
popq %rax
movq %rax, %rsp
movq $0, %rbp
call apmain
jmp .
.code64
tramp64:
# The linker thinks we are running at tramp64, but we're actually
# running at PADDR(tramp64), so use an explicit calculation to
# load and jump to the correct address. %rax should hold the
# physical address of the jmp target.
movq $KERNBASE, %r11
addq %r11, %rax
jmp *%rax
# Initial stack
.comm stack, STACK
# Page tables. See section 4.5 of 253668.pdf.
# We map the first GB of physical memory at 0 and at 1 TB (not GB) before
# the end of virtual memory. At boot time we are using the mapping at 0
# but during ordinary execution we use the high mapping.
# The intent is that after bootstrap the kernel can expand this mapping
# to cover all the available physical memory.
# This would be easier if we could use the PS bit to create GB-sized entries
# and skip the pdt table, but not all chips support it, and QEMU doesn't.
.align 4096
pml4:
.quad V2P_WO(pdpt) + PTE_P + PTE_W // present, read/write
.quad 0
.space 4096 - 2*16
.quad V2P_WO(pdpt) + PTE_P + PTE_W
.quad 0
.align 4096
pdpt:
.quad V2P_WO(pdt) + PTE_P + PTE_W
.space 4096 - 8
.align 4096
pdt:
// Filled in below.
.space 4096
.code32
initpagetables:
pushl %edi
pushl %ecx
pushl %eax
// Set up 64-bit entry in %edx:%eax.
// Base address 0, present, read/write, large page.
movl $(0 | PTE_P | PTE_W | PTE_PS), %eax
movl $0, %edx
// Fill in 512 entries at pdt.
movl $V2P_WO(pdt), %edi
movl $512, %ecx
1:
// Write this 64-bit entry.
movl %eax, 0(%edi)
movl %edx, 4(%edi)
addl $8, %edi
// 64-bit add to prepare address for next entry.
// Because this is a large page entry, it covers 512 4k pages (2 MB).
add $(512*4096), %eax
adc $0, %edx
loop 1b
popl %eax
popl %ecx
popl %edi
ret
# Initialize IA-32e mode. See section 9.8.5 of 253668.pdf.
init32e:
# Set CR4.PAE and CR4.PSE = 1.
movl %cr4, %eax
orl $0x30, %eax
movl %eax, %cr4
# Load CR3 with physical base address of level 4 page table.
movl $V2P_WO(pml4), %eax
movl %eax, %cr3
# Enable IA-32e mode by setting IA32_EFER.LME = 1.
# Also turn on IA32_EFER.SCE (syscall enable).
movl $0xc0000080, %ecx
rdmsr
orl $0x101, %eax
wrmsr
# Enable paging by setting CR0.PG = 1.
movl %cr0, %eax
orl $0x80000000, %eax
movl %eax, %cr0
nop
nop
ret
loadgdt:
subl $8, %esp
movl $V2P_WO(bootgdt), 4(%esp)
movw $(8*NSEGS-1), 2(%esp)
lgdt 2(%esp)
addl $8, %esp
movl $SEG_KDATA, %eax // data segment selector
movw %ax, %ds
movw %ax, %es
movw %ax, %ss
movl $0, %eax // null segment selector
movw %ax, %fs
movw %ax, %gs
ret

22
exec.c
View file

@ -19,8 +19,8 @@ exec(char *path, char **argv)
struct inode *ip; struct inode *ip;
struct proghdr ph; struct proghdr ph;
pde_t *pgdir, *oldpgdir; pde_t *pgdir, *oldpgdir;
struct proc *curproc = myproc(); struct proc *p = myproc();
uint64 oldsz = curproc->sz; uint64 oldsz = p->sz;
begin_op(); begin_op();
@ -85,8 +85,8 @@ exec(char *path, char **argv)
ustack[1] = argc; ustack[1] = argc;
ustack[2] = sp - (argc+1)*sizeof(uint64); // argv pointer ustack[2] = sp - (argc+1)*sizeof(uint64); // argv pointer
curproc->sf->rdi = argc; p->sf->rdi = argc;
curproc->sf->rsi = sp - (argc+1)*sizeof(uint64); p->sf->rsi = sp - (argc+1)*sizeof(uint64);
sp -= (3+argc+1) * sizeof(uint64); sp -= (3+argc+1) * sizeof(uint64);
if(copyout(pgdir, sp, ustack, (3+argc+1)*sizeof(uint64)) < 0) if(copyout(pgdir, sp, ustack, (3+argc+1)*sizeof(uint64)) < 0)
@ -96,15 +96,15 @@ exec(char *path, char **argv)
for(last=s=path; *s; s++) for(last=s=path; *s; s++)
if(*s == '/') if(*s == '/')
last = s+1; last = s+1;
safestrcpy(curproc->name, last, sizeof(curproc->name)); safestrcpy(p->name, last, sizeof(p->name));
// Commit to the user image. // Commit to the user image.
oldpgdir = curproc->pgdir; oldpgdir = p->pgdir;
curproc->pgdir = pgdir; p->pgdir = pgdir;
curproc->sz = sz; p->sz = sz;
curproc->sf->rcx = elf.entry; // main p->sf->rcx = elf.entry; // main
curproc->sf->rsp = sp; p->sf->rsp = sp;
switchuvm(curproc); switchuvm(p);
freevm(oldpgdir, oldsz); freevm(oldpgdir, oldsz);
return 0; return 0;

View file

@ -2,22 +2,20 @@
# This code runs in user space. # This code runs in user space.
#include "syscall.h" #include "syscall.h"
#include "traps.h"
# exec(init, argv) # exec(init, argv)
.globl start .globl start
start: start:
mov $init, %rdi la a0, init
mov $argv, %rsi la a1, argv
mov $SYS_exec, %rax li a7, SYS_exec
syscall ecall
# for(;;) exit(); # for(;;) exit();
exit: exit:
mov $SYS_exit, %rax li a7, SYS_exit
syscall ecall
jmp exit jal exit
# char init[] = "/init\0"; # char init[] = "/init\0";
init: init:
@ -28,4 +26,3 @@ init:
argv: argv:
.long init .long init
.long 0 .long 0

View file

@ -3,13 +3,14 @@
// and pipe buffers. Allocates 4096-byte pages. // and pipe buffers. Allocates 4096-byte pages.
#include "types.h" #include "types.h"
#include "defs.h"
#include "param.h" #include "param.h"
#include "memlayout.h" #include "memlayout.h"
#include "mmu.h"
#include "spinlock.h" #include "spinlock.h"
#include "riscv.h"
#include "defs.h"
void freerange(void *pa_start, void *pa_end);
void freerange(void *vstart, void *vend);
extern char end[]; // first address after kernel loaded from ELF file extern char end[]; // first address after kernel loaded from ELF file
// defined by the kernel linker script in kernel.ld // defined by the kernel linker script in kernel.ld
@ -19,36 +20,22 @@ struct run {
struct { struct {
struct spinlock lock; struct spinlock lock;
int use_lock;
struct run *freelist; struct run *freelist;
} kmem; } kmem;
// Initialization happens in two phases.
// 1. main() calls kinit1() while still using entrypgdir to place just
// the pages mapped by entrypgdir on free list.
// 2. main() calls kinit2() with the rest of the physical pages
// after installing a full page table that maps them on all cores.
void void
kinit1(void *vstart, void *vend) kinit()
{ {
initlock(&kmem.lock, "kmem"); initlock(&kmem.lock, "kmem");
kmem.use_lock = 0; freerange(end, (void*)PHYSTOP);
freerange(vstart, vend);
} }
void void
kinit2(void *vstart, void *vend) freerange(void *pa_start, void *pa_end)
{
freerange(vstart, vend);
kmem.use_lock = 1;
}
void
freerange(void *vstart, void *vend)
{ {
char *p; char *p;
p = (char*)PGROUNDUP((uint64)vstart); p = (char*)PGROUNDUP((uint64)pa_start);
for(; p + PGSIZE <= (char*)vend; p += PGSIZE) for(; p + PGSIZE <= (char*)pa_end; p += PGSIZE)
kfree(p); kfree(p);
} }
//PAGEBREAK: 21 //PAGEBREAK: 21
@ -57,42 +44,37 @@ freerange(void *vstart, void *vend)
// call to kalloc(). (The exception is when // call to kalloc(). (The exception is when
// initializing the allocator; see kinit above.) // initializing the allocator; see kinit above.)
void void
kfree(char *v) kfree(void *pa)
{ {
struct run *r; struct run *r;
if((uint64)v % PGSIZE || v < end || V2P(v) >= PHYSTOP) if(((uint64)pa % PGSIZE) != 0 || (char*)pa < end || (uint64)pa >= PHYSTOP)
panic("kfree"); panic("kfree");
// Fill with junk to catch dangling refs. // Fill with junk to catch dangling refs.
memset(v, 1, PGSIZE); memset(pa, 1, PGSIZE);
if(kmem.use_lock) acquire(&kmem.lock);
acquire(&kmem.lock); r = (struct run*)pa;
r = (struct run*)v;
r->next = kmem.freelist; r->next = kmem.freelist;
kmem.freelist = r; kmem.freelist = r;
if(kmem.use_lock) release(&kmem.lock);
release(&kmem.lock);
} }
// Allocate one 4096-byte page of physical memory. // Allocate one 4096-byte page of physical memory.
// Returns a pointer that the kernel can use. // Returns a pointer that the kernel can use.
// Returns 0 if the memory cannot be allocated. // Returns 0 if the memory cannot be allocated.
char* void *
kalloc(void) kalloc(void)
{ {
struct run *r; struct run *r;
if(kmem.use_lock) acquire(&kmem.lock);
acquire(&kmem.lock);
r = kmem.freelist; r = kmem.freelist;
if(r) if(r)
kmem.freelist = r->next; kmem.freelist = r->next;
if(kmem.use_lock) release(&kmem.lock);
release(&kmem.lock); memset((char*)r, 5, PGSIZE); // fill with junk
if(r != 0 && (uint64) r < KERNBASE) return (void*)r;
panic("kalloc");
return (char*)r;
} }

View file

@ -1,50 +1,33 @@
OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64") OUTPUT_ARCH( "riscv" )
OUTPUT_ARCH(i386:x86-64) ENTRY( _entry )
SECTIONS SECTIONS
{ {
. = 0xFFFFFF0000100000; /*
PROVIDE(text = .); * ensure that entry.S / _entry is at 0x80000000,
.text : AT(0x100000) { * where qemu's -kernel jumps.
*(.text .stub .text.* .gnu.linkonce.t.*) */
} . = 0x80000000;
.rodata : { .text :
*(.rodata .rodata.* .gnu.linkonce.r.*) {
} *(.text)
. = ALIGN(0x1000);
*(trampoline)
}
/* Include debugging information in kernel memory */ . = ALIGN(0x1000);
.stab : { PROVIDE(etext = .);
PROVIDE(__STAB_BEGIN__ = .);
*(.stab);
PROVIDE(__STAB_END__ = .);
BYTE(0) /* Force the linker to allocate space
for this section */
}
.stabstr : { /*
PROVIDE(__STABSTR_BEGIN__ = .); * make sure end is after data and bss.
*(.stabstr); */
PROVIDE(__STABSTR_END__ = .); .data : {
BYTE(0) /* Force the linker to allocate space *(.data)
for this section */ }
} bss : {
*(.bss)
}
. = ALIGN(0x1000); . = ALIGN(0x1000);
PROVIDE(end = .);
/* Conventionally, Unix linkers provide pseudo-symbols
* etext, edata, and end, at the end of the text, data, and bss.
* For the kernel mapping, we need the address at the beginning
* of the data section, but that's not one of the conventional
* symbols, because the convention started before there was a
* read-only rodata section between text and data. */
PROVIDE(data = .);
.data : {
*(.data)
}
bss : {
PROVIDE(edata = .);
*(.bss)
*(COMMON)
PROVIDE(end = .);
}
} }

105
main.c
View file

@ -1,105 +1,28 @@
#include "types.h" #include "types.h"
#include "defs.h"
#include "param.h" #include "param.h"
#include "memlayout.h" #include "memlayout.h"
#include "mmu.h" #include "riscv.h"
#include "proc.h" #include "defs.h"
#include "x86.h"
extern pde_t *kpgdir;
extern char end[]; // first address after kernel loaded from ELF file
static void mpmain(void) __attribute__((noreturn));
static void startothers(void);
// Bootstrap processor starts running C code here. // Bootstrap processor starts running C code here.
// Allocate a real stack and switch to it, first // Allocate a real stack and switch to it, first
// doing some setup required for memory allocator to work. // doing some setup required for memory allocator to work.
int void
main(uint64 mbmagic, uint64 mbaddr) main()
{ {
if(mbmagic != 0x2badb002)
panic("multiboot header not found");
kinit1(end, P2V(4*1024*1024)); // phys page allocator
kvmalloc(); // kernel page table
mpinit(); // detect other processors
lapicinit(); // interrupt controller
seginit(); // segment descriptors
picinit(); // disable pic
ioapicinit(); // another interrupt controller
consoleinit(); // console hardware
uartinit(); // serial port uartinit(); // serial port
pinit(); // process table consoleinit();
tvinit(); // trap vectors printf("entering main()\n");
kinit(); // physical page allocator
kvminit(); // kernel page table
procinit(); // process table
trapinit(); // trap vectors
#if 0
binit(); // buffer cache binit(); // buffer cache
fileinit(); // file table fileinit(); // file table
ideinit(); // disk ideinit(); // disk
#endif
startothers(); // start other processors
kinit2(P2V(4*1024*1024), P2V(PHYSTOP)); // must come after startothers()
userinit(); // first user process userinit(); // first user process
mpmain();
return 0; scheduler();
} }
extern struct cpu* getmycpu();
// Common CPU setup code.
static void
mpmain(void)
{
cprintf("cpu%d: starting %d\n", cpuid(), cpuid());
idtinit(); // load idt register
xchg(&(mycpu()->started), 1); // tell startothers() we're up
scheduler(); // start running processes
}
// AP processors jump here from entryother.S.
void
apmain(void)
{
switchkvm();
seginit();
lapicinit();
mpmain();
}
void apstart(void);
// Start the non-boot (AP) processors.
static void
startothers(void)
{
extern uchar _binary_entryother_start[], _binary_entryother_size[];
uchar *code;
struct cpu *c;
char *stack;
// Write entry code to unused memory at 0x7000.
// The linker has placed the image of entryother.S in
// _binary_entryother_start.
code = P2V(0x7000);
memmove(code, _binary_entryother_start, (uint64)_binary_entryother_size);
for(c = cpus; c < cpus+ncpu; c++){
if(c == mycpu()) // We've started already.
continue;
// Tell entryother.S what stack to use, where to enter, and what
// pgdir to use. We cannot use kpgdir yet, because the AP processor
// is running in low memory, so we use entrypgdir for the APs too.
stack = kalloc();
*(uint32*)(code-4) = V2P(apstart);
*(uint64*)(code-12) = (uint64) (stack+KSTACKSIZE);
lapicstartap(c->apicid, V2P(code));
// wait for cpu to finish mpmain()
while(c->started == 0)
;
}
}

View file

@ -1,16 +1,25 @@
// Memory layout // Physical memory layout
#define EXTMEM 0x100000 // Start of extended memory // qemu -machine virt is set up like this:
#define PHYSTOP 0xE000000 // Top physical memory // 00001000 -- boot ROM, provided by qemu
#define DEVSPACE 0xFE000000 // Other devices are top of 32-bit address space // 10000000 -- uart0 registers
#define DEVSPACETOP 0x100000000 // 80000000 -- boot ROM jumps here in machine mode
// unused RAM after 80000000.
// Key addresses for address space layout (see kmap in vm.c for layout) // the kernel uses physical memory thus:
#define KERNBASE 0xFFFFFF0000000000 // First kernel virtual address // 80000000 -- entry.S, then kernel text and data
#define KERNLINK (KERNBASE+EXTMEM) // Address where kernel is linked // end -- start of kernel page allocation area
// PHYSTOP -- end RAM used by the kernel
#define V2P(a) (((uint64) (a)) - KERNBASE) // registers start here in physical memory.
#define P2V(a) ((void *)(((char *) (a)) + KERNBASE)) #define UART0 0x10000000L
#define V2P_WO(x) ((x) - KERNBASE) // same as V2P, but without casts // the kernel expects there to be RAM
#define P2V_WO(x) ((x) + KERNBASE) // same as P2V, but without casts // for use by the kernel and user pages
// from physical address 0x80000000 to PHYSTOP.
#define KERNBASE 0x80000000L
#define PHYSTOP (KERNBASE + 64*1024*1024)
// map the trampoline page to the highest address,
// in both user and kernel space.
#define TRAMPOLINE (MAXVA - PGSIZE)

160
mmu.h
View file

@ -1,160 +0,0 @@
// This file contains definitions for the
// x86 memory management unit (MMU).
// Eflags register
#define FL_TF 0x00000100 // Trap Flag
#define FL_IF 0x00000200 // Interrupt Enable
// Control Register flags
#define CR0_PE 0x00000001 // Protection Enable
#define CR0_WP 0x00010000 // Write Protect
#define CR0_PG 0x80000000 // Paging
#define CR4_PSE 0x00000010 // Page size extension
// Segment selectors (indexes) in our GDTs.
// Defined by our convention, not the architecture.
#define SEG_KCODE32 (1<<3) // kernel 32-bit code segment
#define SEG_KCODE (2<<3) // kernel code segment
#define SEG_KDATA (3<<3) // kernel data segment
#define SEG_TSS (4<<3) // tss segment - takes two slots
#define SEG_UDATA (6<<3) // user data segment
#define SEG_UCODE (7<<3) // user code segment
#define NSEGS 8
#ifndef __ASSEMBLER__
struct segdesc {
uint16 limit0;
uint16 base0;
uint8 base1;
uint8 bits;
uint8 bitslimit1;
uint8 base2;
};
// SEGDESC constructs a segment descriptor literal
// with the given, base, limit, and type bits.
#define SEGDESC(base, limit, bits) (struct segdesc){ \
(limit)&0xffff, (base)&0xffff, \
((base)>>16)&0xff, \
(bits)&0xff, \
(((bits)>>4)&0xf0) | ((limit>>16)&0xf), \
((base)>>24)&0xff, \
}
// SEGDESCHI constructs an extension segment descriptor
// literal that records the high bits of base.
#define SEGDESCHI(base) (struct segdesc) { \
(((base)>>32)&0xffff), (((base)>>48)&0xffff), \
}
#endif
#define DPL_USER 0x3 // User DPL
#define SEG_A (1<<0) // segment accessed bit
#define SEG_R (1<<1) // readable (code)
#define SEG_W (1<<1) // writable (data)
#define SEG_C (1<<2) // conforming segment (code)
#define SEG_E (1<<2) // expand-down bit (data)
#define SEG_CODE (1<<3) // code segment (instead of data)
// User and system segment bits.
#define SEG_S (1<<4) // if 0, system descriptor
#define SEG_DPL(x) ((x)<<5) // descriptor privilege level (2 bits)
#define SEG_P (1<<7) // segment present
#define SEG_AVL (1<<8) // available for operating system use
#define SEG_L (1<<9) // long mode
#define SEG_D (1<<10) // default operation size 32-bit
#define SEG_G (1<<11) // granularity
// Application segment type bits
#define STA_X 0x8 // Executable segment
#define STA_W 0x2 // Writeable (non-executable segments)
#define STA_R 0x2 // Readable (executable segments)
// System segment type bits
#define SEG_LDT (2<<0) // local descriptor table
#define SEG_TSS64A (9<<0) // available 64-bit TSS
#define SEG_TSS64B (11<<0) // busy 64-bit TSS
#define SEG_CALL64 (12<<0) // 64-bit call gate
#define SEG_INTR64 (14<<0) // 64-bit interrupt gate
#define SEG_TRAP64 (15<<0) // 64-bit trap gate
// A virtual address 'la' has a six-part structure as follows:
//
// +--16--+---9---+------9-------+-----9----+----9-------+----12-------+
// | Sign | PML4 |Page Directory| Page Dir |Page Table | Offset Page |
// |Extend| Index | Pointer Index| Index | Index | in Page |
// +------+-------+--------------+----------+------------+-------------+
// L3 pgtab L2 pgtab L1 pgtab L0 pgtab
// Page directory and page table constants.
#define NPDENTRIES 512 // # directory entries per page directory
#define PGSIZE 4096 // bytes mapped by a page
#define PGSHIFT 12 // offset of PTX in a linear address
#define PXMASK 0x1FF
#define PXSHIFT(n) (PGSHIFT+(9*(n))) // shift for index into level n page table
#define PX(n, va) ((((uint64) (va)) >> PXSHIFT(n)) & PXMASK)
#define L_PML4 3
#define PGROUNDUP(sz) (((sz)+PGSIZE-1) & ~(PGSIZE-1))
#define PGROUNDDOWN(a) (((a)) & ~(PGSIZE-1))
// Page table/directory entry flags.
#define PTE_P 0x001 // Present
#define PTE_W 0x002 // Writeable
#define PTE_U 0x004 // User
#define PTE_PS 0x080 // Page Size
#define PTE_PWT 0x008 // Write-Through
#define PTE_PCD 0x010 // Cache-Disable
// Address in page table or page directory entry
#define PTE_ADDR(pte) ((uint64)(pte) & ~0xFFF)
#define PTE_FLAGS(pte) ((uint64)(pte) & 0xFFF)
#ifndef __ASSEMBLER__
typedef uint64 pte_t;
struct taskstate {
uint8 reserved0[4];
uint64 rsp[3];
uint64 ist[8];
uint8 reserved1[10];
uint16 iomba;
uint8 iopb[0];
} __attribute__ ((packed));
#define INT_P (1<<7) // interrupt descriptor present
struct intgate
{
uint16 rip0;
uint16 cs;
uint8 reserved0;
uint8 bits;
uint16 rip1;
uint32 rip2;
uint32 reserved1;
};
// INTDESC constructs an interrupt descriptor literal
// that records the given code segment, instruction pointer,
// and type bits.
#define INTDESC(cs, rip, bits) (struct intgate){ \
(rip)&0xffff, (cs), 0, bits, ((rip)>>16)&0xffff, \
(uint64)(rip)>>32, 0, \
}
// See section 4.6 of amd64 vol2
struct desctr
{
uint16 limit;
uint64 base;
} __attribute__((packed, aligned(16))); // important!
#endif

25
msr.h
View file

@ -1,25 +0,0 @@
// SYSCALL and SYSRET registers
#define MSR_STAR 0xc0000081
#define MSR_LSTAR 0xc0000082
#define MSR_CSTAR 0xc0000083
#define MSR_SFMASK 0xc0000084
// GS
#define MSR_GS_BASE 0xc0000101
#define MSR_GS_KERNBASE 0xc0000102
static inline uint64
readmsr(uint32 msr)
{
uint32 hi, lo;
__asm volatile("rdmsr" : "=d" (hi), "=a" (lo) : "c" (msr));
return ((uint64) lo) | (((uint64) hi) << 32);
}
static inline void
writemsr(uint64 msr, uint64 val)
{
uint32 lo = val & 0xffffffff;
uint32 hi = val >> 32;
__asm volatile("wrmsr" : : "c" (msr), "a" (lo), "d" (hi) : "memory");
}

View file

@ -1,5 +1,4 @@
#define NPROC 64 // maximum number of processes #define NPROC 64 // maximum number of processes
#define KSTACKSIZE 4096 // size of per-process kernel stack
#define NCPU 8 // maximum number of CPUs #define NCPU 8 // maximum number of CPUs
#define NOFILE 16 // open files per process #define NOFILE 16 // open files per process
#define NFILE 100 // open files per system #define NFILE 100 // open files per system

283
proc.c
View file

@ -1,18 +1,20 @@
#include "types.h" #include "types.h"
#include "defs.h"
#include "param.h" #include "param.h"
#include "memlayout.h" #include "memlayout.h"
#include "mmu.h" #include "riscv.h"
#include "x86.h"
#include "proc.h" #include "proc.h"
#include "spinlock.h" #include "spinlock.h"
#include "defs.h"
struct { struct {
struct spinlock lock; struct spinlock lock;
struct proc proc[NPROC]; struct proc proc[NPROC];
} ptable; } ptable;
static struct proc *initproc; // XXX riscv move somewhere else
struct cpu cpus[NCPU];
struct proc *initproc;
int nextpid = 1; int nextpid = 1;
extern void forkret(void); extern void forkret(void);
@ -22,57 +24,36 @@ extern void sysexit(void);
static void wakeup1(void *chan); static void wakeup1(void *chan);
extern char trampstart[]; // trampoline.S
void void
pinit(void) procinit(void)
{ {
initlock(&ptable.lock, "ptable"); initlock(&ptable.lock, "ptable");
} }
// Must be called with interrupts disabled // Must be called with interrupts disabled.
// XXX riscv
int int
cpuid() { cpuid() {
return mycpu()-cpus; return 0;
} }
// Must be called with interrupts disabled to avoid the caller being // Return this core's cpu struct.
// rescheduled between reading lapicid and running through the loop. // XXX riscv
struct cpu*
getmycpu(void)
{
int apicid, i;
if(readeflags()&FL_IF)
panic("getmycpu called with interrupts enabled\n");
apicid = lapicid();
// APIC IDs are not guaranteed to be contiguous.
for (i = 0; i < ncpu; ++i) {
if (cpus[i].apicid == apicid)
return &cpus[i];
}
panic("unknown apicid\n");
}
// Return this core's cpu struct using %gs. %gs points this core's struct
// cpu. Offet 24 in struct cpu is cpu.
struct cpu* struct cpu*
mycpu(void) { mycpu(void) {
struct cpu *c; struct cpu *c;
asm volatile("mov %%gs:24, %0" : "=r" (c)); c = &cpus[0];
return c; return c;
} }
// Disable interrupts so that we are not rescheduled // Disable interrupts so that we are not rescheduled
// while reading proc from the cpu structure // while reading proc from the cpu structure
// XXX riscv
struct proc* struct proc*
myproc(void) { myproc(void) {
struct cpu *c; return cpus[0].proc;
struct proc *p;
pushcli();
c = mycpu();
p = c->proc;
popcli();
return p;
} }
//PAGEBREAK: 32 //PAGEBREAK: 32
@ -84,7 +65,6 @@ static struct proc*
allocproc(void) allocproc(void)
{ {
struct proc *p; struct proc *p;
char *sp;
acquire(&ptable.lock); acquire(&ptable.lock);
@ -101,56 +81,73 @@ found:
release(&ptable.lock); release(&ptable.lock);
// Allocate kernel stack. // Allocate a page for the kernel stack.
if((p->kstack = kalloc()) == 0){ if((p->kstack = kalloc()) == 0){
p->state = UNUSED; p->state = UNUSED;
return 0; return 0;
} }
sp = p->kstack + KSTACKSIZE;
// Leave room for syscall frame. // Allocate a trapframe page.
sp -= sizeof *p->sf; if((p->tf = (struct trapframe *)kalloc()) == 0){
p->state = UNUSED;
return 0;
}
if ((uint64) sp % 16) // An empty user page table.
panic("misaligned sp"); p->pagetable = uvmcreate();
p->sf = (struct sysframe*)sp; // map the trampoline code (for system call return)
// at the highest user virtual address.
// only the supervisor uses it, on the way
// to/from user space, so not PTE_U.
mappages(p->pagetable, TRAMPOLINE, PGSIZE,
(uint64)trampstart, PTE_R | PTE_X);
// map the trapframe, for trampoline.S.
mappages(p->pagetable, (TRAMPOLINE - PGSIZE), PGSIZE,
(uint64)(p->tf), PTE_R | PTE_W);
// Set up new context to start executing at forkret, // Set up new context to start executing at forkret,
// which returns to sysexit. // which returns to user space.
sp -= sizeof(uint64); memset(&p->context, 0, sizeof p->context);
*(uint64*)sp = (uint64)sysexit; p->context.ra = (uint64)forkret;
p->context.sp = (uint64)p->kstack + PGSIZE;
sp -= sizeof *p->context;
p->context = (struct context*)sp;
memset(p->context, 0, sizeof *p->context);
p->context->rip = (uint64)forkret;
return p; return p;
} }
// XXX hack because I don't know how to incorporate initcode
// into the kernel binary. just the exec system call, no arguments.
// manually copied from initcode.asm.
unsigned char initcode[] = {
0x85, 0x48, // li a7, 1 -- SYS_fork
0x73, 0x00, 0x00, 0x00, // ecall
0x8d, 0x48, // li a7, 3 -- SYS_wait
0x73, 0x00, 0x00, 0x00, // ecall
0x89, 0x48, // li a7, 2 -- SYS_exit
0x73, 0x00, 0x00, 0x00, // ecall
};
//PAGEBREAK: 32 //PAGEBREAK: 32
// Set up first user process. // Set up first user process.
void void
userinit(void) userinit(void)
{ {
struct proc *p; struct proc *p;
extern char _binary_initcode_start[], _binary_initcode_size[];
p = allocproc(); p = allocproc();
initproc = p; initproc = p;
if((p->pgdir = setupkvm()) == 0)
panic("userinit: out of memory?"); uvminit(p->pagetable, initcode, sizeof(initcode));
inituvm(p->pgdir, _binary_initcode_start, (uint64)_binary_initcode_size);
p->sz = PGSIZE; p->sz = PGSIZE;
memset(p->sf, 0, sizeof(*p->sf));
p->sf->r11 = FL_IF; // prepare for the very first kernel->user.
p->sf->rsp = PGSIZE; p->tf->epc = 0;
p->sf->rcx = 0; // beginning of initcode.S p->tf->sp = PGSIZE;
safestrcpy(p->name, "initcode", sizeof(p->name)); safestrcpy(p->name, "initcode", sizeof(p->name));
p->cwd = namei("/"); // XXX riscv
//p->cwd = namei("/");
// this assignment to p->state lets other cores // this assignment to p->state lets other cores
// run this process. the acquire forces the above // run this process. the acquire forces the above
@ -163,62 +160,65 @@ userinit(void)
release(&ptable.lock); release(&ptable.lock);
} }
#if 0
// Grow current process's memory by n bytes. // Grow current process's memory by n bytes.
// Return 0 on success, -1 on failure. // Return 0 on success, -1 on failure.
int int
growproc(int n) growproc(int n)
{ {
uint sz; uint sz;
struct proc *curproc = myproc(); struct proc *p = myproc();
sz = curproc->sz; sz = p->sz;
if(n > 0){ if(n > 0){
if((sz = allocuvm(curproc->pgdir, sz, sz + n)) == 0) if((sz = allocuvm(p->pagetable, sz, sz + n)) == 0)
return -1; return -1;
} else if(n < 0){ } else if(n < 0){
if((sz = deallocuvm(curproc->pgdir, sz, sz + n)) == 0) if((sz = uvmdealloc(p->pagetable, sz, sz + n)) == 0)
return -1; return -1;
} }
curproc->sz = sz; p->sz = sz;
switchuvm(curproc); switchuvm(p);
return 0; return 0;
} }
#endif
// Create a new process copying p as the parent. // Create a new process, copying p as the parent.
// Sets up stack to return as if from system call. // Sets up child kernel stack to return as if from system call.
// Caller must set state of returned proc to RUNNABLE.
int int
fork(void) fork(void)
{ {
int i, pid; int i, pid;
struct proc *np; struct proc *np;
struct proc *curproc = myproc(); struct proc *p = myproc();
// Allocate process. // Allocate process.
if((np = allocproc()) == 0){ if((np = allocproc()) == 0){
return -1; return -1;
} }
// Copy process state from proc. // Copy user memory from parent to child.
if((np->pgdir = copyuvm(curproc->pgdir, curproc->sz)) == 0){ uvmcopy(p->pagetable, np->pagetable, p->sz);
kfree(np->kstack); np->sz = p->sz;
np->kstack = 0;
np->state = UNUSED;
return -1;
}
np->sz = curproc->sz;
np->parent = curproc;
*np->sf = *curproc->sf;
// Clear %eax so that fork returns 0 in the child. np->parent = p;
np->sf->rax = 0;
// copy saved user registers.
*(np->tf) = *(p->tf);
// Cause fork to return 0 in the child.
np->tf->a0 = 0;
#if 0 // XXX riscv
// increment reference counts on open file descriptors.
for(i = 0; i < NOFILE; i++) for(i = 0; i < NOFILE; i++)
if(curproc->ofile[i]) if(p->ofile[i])
np->ofile[i] = filedup(curproc->ofile[i]); np->ofile[i] = filedup(p->ofile[i]);
np->cwd = idup(curproc->cwd); np->cwd = idup(p->cwd);
#endif
safestrcpy(np->name, curproc->name, sizeof(curproc->name)); safestrcpy(np->name, p->name, sizeof(p->name));
pid = np->pid; pid = np->pid;
@ -233,46 +233,48 @@ fork(void)
// Exit the current process. Does not return. // Exit the current process. Does not return.
// An exited process remains in the zombie state // An exited process remains in the zombie state
// until its parent calls wait() to find out it exited. // until its parent calls wait().
void void
exit(void) exit(void)
{ {
struct proc *curproc = myproc(); struct proc *p = myproc();
struct proc *p; struct proc *pp;
int fd; int fd;
if(curproc == initproc) if(p == initproc)
panic("init exiting"); panic("init exiting");
#if 0 // XXX riscv
// Close all open files. // Close all open files.
for(fd = 0; fd < NOFILE; fd++){ for(fd = 0; fd < NOFILE; fd++){
if(curproc->ofile[fd]){ if(p->ofile[fd]){
fileclose(curproc->ofile[fd]); fileclose(p->ofile[fd]);
curproc->ofile[fd] = 0; p->ofile[fd] = 0;
} }
} }
begin_op(); begin_op();
iput(curproc->cwd); iput(p->cwd);
end_op(); end_op();
curproc->cwd = 0; #endif
p->cwd = 0;
acquire(&ptable.lock); acquire(&ptable.lock);
// Parent might be sleeping in wait(). // Parent might be sleeping in wait().
wakeup1(curproc->parent); wakeup1(p->parent);
// Pass abandoned children to init. // Pass abandoned children to init.
for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){ for(pp = ptable.proc; pp < &ptable.proc[NPROC]; pp++){
if(p->parent == curproc){ if(pp->parent == p){
p->parent = initproc; pp->parent = initproc;
if(p->state == ZOMBIE) if(pp->state == ZOMBIE)
wakeup1(initproc); wakeup1(initproc);
} }
} }
// Jump into the scheduler, never to return. // Jump into the scheduler, never to return.
curproc->state = ZOMBIE; p->state = ZOMBIE;
sched(); sched();
panic("zombie exit"); panic("zombie exit");
} }
@ -282,42 +284,47 @@ exit(void)
int int
wait(void) wait(void)
{ {
struct proc *p; struct proc *np;
int havekids, pid; int havekids, pid;
struct proc *curproc = myproc(); struct proc *p = myproc();
acquire(&ptable.lock); acquire(&ptable.lock);
for(;;){ for(;;){
// Scan through table looking for exited children. // Scan through table looking for exited children.
havekids = 0; havekids = 0;
for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){ for(np = ptable.proc; np < &ptable.proc[NPROC]; np++){
if(p->parent != curproc) if(np->parent != p)
continue; continue;
havekids = 1; havekids = 1;
if(p->state == ZOMBIE){ if(np->state == ZOMBIE){
// Found one. // Found one.
pid = p->pid; pid = np->pid;
kfree(p->kstack); kfree(np->kstack);
p->kstack = 0; np->kstack = 0;
freevm(p->pgdir, p->sz); kfree((void*)np->tf);
p->pid = 0; np->tf = 0;
p->parent = 0; unmappages(np->pagetable, TRAMPOLINE, PGSIZE, 0);
p->name[0] = 0; unmappages(np->pagetable, TRAMPOLINE-PGSIZE, PGSIZE, 0);
p->killed = 0; uvmfree(np->pagetable, np->sz);
p->state = UNUSED; np->pagetable = 0;
np->pid = 0;
np->parent = 0;
np->name[0] = 0;
np->killed = 0;
np->state = UNUSED;
release(&ptable.lock); release(&ptable.lock);
return pid; return pid;
} }
} }
// No point waiting if we don't have any children. // No point waiting if we don't have any children.
if(!havekids || curproc->killed){ if(!havekids || p->killed){
release(&ptable.lock); release(&ptable.lock);
return -1; return -1;
} }
// Wait for children to exit. (See wakeup1 call in proc_exit.) // Wait for children to exit. (See wakeup1 call in proc_exit.)
sleep(curproc, &ptable.lock); //DOC: wait-sleep sleep(p, &ptable.lock); //DOC: wait-sleep
} }
} }
@ -338,7 +345,8 @@ scheduler(void)
c->proc = 0; c->proc = 0;
for(;;){ for(;;){
// Enable interrupts on this processor. // Enable interrupts on this processor.
sti(); // XXX riscv
//sti();
// Loop over process table looking for process to run. // Loop over process table looking for process to run.
acquire(&ptable.lock); acquire(&ptable.lock);
@ -350,11 +358,11 @@ scheduler(void)
// to release ptable.lock and then reacquire it // to release ptable.lock and then reacquire it
// before jumping back to us. // before jumping back to us.
c->proc = p; c->proc = p;
switchuvm(p);
p->state = RUNNING; p->state = RUNNING;
swtch(&(c->scheduler), p->context); printf("switch...\n");
switchkvm(); swtch(&c->scheduler, &p->context);
printf("switch returned\n");
// Process is done running for now. // Process is done running for now.
// It should have changed its p->state before coming back. // It should have changed its p->state before coming back.
@ -380,14 +388,10 @@ sched(void)
if(!holding(&ptable.lock)) if(!holding(&ptable.lock))
panic("sched ptable.lock"); panic("sched ptable.lock");
if(mycpu()->ncli != 1)
panic("sched locks");
if(p->state == RUNNING) if(p->state == RUNNING)
panic("sched running"); panic("sched running");
if(readeflags()&FL_IF)
panic("sched interruptible");
intena = mycpu()->intena; intena = mycpu()->intena;
swtch(&p->context, mycpu()->scheduler); swtch(&p->context, &mycpu()->scheduler);
mycpu()->intena = intena; mycpu()->intena = intena;
} }
@ -402,24 +406,29 @@ yield(void)
} }
// A fork child's very first scheduling by scheduler() // A fork child's very first scheduling by scheduler()
// will swtch here. "Return" to user space. // will swtch to forkret.
void void
forkret(void) forkret(void)
{ {
struct proc *p = myproc();
static int first = 1; static int first = 1;
// Still holding ptable.lock from scheduler. // Still holding ptable.lock from scheduler.
release(&ptable.lock); release(&ptable.lock);
printf("entering forkret\n");
if (first) { if (first) {
// Some initialization functions must be run in the context // Some initialization functions must be run in the context
// of a regular process (e.g., they call sleep), and thus cannot // of a regular process (e.g., they call sleep), and thus cannot
// be run from main(). // be run from main().
first = 0; first = 0;
iinit(ROOTDEV); // XXX riscv
initlog(ROOTDEV); //iinit(ROOTDEV);
//initlog(ROOTDEV);
} }
// Return to "caller", actually trapret (see allocproc). usertrapret();
} }
// Atomically release lock and sleep on chan. // Atomically release lock and sleep on chan.
@ -483,6 +492,8 @@ wakeup(void *chan)
release(&ptable.lock); release(&ptable.lock);
} }
#if 0
// Kill the process with the given pid. // Kill the process with the given pid.
// Process won't exit until it returns // Process won't exit until it returns
// to user space (see trap in trap.c). // to user space (see trap in trap.c).
@ -533,12 +544,14 @@ procdump(void)
state = states[p->state]; state = states[p->state];
else else
state = "???"; state = "???";
cprintf("%d %s %s", p->pid, state, p->name); printf("%d %s %s", p->pid, state, p->name);
if(p->state == SLEEPING){ if(p->state == SLEEPING){
getcallerpcs((uint64*)p->context->rbp+2, pc); getcallerpcs((uint64*)p->context->rbp+2, pc);
for(i=0; i<10 && pc[i] != 0; i++) for(i=0; i<10 && pc[i] != 0; i++)
cprintf(" %p", pc[i]); printf(" %p", pc[i]);
} }
cprintf("\n"); printf("\n");
} }
} }
#endif

84
proc.h
View file

@ -1,13 +1,30 @@
// Saved registers for kernel context switches.
struct context {
uint64 ra;
uint64 sp;
// callee-saved
uint64 s0;
uint64 s1;
uint64 s2;
uint64 s3;
uint64 s4;
uint64 s5;
uint64 s6;
uint64 s7;
uint64 s8;
uint64 s9;
uint64 s10;
uint64 s11;
};
// Per-CPU state // Per-CPU state
struct cpu { struct cpu {
uint64 syscallno; // Temporary used by sysentry uint64 syscallno; // Temporary used by sysentry
uint64 usp; // Temporary used by sysentry uint64 usp; // Temporary used by sysentry
struct proc *proc; // The process running on this cpu or null struct proc *proc; // The process running on this cpu or null
struct cpu *cpu; // XXX struct cpu *cpu; // XXX
uchar apicid; // Local APIC ID struct context scheduler; // swtch() here to enter scheduler
struct context *scheduler; // swtch() here to enter scheduler
struct taskstate ts; // Used by x86 to find stack for interrupt
struct segdesc gdt[NSEGS]; // x86 global descriptor table
volatile uint started; // Has the CPU started? volatile uint started; // Has the CPU started?
int ncli; // Depth of pushcli nesting. int ncli; // Depth of pushcli nesting.
int intena; // Were interrupts enabled before pushcli? int intena; // Were interrupts enabled before pushcli?
@ -17,39 +34,52 @@ extern struct cpu cpus[NCPU];
extern int ncpu; extern int ncpu;
//PAGEBREAK: 17 //PAGEBREAK: 17
// Saved registers for kernel context switches.
// Don't need to save all the segment registers (%cs, etc), // per-process data for the early trap handling code in trampoline.S.
// because they are constant across kernel contexts. // sits in a page by itself just under the trampoline page in the
// Don't need to save %eax, %ecx, %edx, because the // user page table. not specially mapped in the kernel page table.
// x86 convention is that the caller has saved them. // the sscratch register points here.
// Contexts are stored at the bottom of the stack they // trampoline.S saves user registers, then restores kernel_sp and
// describe; the stack pointer is the address of the context. // kernel_satp.
// The layout of the context matches the layout of the stack in swtch.S // no need to save s0-s11 (callee-saved) since C code and swtch() save them.
// at the "Switch stacks" comment. Switch doesn't save eip explicitly, struct trapframe {
// but it is on the stack and allocproc() manipulates it. /* 0 */ uint64 kernel_satp;
struct context { /* 8 */ uint64 kernel_sp;
uint64 r15; /* 16 */ uint64 kernel_trap; // address of trap()
uint64 r14; /* 24 */ uint64 epc; // saved user program counter
uint64 r13; /* 32 */ uint64 ra;
uint64 r12; /* 40 */ uint64 sp;
uint64 r11; /* 48 */ uint64 gp;
uint64 rbx; /* 56 */ uint64 tp;
uint64 rbp; /* 64 */ uint64 t0;
uint64 rip; /* 72 */ uint64 t1;
/* 80 */ uint64 t2;
/* 88 */ uint64 a0;
/* 96 */ uint64 a1;
/* 104 */ uint64 a2;
/* 112 */ uint64 a3;
/* 120 */ uint64 a4;
/* 128 */ uint64 a5;
/* 136 */ uint64 a6;
/* 144 */ uint64 a7;
/* 152 */ uint64 t3;
/* 160 */ uint64 t4;
/* 168 */ uint64 t5;
/* 176 */ uint64 t6;
}; };
enum procstate { UNUSED, EMBRYO, SLEEPING, RUNNABLE, RUNNING, ZOMBIE }; enum procstate { UNUSED, EMBRYO, SLEEPING, RUNNABLE, RUNNING, ZOMBIE };
// Per-process state // Per-process state
struct proc { struct proc {
char *kstack; // Bottom of kernel stack for this process, must be first entry char *kstack; // Bottom of kernel stack for this process
uint64 sz; // Size of process memory (bytes) uint64 sz; // Size of process memory (bytes)
pde_t* pgdir; // Page table pagetable_t pagetable; // Page table
enum procstate state; // Process state enum procstate state; // Process state
int pid; // Process ID int pid; // Process ID
struct proc *parent; // Parent process struct proc *parent; // Parent process
struct sysframe *sf; // Syscall frame for current syscall struct trapframe *tf; // data page for trampoline.S
struct context *context; // swtch() here to run process struct context context; // swtch() here to run process
void *chan; // If non-zero, sleeping on chan void *chan; // If non-zero, sleeping on chan
int killed; // If non-zero, have been killed int killed; // If non-zero, have been killed
struct file *ofile[NOFILE]; // Open files struct file *ofile[NOFILE]; // Open files

172
riscv.h Normal file
View file

@ -0,0 +1,172 @@
// Machine Status Register, mstatus
#define MSTATUS_MPP_MASK (3L << 11)
#define MSTATUS_MPP_M (3L << 11)
#define MSTATUS_MPP_S (1L << 11)
#define MSTATUS_MPP_U (0L << 11)
static inline uint64
r_mstatus()
{
uint64 x;
asm("csrr %0, mstatus" : "=r" (x) );
return x;
}
static inline void
w_mstatus(uint64 x)
{
asm("csrw mstatus, %0" : : "r" (x));
}
// machine exception program counter, holds the
// instruction address to which a return from
// exception will go.
static inline void
w_mepc(uint64 x)
{
asm("csrw mepc, %0" : : "r" (x));
}
// Supervisor Status Register, sstatus
#define SSTATUS_SPP (1L << 8) // 1=Supervisor, 0=User
static inline uint64
r_sstatus()
{
uint64 x;
asm("csrr %0, sstatus" : "=r" (x) );
return x;
}
static inline void
w_sstatus(uint64 x)
{
asm("csrw sstatus, %0" : : "r" (x));
}
// machine exception program counter, holds the
// instruction address to which a return from
// exception will go.
static inline void
w_sepc(uint64 x)
{
asm("csrw sepc, %0" : : "r" (x));
}
static inline uint64
r_sepc()
{
uint64 x;
asm("csrr %0, sepc" : "=r" (x) );
return x;
}
// Machine Exception Delegation
static inline uint64
r_medeleg()
{
uint64 x;
asm("csrr %0, medeleg" : "=r" (x) );
return x;
}
static inline void
w_medeleg(uint64 x)
{
asm("csrw medeleg, %0" : : "r" (x));
}
// Machine Interrupt Delegation
static inline uint64
r_mideleg()
{
uint64 x;
asm("csrr %0, mideleg" : "=r" (x) );
return x;
}
static inline void
w_mideleg(uint64 x)
{
asm("csrw mideleg, %0" : : "r" (x));
}
// Supervisor Trap-Vector Base Address
// low two bits are mode.
static inline void
w_stvec(uint64 x)
{
asm("csrw stvec, %0" : : "r" (x));
}
// use riscv's sv39 page table scheme.
#define SATP_SV39 (8L << 60)
#define MAKE_SATP(pagetable) (SATP_SV39 | (((uint64)pagetable) >> 12))
// supervisor address translation and protection;
// holds the address of the page table.
static inline void
w_satp(uint64 x)
{
asm("csrw satp, %0" : : "r" (x));
}
static inline uint64
r_satp()
{
uint64 x;
asm("csrr %0, satp" : "=r" (x) );
return x;
}
// Supervisor Scratch register, for early trap handler in trampoline.S.
static inline void
w_sscratch(uint64 x)
{
asm("csrw sscratch, %0" : : "r" (x));
}
// Supervisor trap cause
static inline uint64
r_scause()
{
uint64 x;
asm("csrr %0, scause" : "=r" (x) );
return x;
}
#define PGSIZE 4096 // bytes per page
#define PGSHIFT 12 // bits of offset within a page
#define PGROUNDUP(sz) (((sz)+PGSIZE-1) & ~(PGSIZE-1))
#define PGROUNDDOWN(a) (((a)) & ~(PGSIZE-1))
#define PTE_V (1L << 0) // valid
#define PTE_R (1L << 1)
#define PTE_W (1L << 2)
#define PTE_X (1L << 3)
#define PTE_U (1L << 4) // 1 -> user can access
// shift a physical address to the right place for a PTE.
#define PA2PTE(pa) ((((uint64)pa) >> 12) << 10)
#define PTE2PA(pte) (((pte) >> 10) << 12)
#define PTE_FLAGS(pte) ((pte) & (PTE_V|PTE_R|PTE_W|PTE_X|PTE_U))
// extract the three 9-bit page table indices from a virtual address.
#define PXMASK 0x1FF // 9 bits
#define PXSHIFT(level) (PGSHIFT+(9*(level)))
#define PX(level, va) ((((uint64) (va)) >> PXSHIFT(level)) & PXMASK)
// one beyond the highest possible virtual address.
// MAXVA is actually one bit less than the max allowed by
// Sv39, to avoid having to sign-extend virtual addresses
// that have the high bit set.
#define MAXVA (1L << (9 + 9 + 9 + 12 - 1))
typedef uint64 pte_t;
typedef uint64 *pagetable_t; // 512 PTEs

View file

@ -1,13 +1,11 @@
// Mutual exclusion spin locks. // Mutual exclusion spin locks.
#include "types.h" #include "types.h"
#include "defs.h"
#include "param.h" #include "param.h"
#include "x86.h"
#include "memlayout.h" #include "memlayout.h"
#include "mmu.h"
#include "proc.h"
#include "spinlock.h" #include "spinlock.h"
#include "riscv.h"
#include "defs.h"
void void
initlock(struct spinlock *lk, char *name) initlock(struct spinlock *lk, char *name)
@ -17,6 +15,27 @@ initlock(struct spinlock *lk, char *name)
lk->cpu = 0; lk->cpu = 0;
} }
void
acquire(struct spinlock *lk)
{
lk->locked = 1;
lk->cpu = mycpu();
}
void
release(struct spinlock *lk)
{
lk->locked = 0;
lk->cpu = 0;
}
int
holding(struct spinlock *lk)
{
return lk->locked && lk->cpu == mycpu();
}
#if 0
// Acquire the lock. // Acquire the lock.
// Loops (spins) until the lock is acquired. // Loops (spins) until the lock is acquired.
// Holding a lock for a long time may cause // Holding a lock for a long time may cause
@ -37,7 +56,7 @@ acquire(struct spinlock *lk)
// references happen after the lock is acquired. // references happen after the lock is acquired.
__sync_synchronize(); __sync_synchronize();
// Record info about lock acquisition for debugging. // Record info about lock acquisition for holding() and debugging.
lk->cpu = mycpu(); lk->cpu = mycpu();
getcallerpcs(&lk, lk->pcs); getcallerpcs(&lk, lk->pcs);
} }
@ -87,11 +106,11 @@ getcallerpcs(void *v, uint64 pcs[])
// Check whether this cpu is holding the lock. // Check whether this cpu is holding the lock.
int int
holding(struct spinlock *lock) holding(struct spinlock *lk)
{ {
int r; int r;
pushcli(); pushcli();
r = lock->locked && lock->cpu == mycpu(); r = lk->locked && lk->cpu == mycpu();
popcli(); popcli();
return r; return r;
} }
@ -123,4 +142,4 @@ popcli(void)
if(mycpu()->ncli == 0 && mycpu()->intena) if(mycpu()->ncli == 0 && mycpu()->intena)
sti(); sti();
} }
#endif

34
start.c Normal file
View file

@ -0,0 +1,34 @@
#include "types.h"
#include "memlayout.h"
#include "riscv.h"
#include "defs.h"
void main();
// entry.S uses this as the initial stack.
char stack0[4096];
// entry.S jumps here in machine mode on stack0.
void
mstart()
{
// set M Previous Privilege mode to Supervisor, for mret.
unsigned long x = r_mstatus();
x &= ~MSTATUS_MPP_MASK;
x |= MSTATUS_MPP_S;
w_mstatus(x);
// set M Exception Program Counter to main, for mret.
// requires gcc -mcmodel=medany
w_mepc((uint64)main);
// disable paging for now.
w_satp(0);
// delegate all interrupts and exceptions to supervisor mode.
w_medeleg(0xffff);
w_mideleg(0xffff);
// jump to main in supervisor mode.
asm("mret");
}

View file

@ -1,14 +1,13 @@
#include "types.h" #include "types.h"
#include "x86.h"
void* void*
memset(void *dst, int c, uint n) memset(void *dst, int c, uint n)
{ {
if ((uint64)dst%4 == 0 && n%4 == 0){ char *cdst = (char *) dst;
c &= 0xFF; int i;
stosl(dst, (c<<24)|(c<<16)|(c<<8)|c, n/4); for(i = 0; i < n; i++){
} else cdst[i] = c;
stosb(dst, c, n); }
return dst; return dst;
} }

57
swtch.S
View file

@ -1,35 +1,42 @@
# Context switch # Context switch
# #
# void swtch(struct context **old, struct context *new); # void swtch(struct context *old, struct context *new);
# #
# Save the current registers on the stack, creating # Save current registers in old. Load from new.
# a struct context, and save its address in *old.
# Switch stacks to new and pop previously-saved registers.
.globl swtch .globl swtch
swtch: swtch:
# Save old callee-saved registers sd ra, 0(a0)
push %rbp sd sp, 8(a0)
push %rbx sd s0, 16(a0)
push %r11 sd s1, 24(a0)
push %r12 sd s2, 32(a0)
push %r13 sd s3, 40(a0)
push %r14 sd s4, 48(a0)
push %r15 sd s5, 56(a0)
sd s6, 64(a0)
sd s7, 72(a0)
sd s8, 80(a0)
sd s9, 88(a0)
sd s10, 96(a0)
sd s11, 104(a0)
# Switch stacks ld ra, 0(a1)
mov %rsp, (%rdi) # first arg of swtch is in rdi ld sp, 8(a1)
mov %rsi, %rsp # second arg of swtch is in rsi ld s0, 16(a1)
ld s1, 24(a1)
ld s2, 32(a1)
ld s3, 40(a1)
ld s4, 48(a1)
ld s5, 56(a1)
ld s6, 64(a1)
ld s7, 72(a1)
ld s8, 80(a1)
ld s9, 88(a1)
ld s10, 96(a1)
ld s11, 104(a1)
# Load new callee-saved registers ret
pop %r15
pop %r14
pop %r13
pop %r12
pop %r11
pop %rbx
pop %rbp
ret

View file

@ -1,11 +1,10 @@
#include "types.h" #include "types.h"
#include "defs.h"
#include "param.h" #include "param.h"
#include "memlayout.h" #include "memlayout.h"
#include "mmu.h" #include "riscv.h"
#include "proc.h" #include "proc.h"
#include "x86.h"
#include "syscall.h" #include "syscall.h"
#include "defs.h"
// User code makes a system call with INT T_SYSCALL. // User code makes a system call with INT T_SYSCALL.
// System call number in %eax. // System call number in %eax.
@ -17,9 +16,9 @@
int int
fetchint(uint64 addr, int *ip) fetchint(uint64 addr, int *ip)
{ {
struct proc *curproc = myproc(); struct proc *p = myproc();
if(addr >= curproc->sz || addr+4 > curproc->sz) if(addr >= p->sz || addr+4 > p->sz)
return -1; return -1;
*ip = *(uint64*)(addr); *ip = *(uint64*)(addr);
return 0; return 0;
@ -29,8 +28,8 @@ fetchint(uint64 addr, int *ip)
int int
fetchaddr(uint64 addr, uint64 *ip) fetchaddr(uint64 addr, uint64 *ip)
{ {
struct proc *curproc = myproc(); struct proc *p = myproc();
if(addr >= curproc->sz || addr+sizeof(uint64) > curproc->sz) if(addr >= p->sz || addr+sizeof(uint64) > p->sz)
return -1; return -1;
*ip = *(uint64*)(addr); *ip = *(uint64*)(addr);
return 0; return 0;
@ -43,12 +42,12 @@ int
fetchstr(uint64 addr, char **pp) fetchstr(uint64 addr, char **pp)
{ {
char *s, *ep; char *s, *ep;
struct proc *curproc = myproc(); struct proc *p = myproc();
if(addr >= curproc->sz) if(addr >= p->sz)
return -1; return -1;
*pp = (char*)addr; *pp = (char*)addr;
ep = (char*)curproc->sz; ep = (char*)p->sz;
for(s = *pp; s < ep; s++){ for(s = *pp; s < ep; s++){
if(*s == 0) if(*s == 0)
return s - *pp; return s - *pp;
@ -59,20 +58,20 @@ fetchstr(uint64 addr, char **pp)
static uint64 static uint64
fetcharg(int n) fetcharg(int n)
{ {
struct proc *curproc = myproc(); struct proc *p = myproc();
switch (n) { switch (n) {
case 0: case 0:
return curproc->sf->rdi; return p->tf->a0;
case 1: case 1:
return curproc->sf->rsi; return p->tf->a1;
case 2: case 2:
return curproc->sf->rdx; return p->tf->a2;
case 3: case 3:
return curproc->sf->r10; return p->tf->a3;
case 4: case 4:
return curproc->sf->r8; return p->tf->a4;
case 5: case 5:
return curproc->sf->r9; return p->tf->a5;
} }
panic("fetcharg"); panic("fetcharg");
return -1; return -1;
@ -100,11 +99,11 @@ int
argptr(int n, char **pp, int size) argptr(int n, char **pp, int size)
{ {
uint64 i; uint64 i;
struct proc *curproc = myproc(); struct proc *p = myproc();
if(argaddr(n, &i) < 0) if(argaddr(n, &i) < 0)
return -1; return -1;
if(size < 0 || (uint)i >= curproc->sz || (uint)i+size > curproc->sz) if(size < 0 || (uint)i >= p->sz || (uint)i+size > p->sz)
return -1; return -1;
*pp = (char*)i; *pp = (char*)i;
return 0; return 0;
@ -149,48 +148,47 @@ static int (*syscalls[])(void) = {
[SYS_fork] sys_fork, [SYS_fork] sys_fork,
[SYS_exit] sys_exit, [SYS_exit] sys_exit,
[SYS_wait] sys_wait, [SYS_wait] sys_wait,
[SYS_pipe] sys_pipe, //[SYS_pipe] sys_pipe,
[SYS_read] sys_read, //[SYS_read] sys_read,
[SYS_kill] sys_kill, //[SYS_kill] sys_kill,
[SYS_exec] sys_exec, //[SYS_exec] sys_exec,
[SYS_fstat] sys_fstat, //[SYS_fstat] sys_fstat,
[SYS_chdir] sys_chdir, //[SYS_chdir] sys_chdir,
[SYS_dup] sys_dup, //[SYS_dup] sys_dup,
[SYS_getpid] sys_getpid, [SYS_getpid] sys_getpid,
[SYS_sbrk] sys_sbrk, //[SYS_sbrk] sys_sbrk,
[SYS_sleep] sys_sleep, //[SYS_sleep] sys_sleep,
[SYS_uptime] sys_uptime, //[SYS_uptime] sys_uptime,
[SYS_open] sys_open, //[SYS_open] sys_open,
[SYS_write] sys_write, //[SYS_write] sys_write,
[SYS_mknod] sys_mknod, //[SYS_mknod] sys_mknod,
[SYS_unlink] sys_unlink, //[SYS_unlink] sys_unlink,
[SYS_link] sys_link, //[SYS_link] sys_link,
[SYS_mkdir] sys_mkdir, //[SYS_mkdir] sys_mkdir,
[SYS_close] sys_close, //[SYS_close] sys_close,
}; };
static void static void
dosyscall(void) dosyscall(void)
{ {
int num; int num;
struct proc *curproc = myproc(); struct proc *p = myproc();
num = curproc->sf->rax; num = p->tf->a7;
if(num > 0 && num < NELEM(syscalls) && syscalls[num]) { if(num > 0 && num < NELEM(syscalls) && syscalls[num]) {
curproc->sf->rax = syscalls[num](); p->tf->a0 = syscalls[num]();
} else { } else {
cprintf("%d %s: unknown sys call %d\n", printf("%d %s: unknown sys call %d\n",
curproc->pid, curproc->name, num); p->pid, p->name, num);
curproc->sf->rax = -1; p->tf->a0 = -1;
} }
} }
void void
syscall(struct sysframe *sf) syscall()
{ {
if(myproc()->killed) if(myproc()->killed)
exit(); exit();
myproc()->sf = sf;
dosyscall(); dosyscall();
if(myproc()->killed) if(myproc()->killed)
exit(); exit();

View file

@ -41,11 +41,11 @@ static int
fdalloc(struct file *f) fdalloc(struct file *f)
{ {
int fd; int fd;
struct proc *curproc = myproc(); struct proc *p = myproc();
for(fd = 0; fd < NOFILE; fd++){ for(fd = 0; fd < NOFILE; fd++){
if(curproc->ofile[fd] == 0){ if(p->ofile[fd] == 0){
curproc->ofile[fd] = f; p->ofile[fd] = f;
return fd; return fd;
} }
} }
@ -374,7 +374,7 @@ sys_chdir(void)
{ {
char *path; char *path;
struct inode *ip; struct inode *ip;
struct proc *curproc = myproc(); struct proc *p = myproc();
begin_op(); begin_op();
if(argstr(0, &path) < 0 || (ip = namei(path)) == 0){ if(argstr(0, &path) < 0 || (ip = namei(path)) == 0){
@ -388,9 +388,9 @@ sys_chdir(void)
return -1; return -1;
} }
iunlock(ip); iunlock(ip);
iput(curproc->cwd); iput(p->cwd);
end_op(); end_op();
curproc->cwd = ip; p->cwd = ip;
return 0; return 0;
} }

View file

@ -1,18 +1,11 @@
#include "types.h" #include "types.h"
#include "x86.h" #include "riscv.h"
#include "defs.h" #include "defs.h"
#include "date.h" #include "date.h"
#include "param.h" #include "param.h"
#include "memlayout.h" #include "memlayout.h"
#include "mmu.h"
#include "proc.h" #include "proc.h"
int
sys_fork(void)
{
return fork();
}
int int
sys_exit(void) sys_exit(void)
{ {
@ -20,12 +13,25 @@ sys_exit(void)
return 0; // not reached return 0; // not reached
} }
int
sys_getpid(void)
{
return myproc()->pid;
}
int
sys_fork(void)
{
return fork();
}
int int
sys_wait(void) sys_wait(void)
{ {
return wait(); return wait();
} }
#if 0
int int
sys_kill(void) sys_kill(void)
{ {
@ -36,12 +42,6 @@ sys_kill(void)
return kill(pid); return kill(pid);
} }
int
sys_getpid(void)
{
return myproc()->pid;
}
int int
sys_sbrk(void) sys_sbrk(void)
{ {
@ -89,3 +89,4 @@ sys_uptime(void)
release(&tickslock); release(&tickslock);
return xticks; return xticks;
} }
#endif

108
trampoline.S Normal file
View file

@ -0,0 +1,108 @@
#
# code to switch between user and kernel space.
#
# this code is mapped at the same virtual address
# in user and kernel space so that it can switch
# page tables.
#
# kernel.ld causes trampstart to be aligned
# to a page boundary.
#
.globl usertrap
.section trampoline
.globl trampstart
trampstart:
# switch from kernel to user.
# a0: p->tf in user page table
# a1: new value for satp, for user page table
# switch to user page table
csrw satp, a1
# put the saved user a0 in sscratch, so we
# can swap it with our a0 (p->tf) in the last step.
ld t0, 80(a0)
csrw sscratch, t0
# restore all but a0 from p->tf
ld ra, 32(a0)
ld sp, 40(a0)
ld gp, 48(a0)
ld tp, 56(a0)
ld t0, 64(a0)
ld t1, 72(a0)
ld t2, 80(a0)
ld a1, 96(a0)
ld a2, 104(a0)
ld a3, 112(a0)
ld a4, 120(a0)
ld a5, 128(a0)
ld a6, 136(a0)
ld a7, 144(a0)
ld t3, 152(a0)
ld t4, 160(a0)
ld t5, 168(a0)
ld t6, 176(a0)
# restore user a0, and save p->tf
csrrw a0, sscratch, a0
# return to user mode and user pc.
# caller has set up sstatus and sepc.
sret
#
# trap.c set stvec to point here, so
# interrupts and exceptions start here,
# in supervisor mode, but with a
# user page table.
#
# sscratch points to where the process's p->tf is
# mapped into user space (TRAMPOLINE - 4096).
#
.align 4
.globl trampvec
trampvec:
# swap a0 and sscratch
# so that a0 is p->tf
csrrw a0, sscratch, a0
# save the user registers in p->tf
sd ra, 32(a0)
sd sp, 40(a0)
sd gp, 48(a0)
sd tp, 56(a0)
sd t0, 64(a0)
sd t1, 72(a0)
sd t2, 80(a0)
sd a1, 96(a0)
sd a2, 104(a0)
sd a3, 112(a0)
sd a4, 120(a0)
sd a5, 128(a0)
sd a6, 136(a0)
sd a7, 144(a0)
sd t3, 152(a0)
sd t4, 160(a0)
sd t5, 168(a0)
sd t6, 176(a0)
# save the user a0 in p->tf->a0
csrr t0, sscratch
sd t0, 80(a0)
# restore kernel stack pointer from p->tf->kernel_sp
ld sp, 8(a0)
# remember the address of usertrap(), p->tf->kernel_trap
ld t0, 16(a0)
# restore kernel page table from p->tf->kernel_satp
ld t1, 0(a0)
csrw satp, t1
# a0 is no longer valid, since the kernel page
# table does not specially map p->td.
# jump to usertrap(), which does not return
jr t0

168
trap.c
View file

@ -1,109 +1,113 @@
#include "types.h" #include "types.h"
#include "defs.h"
#include "param.h" #include "param.h"
#include "memlayout.h" #include "memlayout.h"
#include "mmu.h" #include "riscv.h"
#include "proc.h" #include "proc.h"
#include "x86.h"
#include "traps.h"
#include "spinlock.h" #include "spinlock.h"
#include "defs.h"
// Interrupt descriptor table (shared by all CPUs).
struct intgate idt[256];
extern uint64 vectors[]; // in vectors.S: array of 256 entry pointers
struct spinlock tickslock; struct spinlock tickslock;
uint ticks; uint ticks;
extern char trampstart[], trampvec[];
void kerneltrap();
void void
tvinit(void) trapinit(void)
{ {
int i; int i;
for(i=0; i<256; i++) { // send interrupts and exceptions to kerneltrap().
idt[i] = INTDESC(SEG_KCODE, vectors[i], INT_P | SEG_INTR64); w_stvec((uint64)kerneltrap);
}
idtinit();
initlock(&tickslock, "time"); initlock(&tickslock, "time");
} }
//
// handle an interrupt, exception, or system call from user space.
// called from trampoline.S
//
void void
idtinit(void) usertrap(void)
{ {
struct desctr dtr; if((r_sstatus() & SSTATUS_SPP) != 0)
panic("usertrap: not from user mode");
dtr.limit = sizeof(idt) - 1; // send interrupts and exceptions to kerneltrap(),
dtr.base = (uint64)idt; // since we're now in the kernel.
lidt((void *)&dtr.limit); w_stvec((uint64)kerneltrap);
}
//PAGEBREAK: 41 struct proc *p = myproc();
void
trap(struct trapframe *tf)
{
switch(tf->trapno){
case T_IRQ0 + IRQ_TIMER:
if(cpuid() == 0){
acquire(&tickslock);
ticks++;
wakeup(&ticks);
release(&tickslock);
}
lapiceoi();
break;
case T_IRQ0 + IRQ_IDE:
ideintr();
lapiceoi();
break;
case T_IRQ0 + IRQ_IDE+1:
// Bochs generates spurious IDE1 interrupts.
break;
case T_IRQ0 + IRQ_KBD:
kbdintr();
lapiceoi();
break;
case T_IRQ0 + IRQ_COM1:
uartintr();
lapiceoi();
break;
case T_IRQ0 + 7:
case T_IRQ0 + IRQ_SPURIOUS:
cprintf("cpu%d: spurious interrupt at %x:%x\n",
cpuid(), tf->cs, tf->rip);
lapiceoi();
break;
//PAGEBREAK: 13 // save user program counter.
default: p->tf->epc = r_sepc();
if(myproc() == 0 || (tf->cs&3) == 0){
// In kernel, it must be our mistake. if(r_scause() == 8){
cprintf("unexpected trap %d from cpu %d rip %x (cr2=0x%x)\n", // system call
tf->trapno, cpuid(), tf->rip, rcr2()); printf("usertrap(): system call pid=%d syscall=%d\n", p->pid, p->tf->a7);
panic("trap");
} // sepc points to the ecall instruction,
// In user space, assume process misbehaved. // but we want to return to the next instruction.
cprintf("pid %d %s: trap %d err %d on cpu %d " p->tf->epc += 4;
"rip 0x%x addr 0x%x--kill proc\n",
myproc()->pid, myproc()->name, tf->trapno, syscall();
tf->err, cpuid(), tf->rip, rcr2()); } else {
myproc()->killed = 1; printf("usertrap(): unexpected scause 0x%x pid=%d\n", r_scause(), p->pid);
panic("usertrap");
} }
// Force process exit if it has been killed and is in user space. usertrapret();
// (If it is still executing in the kernel, let it keep running
// until it gets to the regular system call return.)
if(myproc() && myproc()->killed && (tf->cs&3) == DPL_USER)
exit();
// Force process to give up CPU on clock tick.
// If interrupts were on while locks held, would need to check nlock.
if(myproc() && myproc()->state == RUNNING &&
tf->trapno == T_IRQ0+IRQ_TIMER)
yield();
// Check if the process has been killed since we yielded
if(myproc() && myproc()->killed && (tf->cs&3) == DPL_USER)
exit();
} }
//
// return to user space
//
void
usertrapret(void)
{
struct proc *p = myproc();
// XXX turn off interrupts, since we're switching
// now from kerneltrap() to usertrap().
// send interrupts and exceptions to trampoline.S
w_stvec(TRAMPOLINE + (trampvec - trampstart));
// set up values that trampoline.S will need when
// the process next re-enters the kernel.
p->tf->kernel_satp = r_satp();
p->tf->kernel_sp = (uint64)p->kstack + PGSIZE;
p->tf->kernel_trap = (uint64)usertrap;
// set up the registers that trampoline.S's sret will use
// to get to user space.
// set S Previous Privilege mode to User.
unsigned long x = r_sstatus();
x &= ~SSTATUS_SPP; // clear SPP to 0 for user mode
w_sstatus(x);
// set S Exception Program Counter to the saved user pc.
w_sepc(p->tf->epc);
// tell trampline.S the user page table to switch to.
uint64 satp = MAKE_SATP(p->pagetable);
// jump to trampoline.S at the top of memory, which
// switches to the user page table, restores user registers,
// and switches to user mode with sret.
((void (*)(uint64,uint64))TRAMPOLINE)(TRAMPOLINE - PGSIZE, satp);
}
// interrupts and exceptions from kernel code go here,
// on whatever the current kernel stack is.
// must be 4-byte aligned to fit in stvec.
void __attribute__ ((aligned (4)))
kerneltrap()
{
if((r_sstatus() & SSTATUS_SPP) == 0)
panic("kerneltrap: not from supervisor mode");
panic("kerneltrap");
}

36
traps.h
View file

@ -1,36 +0,0 @@
// x86 trap and interrupt constants.
// Processor-defined:
#define T_DIVIDE 0 // divide error
#define T_DEBUG 1 // debug exception
#define T_NMI 2 // non-maskable interrupt
#define T_BRKPT 3 // breakpoint
#define T_OFLOW 4 // overflow
#define T_BOUND 5 // bounds check
#define T_ILLOP 6 // illegal opcode
#define T_DEVICE 7 // device not available
#define T_DBLFLT 8 // double fault
// #define T_COPROC 9 // reserved (not used since 486)
#define T_TSS 10 // invalid task switch segment
#define T_SEGNP 11 // segment not present
#define T_STACK 12 // stack exception
#define T_GPFLT 13 // general protection fault
#define T_PGFLT 14 // page fault
// #define T_RES 15 // reserved
#define T_FPERR 16 // floating point error
#define T_ALIGN 17 // aligment check
#define T_MCHK 18 // machine check
#define T_SIMDERR 19 // SIMD floating point error
#define T_DEFAULT 500 // catchall
#define T_IRQ0 32 // IRQ 0 corresponds to int T_IRQ
#define IRQ_TIMER 0
#define IRQ_KBD 1
#define IRQ_COM1 4
#define IRQ_IDE 14
#define IRQ_ERROR 19
#define IRQ_SPURIOUS 31

74
uart.c
View file

@ -1,77 +1,51 @@
// Intel 8250 serial port (UART). #include "memlayout.h"
#include "types.h" //
#include "defs.h" // qemu -machine virt has a 16550a UART
#include "param.h" // qemu/hw/riscv/virt.c
#include "traps.h" // http://byterunner.com/16550.html
#include "spinlock.h" //
#include "sleeplock.h" // caller should lock.
#include "fs.h" //
#include "file.h"
#include "mmu.h"
#include "proc.h"
#include "x86.h"
#define COM1 0x3f8 // address of one of the registers
#define R(reg) ((unsigned int*)(UART0 + 4*(reg)))
static int uart; // is there a uart?
void void
uartinit(void) uartinit(void)
{ {
char *p; // disable interrupts
*R(1) = 0x00;
// Turn off the FIFO // special mode to set baud rate
outb(COM1+2, 0); *R(3) = 0x80;
// 9600 baud, 8 data bits, 1 stop bit, parity off. // LSB for baud rate of 38.4K
outb(COM1+3, 0x80); // Unlock divisor *R(0) = 0x03;
outb(COM1+0, 115200/9600);
outb(COM1+1, 0);
outb(COM1+3, 0x03); // Lock divisor, 8 data bits.
outb(COM1+4, 0);
outb(COM1+1, 0x01); // Enable receive interrupts.
// If status is 0xFF, no serial port. // MSB for baud rate of 38.4K
if(inb(COM1+5) == 0xFF) *R(1) = 0x00;
return;
uart = 1;
// Acknowledge pre-existing interrupt conditions; // leave set-baud mode,
// enable interrupts. // and set word length to 8 bits, no parity.
inb(COM1+2); *R(3) = 0x03;
inb(COM1+0);
ioapicenable(IRQ_COM1, 0);
// Announce that we're here. // reset and enable FIFOs.
for(p="xv6...\n"; *p; p++) *R(2) = 0x07;
uartputc(*p);
} }
void void
uartputc(int c) uartputc(int c)
{ {
int i; *R(0) = c;
if(!uart)
return;
for(i = 0; i < 128 && !(inb(COM1+5) & 0x20); i++)
microdelay(10);
outb(COM1+0, c);
} }
static int static int
uartgetc(void) uartgetc(void)
{ {
if(!uart)
return -1;
if(!(inb(COM1+5) & 0x01))
return -1;
return inb(COM1+0);
} }
void void
uartintr(void) uartintr(void)
{ {
consoleintr(uartgetc);
} }

496
vm.c
View file

@ -1,230 +1,162 @@
#include "param.h" #include "param.h"
#include "types.h" #include "types.h"
#include "defs.h"
#include "x86.h"
#include "msr.h"
#include "memlayout.h" #include "memlayout.h"
#include "mmu.h"
#include "proc.h"
#include "elf.h" #include "elf.h"
#include "traps.h" #include "riscv.h"
#include "defs.h"
extern char data[]; // defined by kernel.ld /*
void sysentry(void); * the kernel's page table.
*/
pagetable_t kernel_pagetable;
static pde_t *kpml4; // kernel address space, used by scheduler and bootup extern char etext[]; // kernel.ld sets this to end of kernel code.
// Bootstrap GDT. Used by boot.S but defined in C extern char trampstart[]; // trampoline.S
// Map "logical" addresses to virtual addresses using identity map.
// Cannot share a CODE descriptor for both kernel and user
// because it would have to have DPL_USR, but the CPU forbids
// an interrupt from CPL=0 to DPL=3.
struct segdesc bootgdt[NSEGS] = {
[0] = SEGDESC(0, 0, 0), // null
[1] = SEGDESC(0, 0xfffff, SEG_R|SEG_CODE|SEG_S|SEG_DPL(0)|SEG_P|SEG_D|SEG_G), // 32-bit kernel code
[2] = SEGDESC(0, 0, SEG_R|SEG_CODE|SEG_S|SEG_DPL(0)|SEG_P|SEG_L|SEG_G), // 64-bit kernel code
[3] = SEGDESC(0, 0xfffff, SEG_W|SEG_S|SEG_DPL(0)|SEG_P|SEG_D|SEG_G), // kernel data
// The order of the user data and user code segments is
// important for syscall instructions. See initseg.
[6] = SEGDESC(0, 0xfffff, SEG_W|SEG_S|SEG_DPL(3)|SEG_P|SEG_D|SEG_G), // 64-bit user data
[7] = SEGDESC(0, 0, SEG_R|SEG_CODE|SEG_S|SEG_DPL(3)|SEG_P|SEG_L|SEG_G), // 64-bit user code
};
/*
// Set up CPU's kernel segment descriptors. * create a direct-map page table for the kernel and
// Run once on entry on each CPU. * turn on paging. called early, in supervisor mode.
* the page allocator is already initialized.
*/
void void
seginit(void) kvminit()
{ {
struct cpu *c; kernel_pagetable = (pagetable_t) kalloc();
struct desctr dtr; memset(kernel_pagetable, 0, PGSIZE);
c = getmycpu(); // uart registers
mappages(kernel_pagetable, UART0, PGSIZE,
UART0, PTE_R | PTE_W);
memmove(c->gdt, bootgdt, sizeof bootgdt); // map kernel text executable and read-only.
dtr.limit = sizeof(c->gdt)-1; mappages(kernel_pagetable, KERNBASE, (uint64)etext-KERNBASE,
dtr.base = (uint64) c->gdt; KERNBASE, PTE_R | PTE_X);
lgdt((void *)&dtr.limit);
// When executing a syscall instruction the CPU sets the SS selector // map kernel data and the physical RAM we'll make use of.
// to (star >> 32) + 8 and the CS selector to (star >> 32). mappages(kernel_pagetable, (uint64)etext, PHYSTOP-(uint64)etext,
// When executing a sysret instruction the CPU sets the SS selector (uint64)etext, PTE_R | PTE_W);
// to (star >> 48) + 8 and the CS selector to (star >> 48) + 16.
uint64 star = ((((uint64)SEG_UCODE|0x3)- 16)<<48)|((uint64)(SEG_KCODE)<<32);
writemsr(MSR_STAR, star);
writemsr(MSR_LSTAR, (uint64)&sysentry);
writemsr(MSR_SFMASK, FL_TF | FL_IF);
// Initialize cpu-local storage so that each core can easily // map the trampoline for trap entry/exit to
// find its struct cpu using %gs. // the highest virtual address in the kernel.
writegs(SEG_KDATA); mappages(kernel_pagetable, TRAMPOLINE, PGSIZE,
writemsr(MSR_GS_BASE, (uint64)c); (uint64)trampstart, PTE_R | PTE_X);
writemsr(MSR_GS_KERNBASE, (uint64)c);
c->cpu = c; kvmswitch();
} }
// Return the address of the PTE in page table pgdir // Switch h/w page table register to the kernel's page table,
// and enable paging.
void
kvmswitch(void)
{
w_satp(MAKE_SATP(kernel_pagetable));
}
// Return the address of the PTE in page table pagetable
// that corresponds to virtual address va. If alloc!=0, // that corresponds to virtual address va. If alloc!=0,
// create any required page table pages. // create any required page table pages.
//
// The risc-v Sv39 scheme has three levels of page table
// pages. A page table page contains 512 64-bit PTEs.
// A 64-bit virtual address is split into five fields:
// 39..63 -- must be zero.
// 30..38 -- 9 bits of level-2 index.
// 21..39 -- 9 bits of level-1 index.
// 12..20 -- 9 bits of level-0 index.
// 0..12 -- 12 bits of byte offset within the page.
static pte_t * static pte_t *
walkpgdir(pde_t *pml4, const void *va, int alloc) walk(pagetable_t pagetable, const void *va, int alloc)
{ {
pde_t *pgdir = pml4; if((uint64)va >= MAXVA)
pde_t *pde; panic("walk");
int level;
for (level = L_PML4; level > 0; level--) { for(int level = 2; level > 0; level--) {
pde = &pgdir[PX(level, va)]; pte_t *pte = &pagetable[PX(level, va)];
if(*pde & PTE_P) if(*pte & PTE_V) {
pgdir = (pte_t*)P2V(PTE_ADDR(*pde)); pagetable = (pagetable_t)PTE2PA(*pte);
else { } else {
if(!alloc || (pgdir = (pde_t*)kalloc()) == 0) if(!alloc || (pagetable = (pde_t*)kalloc()) == 0)
return 0; return 0;
memset(pgdir, 0, PGSIZE); memset(pagetable, 0, PGSIZE);
*pde = V2P(pgdir) | PTE_P | PTE_W | PTE_U; *pte = PA2PTE(pagetable) | PTE_V;
} }
} }
return &pgdir[PX(level, va)]; return &pagetable[PX(0, va)];
} }
// Create PTEs for virtual addresses starting at va that refer to // Create PTEs for virtual addresses starting at va that refer to
// physical addresses starting at pa. va and size might not // physical addresses starting at pa. va and size might not
// be page-aligned. // be page-aligned.
static int void
mappages(pde_t *pgdir, void *va, uint64 size, uint64 pa, int perm) mappages(pagetable_t pagetable, uint64 va, uint64 size, uint64 pa, int perm)
{ {
char *a, *last; char *a, *last;
pte_t *pte; pte_t *pte;
a = (char*)PGROUNDDOWN((uint64)va); a = (char*)PGROUNDDOWN(va);
last = (char*)PGROUNDDOWN(((uint64)va) + size - 1); last = (char*)PGROUNDDOWN(va + size - 1);
for(;;){ for(;;){
if((pte = walkpgdir(pgdir, a, 1)) == 0) if((pte = walk(pagetable, a, 1)) == 0)
return -1; panic("mappages: walk");
if(*pte & PTE_P) if(*pte & PTE_V)
panic("remap"); panic("remap");
*pte = pa | perm | PTE_P; *pte = PA2PTE(pa) | perm | PTE_V;
if(a == last) if(a == last)
break; break;
a += PGSIZE; a += PGSIZE;
pa += PGSIZE; pa += PGSIZE;
} }
return 0;
} }
// There is one page table per process, plus one that's used when // Remove mappings from a page table. The mappings in
// a CPU is not running any process (kpml4). The kernel uses the // the given range must exist. Optionally free the
// current process's page table during system calls and interrupts; // physical memory.
// page protection bits prevent user code from using the kernel's void
// mappings. unmappages(pagetable_t pagetable, uint64 va, uint64 size, int do_free)
//
// setupkvm() and exec() set up every page table like this:
//
// 0..KERNBASE: user memory (text+data+stack+heap), mapped to
// phys memory allocated by the kernel
// KERNBASE..KERNBASE+EXTMEM: mapped to 0..EXTMEM (for I/O space)
// KERNBASE+EXTMEM..data: mapped to EXTMEM..V2P(data)
// for the kernel's instructions and r/o data
// data..KERNBASE+PHYSTOP: mapped to V2P(data)..PHYSTOP,
// rw data + free physical memory
// 0xfe000000..0: mapped direct (devices such as ioapic)
//
// The kernel allocates physical memory for its heap and for user memory
// between V2P(end) and the end of physical memory (PHYSTOP)
// (directly addressable from end..P2V(PHYSTOP)).
// This table defines the kernel's mappings, which are present in
// every process's page table.
static struct kmap {
void *virt;
uint64 phys_start;
uint64 phys_end;
int perm;
} kmap[] = {
{ (void*)KERNBASE, 0, EXTMEM, PTE_W}, // I/O space
{ (void*)KERNLINK, V2P(KERNLINK), V2P(data), 0}, // kern text+rodata
{ (void*)data, V2P(data), PHYSTOP, PTE_W}, // kern data+memory
{ (void*)P2V(DEVSPACE), DEVSPACE, DEVSPACETOP, PTE_W}, // more devices
};
// Set up kernel part of a page table.
pde_t*
setupkvm(void)
{ {
pde_t *pml4; char *a, *last;
struct kmap *k; pte_t *pte;
uint64 pa;
if((pml4 = (pde_t*)kalloc()) == 0) a = (char*)PGROUNDDOWN(va);
return 0; last = (char*)PGROUNDDOWN(va + size - 1);
memset(pml4, 0, PGSIZE); for(;;){
if (PHYSTOP > DEVSPACE) if((pte = walk(pagetable, a, 0)) == 0)
panic("PHYSTOP too high"); panic("unmappages: walk");
for(k = kmap; k < &kmap[NELEM(kmap)]; k++) { if((*pte & PTE_V) == 0)
if(mappages(pml4, k->virt, k->phys_end - k->phys_start, panic("unmappages: not mapped");
(uint)k->phys_start, k->perm) < 0) { if(PTE_FLAGS(*pte) == PTE_V)
freevm(pml4, 0); panic("unmappages: not a leaf");
return 0; if(do_free){
pa = PTE2PA(*pte);
kfree((void*)pa);
} }
*pte = 0;
if(a == last)
break;
a += PGSIZE;
pa += PGSIZE;
} }
return pml4;
} }
// Allocate one page table for the machine for the kernel address // create an empty user page table.
// space for scheduler processes. pagetable_t
void uvmcreate()
kvmalloc(void)
{ {
kpml4 = setupkvm(); pagetable_t pagetable;
switchkvm(); pagetable = (pagetable_t) kalloc();
if(pagetable == 0)
panic("uvmcreate: out of memory");
memset(pagetable, 0, PGSIZE);
return pagetable;
} }
// Switch h/w page table register to the kernel-only page table, // Load the user initcode into address 0 of pagetable,
// for when no process is running. // for the very first process.
void
switchkvm(void)
{
lcr3(V2P(kpml4)); // switch to the kernel page table
}
// Switch TSS and h/w page table to correspond to process p.
void
switchuvm(struct proc *p)
{
struct desctr dtr;
struct cpu *c;
if(p == 0)
panic("switchuvm: no process");
if(p->kstack == 0)
panic("switchuvm: no kstack");
if(p->pgdir == 0)
panic("switchuvm: no pgdir");
pushcli();
c = mycpu();
uint64 base = (uint64) &(c->ts);
c->gdt[SEG_TSS>>3] = SEGDESC(base, (sizeof(c->ts)-1), SEG_P|SEG_TSS64A);
c->gdt[(SEG_TSS>>3)+1] = SEGDESCHI(base);
c->ts.rsp[0] = (uint64) p->kstack + KSTACKSIZE;
c->ts.iomba = (ushort) 0xFFFF;
dtr.limit = sizeof(c->gdt) - 1;
dtr.base = (uint64)c->gdt;
lgdt((void *)&dtr.limit);
ltr(SEG_TSS);
lcr3(V2P(p->pgdir)); // switch to process's address space
popcli();
}
// Load the initcode into address 0 of pgdir.
// sz must be less than a page. // sz must be less than a page.
void void
inituvm(pde_t *pgdir, char *init, uint sz) uvminit(pagetable_t pagetable, char *src, uint sz)
{ {
char *mem; char *mem;
@ -232,63 +164,8 @@ inituvm(pde_t *pgdir, char *init, uint sz)
panic("inituvm: more than a page"); panic("inituvm: more than a page");
mem = kalloc(); mem = kalloc();
memset(mem, 0, PGSIZE); memset(mem, 0, PGSIZE);
mappages(pgdir, 0, PGSIZE, V2P(mem), PTE_W|PTE_U); mappages(pagetable, 0, PGSIZE, (uint64)mem, PTE_W|PTE_R|PTE_X|PTE_U);
memmove(mem, init, sz); memmove(mem, src, sz);
}
// Load a program segment into pgdir. addr must be page-aligned
// and the pages from addr to addr+sz must already be mapped.
int
loaduvm(pde_t *pgdir, char *addr, struct inode *ip, uint offset, uint sz)
{
uint i, n;
uint64 pa;
pte_t *pte;
if((uint64) addr % PGSIZE != 0)
panic("loaduvm: addr must be page aligned");
for(i = 0; i < sz; i += PGSIZE){
if((pte = walkpgdir(pgdir, addr+i, 0)) == 0)
panic("loaduvm: address should exist");
pa = PTE_ADDR(*pte);
if(sz - i < PGSIZE)
n = sz - i;
else
n = PGSIZE;
if(readi(ip, P2V(pa), offset+i, n) != n)
return -1;
}
return 0;
}
// Allocate page tables and physical memory to grow process from oldsz to
// newsz, which need not be page aligned. Returns new size or 0 on error.
int
allocuvm(pde_t *pgdir, uint oldsz, uint newsz)
{
char *mem;
uint64 a;
if(newsz >= KERNBASE)
return 0;
if(newsz < oldsz)
return oldsz;
a = PGROUNDUP(oldsz);
for(; a < newsz; a += PGSIZE){
mem = kalloc();
if(mem == 0){
deallocuvm(pgdir, newsz, oldsz);
return 0;
}
memset(mem, 0, PGSIZE);
if(mappages(pgdir, (char*)a, PGSIZE, V2P(mem), PTE_W|PTE_U) < 0){
deallocuvm(pgdir, newsz, oldsz);
kfree(mem);
return 0;
}
}
return newsz;
} }
// Deallocate user pages to bring the process size from oldsz to // Deallocate user pages to bring the process size from oldsz to
@ -296,153 +173,66 @@ allocuvm(pde_t *pgdir, uint oldsz, uint newsz)
// need to be less than oldsz. oldsz can be larger than the actual // need to be less than oldsz. oldsz can be larger than the actual
// process size. Returns the new process size. // process size. Returns the new process size.
int int
deallocuvm(pde_t *pml4, uint64 oldsz, uint64 newsz) uvmdealloc(pagetable_t pagetable, uint64 oldsz, uint64 newsz)
{ {
pte_t *pte;
uint64 a, pa;
if(newsz >= oldsz) if(newsz >= oldsz)
return oldsz; return oldsz;
unmappages(pagetable, newsz, oldsz - newsz, 1);
a = PGROUNDUP(newsz);
for(; a < oldsz; a += PGSIZE){
pte = walkpgdir(pml4, (char*)a, 0);
if(!pte)
continue;
else if((*pte & PTE_P) != 0){
pa = PTE_ADDR(*pte);
if(pa == 0)
panic("kfree");
char *v = P2V(pa);
kfree(v);
*pte = 0;
}
}
return newsz; return newsz;
} }
// Recursively free a page table // Recursively free page table pages.
void // All leaf mappings must already have been removed.
freelevel(pde_t *pgtab, int level) { static void
int i; freewalk(pagetable_t pagetable)
pde_t *pd; {
// there are 2^9 = 512 PTEs in a page table.
if (level > 0) { for(int i = 0; i < 512; i++){
for(i = 0; i < NPDENTRIES; i++) { pte_t pte = pagetable[i];
if(pgtab[i] & PTE_P){ if((pte & PTE_V) && (pte & (PTE_R|PTE_W|PTE_X)) == 0){
pd = (pde_t*)P2V(PTE_ADDR(pgtab[i])); // this PTE points to a lower-level page table.
freelevel(pd, level-1); uint64 child = PTE2PA(pte);
} freewalk((pagetable_t)child);
pagetable[i] = 0;
} else if(pte & PTE_V){
// XXX trampoline pages...
panic("freewalk: leaf");
} }
} }
kfree((char*)pgtab); kfree((void*)pagetable);
} }
// Free all the physical memory pages // Free user memory pages,
// in the user part and page table // then free page table pages.
void void
freevm(pde_t *pml4, uint64 sz) uvmfree(pagetable_t pagetable, uint64 sz)
{ {
if(pml4 == 0) unmappages(pagetable, 0, sz, 1);
panic("freevm: no pgdir"); freewalk(pagetable);
deallocuvm(pml4, sz, 0);
freelevel(pml4, L_PML4);
} }
// Clear PTE_U on a page. Used to create an inaccessible // Given a parent process's page table, copy
// page beneath the user stack. // its memory into a child's page table.
// Copies both the page table and the
// physical memory.
void void
clearpteu(pde_t *pgdir, char *uva) uvmcopy(pagetable_t old, pagetable_t new, uint64 sz)
{ {
pte_t *pte; pte_t *pte;
pte = walkpgdir(pgdir, uva, 0);
if(pte == 0)
panic("clearpteu");
*pte &= ~PTE_U;
}
// Given a parent process's page table, create a copy
// of it for a child.
pde_t*
copyuvm(pde_t *pgdir, uint sz)
{
pde_t *d;
pte_t *pte;
uint64 pa, i; uint64 pa, i;
uint flags; uint flags;
char *mem; char *mem;
if((d = setupkvm()) == 0)
return 0;
for(i = 0; i < sz; i += PGSIZE){ for(i = 0; i < sz; i += PGSIZE){
if((pte = walkpgdir(pgdir, (void *) i, 0)) == 0) if((pte = walk(old, (void *) i, 0)) == 0)
panic("copyuvm: pte should exist"); panic("copyuvm: pte should exist");
if(!(*pte & PTE_P)) if((*pte & PTE_V) == 0)
panic("copyuvm: page not present"); panic("copyuvm: page not present");
pa = PTE_ADDR(*pte); pa = PTE2PA(*pte);
flags = PTE_FLAGS(*pte); flags = PTE_FLAGS(*pte);
if((mem = kalloc()) == 0) if((mem = kalloc()) == 0)
goto bad; panic("uvmcopy: kalloc failed");
memmove(mem, (char*)P2V(pa), PGSIZE); memmove(mem, (char*)pa, PGSIZE);
if(mappages(d, (void*)i, PGSIZE, V2P(mem), flags) < 0) { mappages(new, i, PGSIZE, (uint64)mem, flags);
kfree(mem);
goto bad;
}
} }
return d;
bad:
freevm(d, sz);
return 0;
} }
//PAGEBREAK!
// Map user virtual address to kernel address.
char*
uva2ka(pde_t *pgdir, char *uva)
{
pte_t *pte;
pte = walkpgdir(pgdir, uva, 0);
if((*pte & PTE_P) == 0)
return 0;
if((*pte & PTE_U) == 0)
return 0;
return (char*)P2V(PTE_ADDR(*pte));
}
// Copy len bytes from p to user address va in page table pgdir.
// Most useful when pgdir is not the current page table.
// uva2ka ensures this only works for PTE_U pages.
int
copyout(pde_t *pgdir, uint va, void *p, uint len)
{
char *buf, *pa0;
uint64 n, va0;
buf = (char*)p;
while(len > 0){
va0 = (uint)PGROUNDDOWN(va);
pa0 = uva2ka(pgdir, (char*)va0);
if(pa0 == 0)
return -1;
n = PGSIZE - (va - va0);
if(n > len)
n = len;
memmove(pa0 + (va - va0), buf, n);
len -= n;
buf += n;
va = va0 + PGSIZE;
}
return 0;
}
//PAGEBREAK!
// Blank page.
//PAGEBREAK!
// Blank page.
//PAGEBREAK!
// Blank page.

198
x86.h
View file

@ -1,198 +0,0 @@
// Routines to let C code use special x86 instructions.
#ifndef __ASSEMBLER__
static inline uchar
inb(ushort port)
{
uchar data;
asm volatile("in %1,%0" : "=a" (data) : "d" (port));
return data;
}
static inline void
insl(int port, void *addr, int cnt)
{
asm volatile("cld; rep insl" :
"=D" (addr), "=c" (cnt) :
"d" (port), "0" (addr), "1" (cnt) :
"memory", "cc");
}
static inline void
outb(ushort port, uchar data)
{
asm volatile("out %0,%1" : : "a" (data), "d" (port));
}
// Write one 16-bit word to x86 I/O port `port` (OUT AX, DX).
// Operand size is selected by the ushort operand type.
static inline void
outw(ushort port, ushort data)
{
  asm volatile("out %0,%1" : : "a" (data), "d" (port));
}
// Write cnt 32-bit words from memory at addr to I/O port (CLD; REP OUTSL).
// No "memory" clobber needed: the asm only reads from *addr.
static inline void
outsl(int port, const void *addr, int cnt)
{
  asm volatile("cld; rep outsl" :
               "=S" (addr), "=c" (cnt) :
               "d" (port), "0" (addr), "1" (cnt) :
               "cc");
}
// Fill cnt bytes at addr with the byte value `data` (CLD; REP STOSB).
static inline void
stosb(void *addr, int data, int cnt)
{
  asm volatile("cld; rep stosb" :
               "=D" (addr), "=c" (cnt) :
               "0" (addr), "1" (cnt), "a" (data) :
               "memory", "cc");
}
// Fill cnt 32-bit words at addr with the value `data` (CLD; REP STOSL).
static inline void
stosl(void *addr, int data, int cnt)
{
  asm volatile("cld; rep stosl" :
               "=D" (addr), "=c" (cnt) :
               "0" (addr), "1" (cnt), "a" (data) :
               "memory", "cc");
}
// Load the global descriptor table register from the descriptor at p.
static inline void
lgdt(void *p)
{
  asm volatile("lgdt (%0)" : : "r" (p) : "memory");
}
// Load the interrupt descriptor table register from the descriptor at p.
static inline void
lidt(void *p)
{
  asm volatile("lidt (%0)" : : "r" (p) : "memory");
}
// Load the task register with GDT selector sel.
static inline void
ltr(ushort sel)
{
  asm volatile("ltr %0" : : "r" (sel));
}
// Return the CPU flags register by pushing it onto the stack (PUSHF)
// and popping it into a general-purpose register.
static inline uint64
readeflags(void)
{
  uint64 eflags;

  asm volatile("pushf; pop %0" : "=r" (eflags));
  return eflags;
}
// Load segment selector v into %gs.
static inline void
loadgs(ushort v)
{
  asm volatile("movw %0, %%gs" : : "r" (v));
}
// Disable maskable interrupts on this CPU.
static inline void
cli(void)
{
  asm volatile("cli");
}
// Enable maskable interrupts on this CPU.
static inline void
sti(void)
{
  asm volatile("sti");
}
// Atomically swap newval into *addr and return the previous value
// (LOCK; XCHGL). Used as the building block for spinlock acquire.
static inline uint
xchg(volatile uint *addr, uint newval)
{
  uint result;

  // The + in "+m" denotes a read-modify-write operand.
  // "1" (newval) places newval in the same register chosen for result.
  asm volatile("lock; xchgl %0, %1" :
               "+m" (*addr), "=a" (result) :
               "1" (newval) :
               "cc");
  return result;
}
// Read %cr2, which holds the faulting linear address after a page fault.
// NOTE(review): val is uint64 but the return type is uint — the high 32
// bits of a 64-bit fault address are silently dropped; confirm whether
// the return type should be uint64.
static inline uint
rcr2(void)
{
  uint64 val;

  asm volatile("mov %%cr2,%0" : "=r" (val));
  return val;
}
// Load a page-table base (physical address) into %cr3,
// switching the active address space.
static inline void
lcr3(uint64 val)
{
  asm volatile("mov %0,%%cr3" : : "r" (val));
}
// Write selector v into %gs.
// NOTE(review): duplicates loadgs() above (ushort vs uint16 parameter);
// consider consolidating to one helper.
static inline void
writegs(uint16 v)
{
  __asm volatile("movw %0, %%gs" : : "r" (v));
}
//PAGEBREAK: 36
// Layout of the trap frame built on the stack by the
// hardware and by trapasm.S, and passed to trap().
// Field order is the ABI contract with trapasm.S — do not reorder.
struct trapframe {
  // General-purpose registers.
  uint64 rax;
  uint64 rbx;
  uint64 rcx;
  uint64 rdx;
  uint64 rbp;
  uint64 rsi;
  uint64 rdi;
  uint64 r8;
  uint64 r9;
  uint64 r10;
  uint64 r11;
  uint64 r12;
  uint64 r13;
  uint64 r14;
  uint64 r15;

  uint64 trapno;       // trap/interrupt vector number
  uint64 err;          // error code (pushed by some exceptions)
  uint64 rip;
  uint16 cs;
  uint16 padding[3];   // pads cs to 8 bytes so rflags stays aligned
  uint64 rflags;
  uint64 rsp;
  uint64 ss;
}__attribute__((packed));
// Register frame saved on system-call entry.
// NOTE(review): field order presumably matches the syscall entry asm —
// confirm against the entry path before changing.
struct sysframe {
  // arguments (x86-64 syscall argument registers)
  uint64 rdi;
  uint64 rsi;
  uint64 rdx;
  uint64 r10;
  uint64 r8;
  uint64 r9;
  // callee-saved registers
  uint64 r15;
  uint64 r14;
  uint64 r13;
  uint64 r12;
  uint64 rbx;
  uint64 rbp;
  // return value
  uint64 rax;
  // syscall registers
  uint64 r11; // eflags
  uint64 rcx; // rip
  uint64 rsp;
}__attribute__((packed));
#endif
#define TF_CS 144 // offset in trapframe for saved cs