fork/wait/exit work

This commit is contained in:
Robert Morris 2019-05-31 09:45:59 -04:00
parent 0f90388c89
commit 2ec1959fd1
30 changed files with 1098 additions and 1863 deletions

3
.gdbinit.tmpl-riscv Normal file
View file

@ -0,0 +1,3 @@
set architecture riscv
target remote 127.0.0.1:1234
symbol-file kernel

101
Makefile
View file

@ -1,4 +1,20 @@
OBJS = \
start.o \
console.o \
uart.o \
kalloc.o \
spinlock.o \
string.o \
main.o \
vm.o \
proc.o \
swtch.o \
trampoline.o \
trap.o \
syscall.o \
sysproc.o
XXXOBJS = \
bio.o\
console.o\
exec.o\
@ -28,48 +44,23 @@ OBJS = \
vectors.o\
vm.o\
# Cross-compiling (e.g., on Mac OS X)
# TOOLPREFIX = i386-jos-elf
# Using native tools (e.g., on X86 Linux)
# riscv64-unknown-elf- or riscv64-linux-gnu-
# perhaps in /opt/riscv/bin
#TOOLPREFIX =
# Try to infer the correct TOOLPREFIX if not set
ifndef TOOLPREFIX
TOOLPREFIX := $(shell if i386-jos-elf-objdump -i 2>&1 | grep '^elf32-i386$$' >/dev/null 2>&1; \
then echo 'i386-jos-elf-'; \
elif objdump -i 2>&1 | grep 'elf32-i386' >/dev/null 2>&1; \
then echo ''; \
TOOLPREFIX := $(shell if riscv64-unknown-elf-objdump -i 2>&1 | grep 'elf64-big' >/dev/null 2>&1; \
then echo 'riscv64-unknown-elf-'; \
elif riscv64-linux-gnu-objdump -i 2>&1 | grep 'elf64-big' >/dev/null 2>&1; \
then echo 'riscv64-linux-gnu-'; \
else echo "***" 1>&2; \
echo "*** Error: Couldn't find an i386-*-elf version of GCC/binutils." 1>&2; \
echo "*** Is the directory with i386-jos-elf-gcc in your PATH?" 1>&2; \
echo "*** If your i386-*-elf toolchain is installed with a command" 1>&2; \
echo "*** prefix other than 'i386-jos-elf-', set your TOOLPREFIX" 1>&2; \
echo "*** environment variable to that prefix and run 'make' again." 1>&2; \
echo "*** Error: Couldn't find an riscv64 version of GCC/binutils." 1>&2; \
echo "*** To turn off this error, run 'gmake TOOLPREFIX= ...'." 1>&2; \
echo "***" 1>&2; exit 1; fi)
endif
# If the makefile can't find QEMU, specify its path here
QEMU = qemu-system-x86_64
# Try to infer the correct QEMU
ifndef QEMU
QEMU = $(shell if which qemu > /dev/null; \
then echo qemu; exit; \
elif which qemu-system-i386 > /dev/null; \
then echo qemu-system-i386; exit; \
elif which qemu-system-x86_64 > /dev/null; \
then echo qemu-system-x86_64; exit; \
else \
qemu=/Applications/Q.app/Contents/MacOS/i386-softmmu.app/Contents/MacOS/i386-softmmu; \
if test -x $$qemu; then echo $$qemu; exit; fi; fi; \
echo "***" 1>&2; \
echo "*** Error: Couldn't find a working QEMU executable." 1>&2; \
echo "*** Is the directory containing the qemu binary in your PATH" 1>&2; \
echo "*** or have you tried setting the QEMU variable in Makefile?" 1>&2; \
echo "***" 1>&2; exit 1)
endif
QEMU = qemu-system-riscv64
CC = $(TOOLPREFIX)gcc
AS = $(TOOLPREFIX)gas
@ -77,15 +68,10 @@ LD = $(TOOLPREFIX)ld
OBJCOPY = $(TOOLPREFIX)objcopy
OBJDUMP = $(TOOLPREFIX)objdump
XFLAGS = -m64 -mcmodel=large -ggdb
# CFLAGS = -fno-pic -static -fno-builtin -fno-strict-aliasing -O2 -Wall -MD -ggdb -Werror -fno-omit-frame-pointer
CFLAGS = -fno-pic -static -fno-builtin -fno-strict-aliasing -Wall -MD -ggdb -Werror -fno-omit-frame-pointer
CFLAGS += -ffreestanding -fno-common -nostdlib $(XFLAGS)
# Build CFLAGS up incrementally. Only the first line may use '=';
# a second plain '=' would throw away the warning, debug, optimization
# and dependency-generation (-MD) flags set on the first line.
CFLAGS = -fno-pic -static -fno-builtin -fno-strict-aliasing -Wall -MD -ggdb -Werror -fno-omit-frame-pointer -O
CFLAGS += -mcmodel=medany
CFLAGS += -ffreestanding -fno-common -nostdlib -mno-relax
CFLAGS += $(shell $(CC) -fno-stack-protector -E -x c /dev/null >/dev/null 2>&1 && echo -fno-stack-protector)
ASFLAGS = -gdwarf-2 -Wa,-divide $(XFLAGS)
# FreeBSD ld wants ``elf_i386_fbsd''
LDFLAGS += -m $(shell $(LD) -V | grep elf_x86_64 2>/dev/null | head -n 1)
LDFLAGS += -z max-page-size=4096
# Disable PIE when possible (for Ubuntu 16.10 toolchain)
ifneq ($(shell $(CC) -dumpspecs 2>/dev/null | grep -e '[^f]no-pie'),)
@ -95,21 +81,17 @@ ifneq ($(shell $(CC) -dumpspecs 2>/dev/null | grep -e '[^f]nopie'),)
CFLAGS += -fno-pie -nopie
endif
kernel: $(OBJS) entry.o entryother initcode kernel.ld
$(LD) $(LDFLAGS) -T kernel.ld -o kernel entry.o $(OBJS) -b binary initcode entryother
LDFLAGS = -z max-page-size=4096
kernel: $(OBJS) entry.o kernel.ld
$(LD) $(LDFLAGS) -T kernel.ld -o kernel entry.o $(OBJS)
$(OBJDUMP) -S kernel > kernel.asm
$(OBJDUMP) -t kernel | sed '1,/SYMBOL TABLE/d; s/ .* / /; /^$$/d' > kernel.sym
entryother: entryother.S
$(CC) $(CFLAGS) -fno-pic -nostdinc -I. -c entryother.S
$(LD) $(LDFLAGS) -N -e start -Ttext 0x7000 -o bootblockother.o entryother.o
$(OBJCOPY) -S -O binary -j .text bootblockother.o entryother
$(OBJDUMP) -S bootblockother.o > entryother.asm
initcode: initcode.S
$(CC) $(CFLAGS) -nostdinc -I. -c initcode.S
$(LD) $(LDFLAGS) -N -e start -Ttext 0 -o initcode.out initcode.o
$(OBJCOPY) -S -O binary initcode.out initcode
#$(LD) $(LDFLAGS) -N -e start -Ttext 0 -o initcode.out initcode.o
#$(OBJCOPY) -S -O binary initcode.out initcode
$(OBJDUMP) -S initcode.o > initcode.asm
tags: $(OBJS) entryother.S _init
@ -186,19 +168,18 @@ QEMUGDB = $(shell if $(QEMU) -help | grep -q '^-gdb'; \
then echo "-gdb tcp::$(GDBPORT)"; \
else echo "-s -p $(GDBPORT)"; fi)
ifndef CPUS
CPUS := 2
CPUS := 1
endif
QEMUOPTS = -kernel kernel -drive file=fs.img,index=1,media=disk,format=raw -smp $(CPUS) -m 512 $(QEMUEXTRA)
qemu: fs.img
$(QEMU) -serial mon:stdio $(QEMUOPTS)
QEMUOPTS = -machine virt -kernel kernel -m 3G -smp $(CPUS) -nographic
#QEMUOPTS += -initrd fs.img
qemu-nox: fs.img kernel
$(QEMU) -nographic $(QEMUOPTS)
qemu: kernel
$(QEMU) $(QEMUOPTS)
.gdbinit: .gdbinit.tmpl-x64
sed "s/localhost:1234/localhost:$(GDBPORT)/" < $^ > $@
.gdbinit: .gdbinit.tmpl-riscv
sed "s/:1234/:$(GDBPORT)/" < $^ > $@
qemu-gdb: fs.img kernel .gdbinit
qemu-gdb: kernel .gdbinit
@echo "*** Now run 'gdb'." 1>&2
$(QEMU) $(QEMUOPTS) -S $(QEMUGDB)

189
console.c
View file

@ -5,17 +5,14 @@
#include <stdarg.h>
#include "types.h"
#include "defs.h"
#include "param.h"
#include "traps.h"
#include "spinlock.h"
#include "sleeplock.h"
#include "fs.h"
#include "file.h"
#include "memlayout.h"
#include "mmu.h"
#include "proc.h"
#include "x86.h"
#include "riscv.h"
#include "defs.h"
static void consputc(int);
@ -28,6 +25,12 @@ static struct {
static char digits[] = "0123456789abcdef";
// Prepare the console for use: initialize cons.lock, naming it "console".
void
consoleinit(void)
{
initlock(&cons.lock, "console");
}
static void
printint(int xx, int base, int sign)
{
@ -66,7 +69,7 @@ printptr(uint64 x) {
// Print to the console. only understands %d, %x, %p, %s.
void
cprintf(char *fmt, ...)
printf(char *fmt, ...)
{
va_list ap;
int i, c, locking;
@ -122,67 +125,20 @@ cprintf(char *fmt, ...)
void
panic(char *s)
{
int i;
uint64 pcs[10];
cli();
cons.locking = 0;
// use lapiccpunum so that we can call panic from mycpu()
cprintf("lapicid %d: panic: ", lapicid());
cprintf(s);
cprintf("\n");
getcallerpcs(&s, pcs);
for(i=0; i<10; i++)
cprintf(" %p", pcs[i]);
printf("panic: ");
printf(s);
printf("\n");
panicked = 1; // freeze other CPU
for(;;)
;
}
//PAGEBREAK: 50
#define BACKSPACE 0x100
#define CRTPORT 0x3d4
static ushort *crt = (ushort*)P2V(0xb8000); // CGA memory
// Put character c on the CGA text screen at the hardware cursor and
// advance the cursor. Handles newline, BACKSPACE and scrolling.
// The screen is addressed as an array of 16-bit cells (crt), 80 per row.
static void
cgaputc(int c)
{
int pos;
// Cursor position: col + 80*row.
// Register 14 supplies the high byte, register 15 the low byte.
outb(CRTPORT, 14);
pos = inb(CRTPORT+1) << 8;
outb(CRTPORT, 15);
pos |= inb(CRTPORT+1);
if(c == '\n')
// Move to the first column of the next row.
pos += 80 - pos%80;
else if(c == BACKSPACE){
// Step back one cell, but never before the start of the screen.
if(pos > 0) --pos;
} else
crt[pos++] = (c&0xff) | 0x0700; // black on white
if(pos < 0 || pos > 25*80)
panic("pos under/overflow");
if((pos/80) >= 24){ // Scroll up.
// Copy rows 1..23 over rows 0..22, move the cursor up one row,
// and clear everything from the cursor to the end of row 23.
memmove(crt, crt+80, sizeof(crt[0])*23*80);
pos -= 80;
memset(crt+pos, 0, sizeof(crt[0])*(24*80 - pos));
}
// Write the new cursor position back (high byte, then low byte).
outb(CRTPORT, 14);
outb(CRTPORT+1, pos>>8);
outb(CRTPORT, 15);
outb(CRTPORT+1, pos);
// Draw a blank cell under the cursor.
crt[pos] = ' ' | 0x0700;
}
void
consputc(int c)
{
if(panicked){
cli();
for(;;)
;
}
@ -191,125 +147,4 @@ consputc(int c)
uartputc('\b'); uartputc(' '); uartputc('\b');
} else
uartputc(c);
cgaputc(c);
}
#define INPUT_BUF 128
struct {
char buf[INPUT_BUF];
uint r; // Read index
uint w; // Write index
uint e; // Edit index
} input;
#define C(x) ((x)-'@') // Control-x
// Console input handler: drain characters from getc() (which returns a
// negative value when nothing is left) into the circular input buffer,
// handling the line-editing control characters. Holds cons.lock for
// the whole drain loop.
void
consoleintr(int (*getc)(void))
{
int c, doprocdump = 0;
acquire(&cons.lock);
while((c = getc()) >= 0){
switch(c){
case C('P'): // Process listing.
// procdump() locks cons.lock indirectly; invoke later
doprocdump = 1;
break;
case C('U'): // Kill line.
// Erase back to the last committed position (input.w) or to the
// previous newline, whichever comes first.
while(input.e != input.w &&
input.buf[(input.e-1) % INPUT_BUF] != '\n'){
input.e--;
consputc(BACKSPACE);
}
break;
case C('H'): case '\x7f': // Backspace
// Only uncommitted characters (between input.w and input.e) can be erased.
if(input.e != input.w){
input.e--;
consputc(BACKSPACE);
}
break;
default:
// Store the character if it is non-zero and the buffer is not full.
if(c != 0 && input.e-input.r < INPUT_BUF){
c = (c == '\r') ? '\n' : c;
input.buf[input.e++ % INPUT_BUF] = c;
consputc(c);
// Commit the line and wake readers on newline, ^D, or a full buffer.
if(c == '\n' || c == C('D') || input.e == input.r+INPUT_BUF){
input.w = input.e;
wakeup(&input.r);
}
}
break;
}
}
release(&cons.lock);
if(doprocdump) {
procdump(); // now call procdump() without cons.lock held
}
}
// Read up to n bytes of committed console input into dst.
// Returns after a newline, after ^D (end of file), or once n bytes
// have been copied. Returns the number of bytes copied, or -1 if the
// calling process has been killed while waiting for input.
// The inode lock on ip is released for the duration of the read and
// re-acquired before returning.
int
consoleread(struct inode *ip, char *dst, int n)
{
uint target;
int c;
iunlock(ip);
target = n;
acquire(&cons.lock);
while(n > 0){
// Wait until consoleintr() has committed some input (input.w advanced).
while(input.r == input.w){
if(myproc()->killed){
release(&cons.lock);
ilock(ip);
return -1;
}
sleep(&input.r, &cons.lock);
}
c = input.buf[input.r++ % INPUT_BUF];
if(c == C('D')){ // EOF
if(n < target){
// Save ^D for next time, to make sure
// caller gets a 0-byte result.
input.r--;
}
break;
}
*dst++ = c;
--n;
// A whole line has been delivered; stop here.
if(c == '\n')
break;
}
release(&cons.lock);
ilock(ip);
// n counts the bytes still wanted, so this is the number delivered.
return target - n;
}
// Send n bytes starting at buf to the console, one character at a time,
// while holding cons.lock. The inode lock on ip is dropped before the
// output and taken again afterwards. Always returns n.
int
consolewrite(struct inode *ip, char *buf, int n)
{
  char *next = buf;
  int remaining;

  iunlock(ip);
  acquire(&cons.lock);
  for(remaining = n; remaining > 0; remaining--){
    consputc(*next & 0xff);
    next++;
  }
  release(&cons.lock);
  ilock(ip);

  return n;
}
// Set up the console device: initialize its lock, register the console
// read/write handlers in the device switch table, turn on locking for
// console output, and enable the keyboard interrupt.
void
consoleinit(void)
{
initlock(&cons.lock, "console");
// Route reads and writes of the CONSOLE device to this file's handlers.
devsw[CONSOLE].write = consolewrite;
devsw[CONSOLE].read = consoleread;
cons.locking = 1;
// NOTE(review): second argument 0 is presumably the target CPU; confirm in ioapic.c.
ioapicenable(IRQ_KBD, 0);
}

42
defs.h
View file

@ -19,7 +19,7 @@ void bwrite(struct buf*);
// console.c
void consoleinit(void);
void cprintf(char*, ...);
void printf(char*, ...);
void consoleintr(int(*)(void));
void panic(char*) __attribute__((noreturn));
@ -65,10 +65,9 @@ extern uchar ioapicid;
void ioapicinit(void);
// kalloc.c
char* kalloc(void);
void kfree(char*);
void kinit1(void*, void*);
void kinit2(void*, void*);
void* kalloc(void);
void kfree(void *);
void kinit();
// kbd.c
void kbdintr(void);
@ -112,7 +111,7 @@ int kill(int);
struct cpu* mycpu(void);
struct cpu* getmycpu(void);
struct proc* myproc();
void pinit(void);
void procinit(void);
void procdump(void);
void scheduler(void) __attribute__((noreturn));
void sched(void);
@ -124,7 +123,7 @@ void wakeup(void*);
void yield(void);
// swtch.S
void swtch(struct context**, struct context*);
void swtch(struct context*, struct context*);
// spinlock.c
void acquire(struct spinlock*);
@ -158,16 +157,16 @@ int argaddr(int, uint64 *);
int fetchint(uint64, int*);
int fetchstr(uint64, char**);
int fetchaddr(uint64, uint64*);
void syscall(struct sysframe*);
void syscall();
// timer.c
void timerinit(void);
// trap.c
void idtinit(void);
extern uint ticks;
void tvinit(void);
void trapinit(void);
extern struct spinlock tickslock;
void usertrapret(void);
// uart.c
void uartinit(void);
@ -175,20 +174,15 @@ void uartintr(void);
void uartputc(int);
// vm.c
void seginit(void);
void kvmalloc(void);
pde_t* setupkvm(void);
char* uva2ka(pde_t*, char*);
int allocuvm(pde_t*, uint, uint);
int deallocuvm(pde_t*, uint64, uint64);
void freevm(pde_t*, uint64);
void inituvm(pde_t*, char*, uint);
int loaduvm(pde_t*, char*, struct inode*, uint, uint);
pde_t* copyuvm(pde_t*, uint);
void switchuvm(struct proc*);
void switchkvm(void);
int copyout(pde_t*, uint, void*, uint);
void clearpteu(pde_t *pgdir, char *uva);
void kvminit(void);
void kvmswitch(void);
pagetable_t uvmcreate(void);
void uvminit(pagetable_t, char *, uint);
int uvmdealloc(pagetable_t, uint64, uint64);
void uvmcopy(pagetable_t, pagetable_t, uint64);
void uvmfree(pagetable_t, uint64);
void mappages(pagetable_t, uint64, uint64, uint64, int);
void unmappages(pagetable_t, uint64, uint64, int);
// number of elements in fixed-size array
#define NELEM(x) (sizeof(x)/sizeof((x)[0]))

245
entry.S
View file

@ -1,223 +1,22 @@
# x86-64 bootstrap, assuming load by MultiBoot-compliant loader.
# The MultiBoot specification is at:
# http://www.gnu.org/software/grub/manual/multiboot/multiboot.html
# GRUB is a MultiBoot loader, as is qemu's -kernel option.
#include "mmu.h"
#include "memlayout.h"
# STACK is the size of the bootstrap stack.
#define STACK 8192
# MultiBoot header.
# http://www.gnu.org/software/grub/manual/multiboot/multiboot.html#Header-layout
.align 4
.text
.globl multiboot_header
multiboot_header:
#define magic 0x1badb002
#define flags (1<<16 | 1<<0)
.long magic
.long flags
.long (- magic - flags) # checksum
.long V2P_WO(multiboot_header) # header address
.long V2P_WO(multiboot_header) # load address
.long V2P_WO(edata) # load end address
.long V2P_WO(end) # bss end address
.long V2P_WO(start) # entry address
# Entry point jumped to by boot loader. Running in 32-bit mode.
# http://www.gnu.org/software/grub/manual/multiboot/multiboot.html#Machine-state
#
# EAX = 0x2badb002
# EBX = address of multiboot information structure
# CS = 32-bit read/execute code segment with identity map
# DS, ES, FS, GS, SS = 32-bit read/write data segment with identity map
# A20 gate = enabled
# CR0 = PE set, PG clear
# EFLAGS = VM clear, IF clear
#
.code32
.globl start
start:
# Tell BIOS to do "warm reboot" when we shut down.
movw $0x1234, 0x472
# Set up multiboot arguments for main.
movl %eax, %edi
movl %ebx, %esi
# Initialize stack.
movl $V2P_WO(stack+STACK), %esp
# Zero bss. QEMU's MultiBoot seems not to.
# It's possible that the header above is not right, but it looks right.
# %edi is holding multiboot argument, so save in another register.
# (The stack is in the bss.)
movl %edi, %edx
movl $V2P_WO(edata), %edi
movl $V2P_WO(end), %ecx
subl $V2P_WO(edata), %ecx
movl $0, %eax
cld
rep stosb
movl %edx, %edi
call loadgdt
# Enter new 32-bit code segment (already in 32-bit mode).
ljmp $SEG_KCODE32, $V2P_WO(start32) // code32 segment selector
start32:
# Initialize page table.
call initpagetables
call init32e
movl $V2P_WO(start64), %eax
# Enter 64-bit mode.
ljmp $SEG_KCODE, $V2P_WO(tramp64) // code64 segment selector
.code64
start64:
# Load VA of stack
movabsq $(stack+STACK), %rsp
# Clear frame pointer for stack walks
movl $0, %ebp
# Call into C code.
call main
# should not return from main
jmp .
.code32
.global apstart
apstart:
call loadgdt
ljmp $SEG_KCODE32, $V2P_WO(apstart32) // code32 segment selector
apstart32:
call init32e
movl $V2P_WO(apstart64), %eax
ljmp $SEG_KCODE, $V2P_WO(tramp64) // code64 segment selector
.code64
apstart64:
# Remember (from bootothers), that our kernel stack pointer is
# at the top of our temporary stack.
popq %rax
movq %rax, %rsp
movq $0, %rbp
call apmain
jmp .
.code64
tramp64:
# The linker thinks we are running at tramp64, but we're actually
# running at PADDR(tramp64), so use an explicit calculation to
# load and jump to the correct address. %rax should hold the
# physical address of the jmp target.
movq $KERNBASE, %r11
addq %r11, %rax
jmp *%rax
# Initial stack
.comm stack, STACK
# Page tables. See section 4.5 of 253668.pdf.
# We map the first GB of physical memory at 0 and at 1 TB (not GB) before
# the end of virtual memory. At boot time we are using the mapping at 0
# but during ordinary execution we use the high mapping.
# The intent is that after bootstrap the kernel can expand this mapping
# to cover all the available physical memory.
# This would be easier if we could use the PS bit to create GB-sized entries
# and skip the pdt table, but not all chips support it, and QEMU doesn't.
.align 4096
pml4:
.quad V2P_WO(pdpt) + PTE_P + PTE_W // present, read/write
.quad 0
.space 4096 - 2*16
.quad V2P_WO(pdpt) + PTE_P + PTE_W
.quad 0
.align 4096
pdpt:
.quad V2P_WO(pdt) + PTE_P + PTE_W
.space 4096 - 8
.align 4096
pdt:
// Filled in below.
.space 4096
.code32
initpagetables:
pushl %edi
pushl %ecx
pushl %eax
// Set up 64-bit entry in %edx:%eax.
// Base address 0, present, read/write, large page.
movl $(0 | PTE_P | PTE_W | PTE_PS), %eax
movl $0, %edx
// Fill in 512 entries at pdt.
movl $V2P_WO(pdt), %edi
movl $512, %ecx
1:
// Write this 64-bit entry.
movl %eax, 0(%edi)
movl %edx, 4(%edi)
addl $8, %edi
// 64-bit add to prepare address for next entry.
// Because this is a large page entry, it covers 512 4k pages (2 MB).
add $(512*4096), %eax
adc $0, %edx
loop 1b
popl %eax
popl %ecx
popl %edi
ret
# Initialize IA-32e mode. See section 9.8.5 of 253668.pdf.
init32e:
# Set CR4.PAE and CR4.PSE = 1.
movl %cr4, %eax
orl $0x30, %eax
movl %eax, %cr4
# Load CR3 with physical base address of level 4 page table.
movl $V2P_WO(pml4), %eax
movl %eax, %cr3
# Enable IA-32e mode by setting IA32_EFER.LME = 1.
# Also turn on IA32_EFER.SCE (syscall enable).
movl $0xc0000080, %ecx
rdmsr
orl $0x101, %eax
wrmsr
# Enable paging by setting CR0.PG = 1.
movl %cr0, %eax
orl $0x80000000, %eax
movl %eax, %cr0
nop
nop
ret
loadgdt:
subl $8, %esp
movl $V2P_WO(bootgdt), 4(%esp)
movw $(8*NSEGS-1), 2(%esp)
lgdt 2(%esp)
addl $8, %esp
movl $SEG_KDATA, %eax // data segment selector
movw %ax, %ds
movw %ax, %es
movw %ax, %ss
movl $0, %eax // null segment selector
movw %ax, %fs
movw %ax, %gs
ret
# qemu -kernel starts at 0x1000. the instructions
# there seem to be provided by qemu, as if it
# were a ROM. the code at 0x1000 jumps to
# 0x80000000, the _entry function here,
# in machine mode.
.section .data
.globl stack0
.section .text
.globl mstart
.section .text
.globl _entry
_entry:
# set up a stack for C; stack0 is declared elsewhere
# (presumably start.c, next to mstart).
# sp = stack0 + 4096, reached in four steps of 1024.
# NOTE(review): the split is presumably because an addi immediate
# cannot hold 4096 -- confirm against the RISC-V ISA manual.
la sp, stack0
addi sp, sp, 1024
addi sp, sp, 1024
addi sp, sp, 1024
addi sp, sp, 1024
# jump to mstart() in start.c
call mstart
# mstart() is not expected to return; spin forever if it does.
junk:
j junk

22
exec.c
View file

@ -19,8 +19,8 @@ exec(char *path, char **argv)
struct inode *ip;
struct proghdr ph;
pde_t *pgdir, *oldpgdir;
struct proc *curproc = myproc();
uint64 oldsz = curproc->sz;
struct proc *p = myproc();
uint64 oldsz = p->sz;
begin_op();
@ -85,8 +85,8 @@ exec(char *path, char **argv)
ustack[1] = argc;
ustack[2] = sp - (argc+1)*sizeof(uint64); // argv pointer
curproc->sf->rdi = argc;
curproc->sf->rsi = sp - (argc+1)*sizeof(uint64);
p->sf->rdi = argc;
p->sf->rsi = sp - (argc+1)*sizeof(uint64);
sp -= (3+argc+1) * sizeof(uint64);
if(copyout(pgdir, sp, ustack, (3+argc+1)*sizeof(uint64)) < 0)
@ -96,15 +96,15 @@ exec(char *path, char **argv)
for(last=s=path; *s; s++)
if(*s == '/')
last = s+1;
safestrcpy(curproc->name, last, sizeof(curproc->name));
safestrcpy(p->name, last, sizeof(p->name));
// Commit to the user image.
oldpgdir = curproc->pgdir;
curproc->pgdir = pgdir;
curproc->sz = sz;
curproc->sf->rcx = elf.entry; // main
curproc->sf->rsp = sp;
switchuvm(curproc);
oldpgdir = p->pgdir;
p->pgdir = pgdir;
p->sz = sz;
p->sf->rcx = elf.entry; // main
p->sf->rsp = sp;
switchuvm(p);
freevm(oldpgdir, oldsz);
return 0;

View file

@ -2,22 +2,20 @@
# This code runs in user space.
#include "syscall.h"
#include "traps.h"
# exec(init, argv)
.globl start
start:
mov $init, %rdi
mov $argv, %rsi
mov $SYS_exec, %rax
syscall
la a0, init
la a1, argv
li a7, SYS_exec
ecall
# for(;;) exit();
exit:
mov $SYS_exit, %rax
syscall
jmp exit
li a7, SYS_exit
ecall
jal exit
# char init[] = "/init\0";
init:
@ -28,4 +26,3 @@ init:
argv:
.long init
.long 0

View file

@ -3,13 +3,14 @@
// and pipe buffers. Allocates 4096-byte pages.
#include "types.h"
#include "defs.h"
#include "param.h"
#include "memlayout.h"
#include "mmu.h"
#include "spinlock.h"
#include "riscv.h"
#include "defs.h"
void freerange(void *pa_start, void *pa_end);
void freerange(void *vstart, void *vend);
extern char end[]; // first address after kernel loaded from ELF file
// defined by the kernel linker script in kernel.ld
@ -19,36 +20,22 @@ struct run {
struct {
struct spinlock lock;
int use_lock;
struct run *freelist;
} kmem;
// Initialization happens in two phases.
// 1. main() calls kinit1() while still using entrypgdir to place just
// the pages mapped by entrypgdir on free list.
// 2. main() calls kinit2() with the rest of the physical pages
// after installing a full page table that maps them on all cores.
void
kinit1(void *vstart, void *vend)
kinit()
{
initlock(&kmem.lock, "kmem");
kmem.use_lock = 0;
freerange(vstart, vend);
freerange(end, (void*)PHYSTOP);
}
void
kinit2(void *vstart, void *vend)
{
freerange(vstart, vend);
kmem.use_lock = 1;
}
void
freerange(void *vstart, void *vend)
freerange(void *pa_start, void *pa_end)
{
char *p;
p = (char*)PGROUNDUP((uint64)vstart);
for(; p + PGSIZE <= (char*)vend; p += PGSIZE)
p = (char*)PGROUNDUP((uint64)pa_start);
for(; p + PGSIZE <= (char*)pa_end; p += PGSIZE)
kfree(p);
}
//PAGEBREAK: 21
@ -57,42 +44,37 @@ freerange(void *vstart, void *vend)
// call to kalloc(). (The exception is when
// initializing the allocator; see kinit above.)
void
kfree(char *v)
kfree(void *pa)
{
struct run *r;
if((uint64)v % PGSIZE || v < end || V2P(v) >= PHYSTOP)
if(((uint64)pa % PGSIZE) != 0 || (char*)pa < end || (uint64)pa >= PHYSTOP)
panic("kfree");
// Fill with junk to catch dangling refs.
memset(v, 1, PGSIZE);
memset(pa, 1, PGSIZE);
if(kmem.use_lock)
acquire(&kmem.lock);
r = (struct run*)v;
r = (struct run*)pa;
r->next = kmem.freelist;
kmem.freelist = r;
if(kmem.use_lock)
release(&kmem.lock);
}
// Allocate one 4096-byte page of physical memory.
// Returns a pointer that the kernel can use.
// Returns 0 if the memory cannot be allocated.
char*
void *
kalloc(void)
{
struct run *r;
if(kmem.use_lock)
acquire(&kmem.lock);
r = kmem.freelist;
if(r)
kmem.freelist = r->next;
if(kmem.use_lock)
release(&kmem.lock);
if(r != 0 && (uint64) r < KERNBASE)
panic("kalloc");
return (char*)r;
memset((char*)r, 5, PGSIZE); // fill with junk
return (void*)r;
}

View file

@ -1,50 +1,33 @@
OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64")
OUTPUT_ARCH(i386:x86-64)
OUTPUT_ARCH( "riscv" )
ENTRY( _entry )
SECTIONS
{
. = 0xFFFFFF0000100000;
PROVIDE(text = .);
.text : AT(0x100000) {
*(.text .stub .text.* .gnu.linkonce.t.*)
}
.rodata : {
*(.rodata .rodata.* .gnu.linkonce.r.*)
}
/* Include debugging information in kernel memory */
.stab : {
PROVIDE(__STAB_BEGIN__ = .);
*(.stab);
PROVIDE(__STAB_END__ = .);
BYTE(0) /* Force the linker to allocate space
for this section */
}
.stabstr : {
PROVIDE(__STABSTR_BEGIN__ = .);
*(.stabstr);
PROVIDE(__STABSTR_END__ = .);
BYTE(0) /* Force the linker to allocate space
for this section */
/*
* ensure that entry.S / _entry is at 0x80000000,
* where qemu's -kernel jumps.
*/
. = 0x80000000;
.text :
{
*(.text)
. = ALIGN(0x1000);
*(trampoline)
}
. = ALIGN(0x1000);
PROVIDE(etext = .);
/* Conventionally, Unix linkers provide pseudo-symbols
* etext, edata, and end, at the end of the text, data, and bss.
* For the kernel mapping, we need the address at the beginning
* of the data section, but that's not one of the conventional
* symbols, because the convention started before there was a
* read-only rodata section between text and data. */
PROVIDE(data = .);
/*
* make sure end is after data and bss.
*/
.data : {
*(.data)
}
bss : {
PROVIDE(edata = .);
*(.bss)
*(COMMON)
PROVIDE(end = .);
}
. = ALIGN(0x1000);
PROVIDE(end = .);
}

105
main.c
View file

@ -1,105 +1,28 @@
#include "types.h"
#include "defs.h"
#include "param.h"
#include "memlayout.h"
#include "mmu.h"
#include "proc.h"
#include "x86.h"
extern pde_t *kpgdir;
extern char end[]; // first address after kernel loaded from ELF file
static void mpmain(void) __attribute__((noreturn));
static void startothers(void);
#include "riscv.h"
#include "defs.h"
// Bootstrap processor starts running C code here.
// Allocate a real stack and switch to it, first
// doing some setup required for memory allocator to work.
int
main(uint64 mbmagic, uint64 mbaddr)
void
main()
{
if(mbmagic != 0x2badb002)
panic("multiboot header not found");
kinit1(end, P2V(4*1024*1024)); // phys page allocator
kvmalloc(); // kernel page table
mpinit(); // detect other processors
lapicinit(); // interrupt controller
seginit(); // segment descriptors
picinit(); // disable pic
ioapicinit(); // another interrupt controller
consoleinit(); // console hardware
uartinit(); // serial port
pinit(); // process table
tvinit(); // trap vectors
consoleinit();
printf("entering main()\n");
kinit(); // physical page allocator
kvminit(); // kernel page table
procinit(); // process table
trapinit(); // trap vectors
#if 0
binit(); // buffer cache
fileinit(); // file table
ideinit(); // disk
startothers(); // start other processors
kinit2(P2V(4*1024*1024), P2V(PHYSTOP)); // must come after startothers()
#endif
userinit(); // first user process
mpmain();
return 0;
scheduler();
}
extern struct cpu* getmycpu();
// Common CPU setup code.
// Runs on every processor (called from main() and apmain()): prints a
// start-up message, finishes this CPU's trap setup, marks the CPU as
// started, and enters the scheduler. Declared noreturn.
static void
mpmain(void)
{
cprintf("cpu%d: starting %d\n", cpuid(), cpuid());
idtinit(); // load idt register
// Set the flag that startothers() spins on for this CPU.
xchg(&(mycpu()->started), 1); // tell startothers() we're up
scheduler(); // start running processes
}
// AP processors jump here from entryother.S.
// Performs the per-CPU initialization for a non-boot processor
// (switchkvm, seginit, lapicinit, in that order) and then joins the
// common path in mpmain().
void
apmain(void)
{
switchkvm();
seginit();
lapicinit();
mpmain();
}
void apstart(void);
// Start the non-boot (AP) processors.
// Copies the entryother image to physical address 0x7000, then starts
// each CPU other than the current one, one at a time, waiting for each
// to set its 'started' flag before moving on to the next.
static void
startothers(void)
{
extern uchar _binary_entryother_start[], _binary_entryother_size[];
uchar *code;
struct cpu *c;
char *stack;
// Write entry code to unused memory at 0x7000.
// The linker has placed the image of entryother.S in
// _binary_entryother_start.
code = P2V(0x7000);
memmove(code, _binary_entryother_start, (uint64)_binary_entryother_size);
for(c = cpus; c < cpus+ncpu; c++){
if(c == mycpu()) // We've started already.
continue;
// Tell entryother.S what stack to use, where to enter, and what
// pgdir to use. We cannot use kpgdir yet, because the AP processor
// is running in low memory, so we use entrypgdir for the APs too.
// The parameters are stored just below the entry code:
// a 32-bit entry address at code-4 and a 64-bit stack top at code-12.
// NOTE(review): no pgdir is actually written here, and the result of
// kalloc() is not checked for 0.
stack = kalloc();
*(uint32*)(code-4) = V2P(apstart);
*(uint64*)(code-12) = (uint64) (stack+KSTACKSIZE);
lapicstartap(c->apicid, V2P(code));
// wait for cpu to finish mpmain()
while(c->started == 0)
;
}
}

View file

@ -1,16 +1,25 @@
// Memory layout
// Physical memory layout
#define EXTMEM 0x100000 // Start of extended memory
#define PHYSTOP 0xE000000 // Top physical memory
#define DEVSPACE 0xFE000000 // Other devices are top of 32-bit address space
#define DEVSPACETOP 0x100000000
// qemu -machine virt is set up like this:
// 00001000 -- boot ROM, provided by qemu
// 10000000 -- uart0 registers
// 80000000 -- boot ROM jumps here in machine mode
// unused RAM after 80000000.
// Key addresses for address space layout (see kmap in vm.c for layout)
#define KERNBASE 0xFFFFFF0000000000 // First kernel virtual address
#define KERNLINK (KERNBASE+EXTMEM) // Address where kernel is linked
// the kernel uses physical memory thus:
// 80000000 -- entry.S, then kernel text and data
// end -- start of kernel page allocation area
// PHYSTOP -- end RAM used by the kernel
#define V2P(a) (((uint64) (a)) - KERNBASE)
#define P2V(a) ((void *)(((char *) (a)) + KERNBASE))
// registers start here in physical memory.
#define UART0 0x10000000L
#define V2P_WO(x) ((x) - KERNBASE) // same as V2P, but without casts
#define P2V_WO(x) ((x) + KERNBASE) // same as P2V, but without casts
// the kernel expects there to be RAM
// for use by the kernel and user pages
// from physical address 0x80000000 to PHYSTOP.
#define KERNBASE 0x80000000L
#define PHYSTOP (KERNBASE + 64*1024*1024)
// map the trampoline page to the highest address,
// in both user and kernel space.
#define TRAMPOLINE (MAXVA - PGSIZE)

160
mmu.h
View file

@ -1,160 +0,0 @@
// This file contains definitions for the
// x86 memory management unit (MMU).
// Eflags register
#define FL_TF 0x00000100 // Trap Flag
#define FL_IF 0x00000200 // Interrupt Enable
// Control Register flags
#define CR0_PE 0x00000001 // Protection Enable
#define CR0_WP 0x00010000 // Write Protect
#define CR0_PG 0x80000000 // Paging
#define CR4_PSE 0x00000010 // Page size extension
// Segment selectors (indexes) in our GDTs.
// Defined by our convention, not the architecture.
#define SEG_KCODE32 (1<<3) // kernel 32-bit code segment
#define SEG_KCODE (2<<3) // kernel code segment
#define SEG_KDATA (3<<3) // kernel data segment
#define SEG_TSS (4<<3) // tss segment - takes two slots
#define SEG_UDATA (6<<3) // user data segment
#define SEG_UCODE (7<<3) // user code segment
#define NSEGS 8
#ifndef __ASSEMBLER__
// One 8-byte segment descriptor, as filled in by SEGDESC() below.
struct segdesc {
uint16 limit0; // limit bits 0..15
uint16 base0; // base bits 0..15
uint8 base1; // base bits 16..23
uint8 bits; // low 8 bits of the descriptor's type/flag bits
uint8 bitslimit1; // flag bits 8..11 in the high nibble, limit bits 16..19 in the low nibble
uint8 base2; // base bits 24..31
};
// SEGDESC constructs a segment descriptor literal
// with the given base, limit, and type bits.
// base and limit are split across the fields of struct segdesc;
// bits supplies the flag byte plus the four flag bits (8..11) that
// share a byte with the top of the limit.
// Every macro argument is parenthesized at each use so that an
// expression argument (for example a|b) is shifted and masked as a whole.
#define SEGDESC(base, limit, bits) (struct segdesc){ \
(limit)&0xffff, (base)&0xffff, \
((base)>>16)&0xff, \
(bits)&0xff, \
(((bits)>>4)&0xf0) | (((limit)>>16)&0xf), \
((base)>>24)&0xff, \
}
// SEGDESCHI constructs an extension segment descriptor
// literal that records the high bits of base.
// Only the first two 16-bit fields are given: base bits 32..47 and
// base bits 48..63. The remaining fields are left zero-initialized.
#define SEGDESCHI(base) (struct segdesc) { \
(((base)>>32)&0xffff), (((base)>>48)&0xffff), \
}
#endif
#define DPL_USER 0x3 // User DPL
#define SEG_A (1<<0) // segment accessed bit
#define SEG_R (1<<1) // readable (code)
#define SEG_W (1<<1) // writable (data)
#define SEG_C (1<<2) // conforming segment (code)
#define SEG_E (1<<2) // expand-down bit (data)
#define SEG_CODE (1<<3) // code segment (instead of data)
// User and system segment bits.
#define SEG_S (1<<4) // if 0, system descriptor
#define SEG_DPL(x) ((x)<<5) // descriptor privilege level (2 bits)
#define SEG_P (1<<7) // segment present
#define SEG_AVL (1<<8) // available for operating system use
#define SEG_L (1<<9) // long mode
#define SEG_D (1<<10) // default operation size 32-bit
#define SEG_G (1<<11) // granularity
// Application segment type bits
#define STA_X 0x8 // Executable segment
#define STA_W 0x2 // Writeable (non-executable segments)
#define STA_R 0x2 // Readable (executable segments)
// System segment type bits
#define SEG_LDT (2<<0) // local descriptor table
#define SEG_TSS64A (9<<0) // available 64-bit TSS
#define SEG_TSS64B (11<<0) // busy 64-bit TSS
#define SEG_CALL64 (12<<0) // 64-bit call gate
#define SEG_INTR64 (14<<0) // 64-bit interrupt gate
#define SEG_TRAP64 (15<<0) // 64-bit trap gate
// A virtual address 'la' has a six-part structure as follows:
//
// +--16--+---9---+------9-------+-----9----+----9-------+----12-------+
// | Sign | PML4 |Page Directory| Page Dir |Page Table | Offset Page |
// |Extend| Index | Pointer Index| Index | Index | in Page |
// +------+-------+--------------+----------+------------+-------------+
// L3 pgtab L2 pgtab L1 pgtab L0 pgtab
// Page directory and page table constants.
#define NPDENTRIES 512 // # directory entries per page directory
#define PGSIZE 4096 // bytes mapped by a page
#define PGSHIFT 12 // offset of PTX in a linear address
#define PXMASK 0x1FF
#define PXSHIFT(n) (PGSHIFT+(9*(n))) // shift for index into level n page table
#define PX(n, va) ((((uint64) (va)) >> PXSHIFT(n)) & PXMASK)
#define L_PML4 3
#define PGROUNDUP(sz) (((sz)+PGSIZE-1) & ~(PGSIZE-1))
#define PGROUNDDOWN(a) (((a)) & ~(PGSIZE-1))
// Page table/directory entry flags.
#define PTE_P 0x001 // Present
#define PTE_W 0x002 // Writeable
#define PTE_U 0x004 // User
#define PTE_PS 0x080 // Page Size
#define PTE_PWT 0x008 // Write-Through
#define PTE_PCD 0x010 // Cache-Disable
// Address in page table or page directory entry
#define PTE_ADDR(pte) ((uint64)(pte) & ~0xFFF)
#define PTE_FLAGS(pte) ((uint64)(pte) & 0xFFF)
#ifndef __ASSEMBLER__
typedef uint64 pte_t;
// Task state structure. Declared packed, so the fields sit at byte
// offsets 0, 4, 28, 92, 102 and 104 with no compiler padding.
// NOTE(review): presumably mirrors the hardware 64-bit TSS layout --
// confirm against the architecture manual before changing any field.
struct taskstate {
uint8 reserved0[4];
uint64 rsp[3]; // three stack pointer slots
uint64 ist[8]; // eight further 64-bit slots
uint8 reserved1[10];
uint16 iomba;
uint8 iopb[0]; // zero-length trailing array; occupies no space in the struct
} __attribute__ ((packed));
#define INT_P (1<<7) // interrupt descriptor present
// One 16-byte interrupt descriptor, as filled in by INTDESC() below.
struct intgate
{
uint16 rip0; // handler address bits 0..15
uint16 cs; // code segment selector
uint8 reserved0; // set to 0 by INTDESC
uint8 bits; // type bits passed to INTDESC
uint16 rip1; // handler address bits 16..31
uint32 rip2; // handler address bits 32..63
uint32 reserved1; // set to 0 by INTDESC
};
// INTDESC constructs an interrupt descriptor literal
// that records the given code segment, instruction pointer,
// and type bits.
// rip is split into three pieces (bits 0..15, 16..31 and 32..63);
// both reserved fields are set to 0.
#define INTDESC(cs, rip, bits) (struct intgate){ \
(rip)&0xffff, (cs), 0, bits, ((rip)>>16)&0xffff, \
(uint64)(rip)>>32, 0, \
}
// See section 4.6 of amd64 vol2
// A 16-bit limit immediately followed by a 64-bit base address.
// 'packed' removes the padding the compiler would otherwise insert
// after limit, so base starts at byte offset 2; 'aligned(16)' places
// each object on a 16-byte boundary.
// NOTE(review): presumably the operand format for descriptor-table
// load instructions -- confirm against the cited manual section.
struct desctr
{
uint16 limit;
uint64 base;
} __attribute__((packed, aligned(16))); // important!
#endif

25
msr.h
View file

@ -1,25 +0,0 @@
// SYSCALL and SYSRET registers
#define MSR_STAR 0xc0000081
#define MSR_LSTAR 0xc0000082
#define MSR_CSTAR 0xc0000083
#define MSR_SFMASK 0xc0000084
// GS
#define MSR_GS_BASE 0xc0000101
#define MSR_GS_KERNBASE 0xc0000102
// Read the model-specific register selected by msr and return its
// 64-bit value. rdmsr delivers the value in two 32-bit halves
// (edx = upper, eax = lower), which are glued back together here.
static inline uint64
readmsr(uint32 msr)
{
  uint32 upper, lower;
  uint64 value;

  __asm volatile("rdmsr" : "=d" (upper), "=a" (lower) : "c" (msr));

  value = (uint64)upper;
  value <<= 32;
  value |= (uint64)lower;
  return value;
}
// Store the 64-bit value val into the model-specific register selected
// by msr. wrmsr takes the value as two 32-bit halves
// (eax = lower, edx = upper).
static inline void
writemsr(uint64 msr, uint64 val)
{
  uint32 upper = (uint32)(val >> 32);
  uint32 lower = (uint32)(val & 0xffffffff);

  __asm volatile("wrmsr" : : "c" (msr), "a" (lower), "d" (upper) : "memory");
}

View file

@ -1,5 +1,4 @@
#define NPROC 64 // maximum number of processes
#define KSTACKSIZE 4096 // size of per-process kernel stack
#define NCPU 8 // maximum number of CPUs
#define NOFILE 16 // open files per process
#define NFILE 100 // open files per system

283
proc.c
View file

@ -1,18 +1,20 @@
#include "types.h"
#include "defs.h"
#include "param.h"
#include "memlayout.h"
#include "mmu.h"
#include "x86.h"
#include "riscv.h"
#include "proc.h"
#include "spinlock.h"
#include "defs.h"
struct {
struct spinlock lock;
struct proc proc[NPROC];
} ptable;
static struct proc *initproc;
// XXX riscv move somewhere else
struct cpu cpus[NCPU];
struct proc *initproc;
int nextpid = 1;
extern void forkret(void);
@ -22,57 +24,36 @@ extern void sysexit(void);
static void wakeup1(void *chan);
extern char trampstart[]; // trampoline.S
void
pinit(void)
procinit(void)
{
initlock(&ptable.lock, "ptable");
}
// Must be called with interrupts disabled
// Must be called with interrupts disabled.
// XXX riscv
int
cpuid() {
return mycpu()-cpus;
return 0;
}
// Must be called with interrupts disabled to avoid the caller being
// rescheduled between reading lapicid and running through the loop.
struct cpu*
getmycpu(void)
{
int apicid, i;
if(readeflags()&FL_IF)
panic("getmycpu called with interrupts enabled\n");
apicid = lapicid();
// APIC IDs are not guaranteed to be contiguous.
for (i = 0; i < ncpu; ++i) {
if (cpus[i].apicid == apicid)
return &cpus[i];
}
panic("unknown apicid\n");
}
// Return this core's cpu struct using %gs. %gs points this core's struct
// cpu. Offet 24 in struct cpu is cpu.
// Return this core's cpu struct.
// XXX riscv
struct cpu*
mycpu(void) {
struct cpu *c;
asm volatile("mov %%gs:24, %0" : "=r" (c));
c = &cpus[0];
return c;
}
// Disable interrupts so that we are not rescheduled
// while reading proc from the cpu structure
// XXX riscv
struct proc*
myproc(void) {
struct cpu *c;
struct proc *p;
pushcli();
c = mycpu();
p = c->proc;
popcli();
return p;
return cpus[0].proc;
}
//PAGEBREAK: 32
@ -84,7 +65,6 @@ static struct proc*
allocproc(void)
{
struct proc *p;
char *sp;
acquire(&ptable.lock);
@ -101,56 +81,73 @@ found:
release(&ptable.lock);
// Allocate kernel stack.
// Allocate a page for the kernel stack.
if((p->kstack = kalloc()) == 0){
p->state = UNUSED;
return 0;
}
sp = p->kstack + KSTACKSIZE;
// Leave room for syscall frame.
sp -= sizeof *p->sf;
// Allocate a trapframe page.
if((p->tf = (struct trapframe *)kalloc()) == 0){
p->state = UNUSED;
return 0;
}
if ((uint64) sp % 16)
panic("misaligned sp");
// An empty user page table.
p->pagetable = uvmcreate();
p->sf = (struct sysframe*)sp;
// map the trampoline code (for system call return)
// at the highest user virtual address.
// only the supervisor uses it, on the way
// to/from user space, so not PTE_U.
mappages(p->pagetable, TRAMPOLINE, PGSIZE,
(uint64)trampstart, PTE_R | PTE_X);
// map the trapframe, for trampoline.S.
mappages(p->pagetable, (TRAMPOLINE - PGSIZE), PGSIZE,
(uint64)(p->tf), PTE_R | PTE_W);
// Set up new context to start executing at forkret,
// which returns to sysexit.
sp -= sizeof(uint64);
*(uint64*)sp = (uint64)sysexit;
sp -= sizeof *p->context;
p->context = (struct context*)sp;
memset(p->context, 0, sizeof *p->context);
p->context->rip = (uint64)forkret;
// which returns to user space.
memset(&p->context, 0, sizeof p->context);
p->context.ra = (uint64)forkret;
p->context.sp = (uint64)p->kstack + PGSIZE;
return p;
}
// XXX hack because I don't know how to incorporate initcode
// into the kernel binary. just the fork, wait, and exit system calls, no arguments.
// manually copied from initcode.asm.
unsigned char initcode[] = {
0x85, 0x48, // li a7, 1 -- SYS_fork
0x73, 0x00, 0x00, 0x00, // ecall
0x8d, 0x48, // li a7, 3 -- SYS_wait
0x73, 0x00, 0x00, 0x00, // ecall
0x89, 0x48, // li a7, 2 -- SYS_exit
0x73, 0x00, 0x00, 0x00, // ecall
};
//PAGEBREAK: 32
// Set up first user process.
void
userinit(void)
{
struct proc *p;
extern char _binary_initcode_start[], _binary_initcode_size[];
p = allocproc();
initproc = p;
if((p->pgdir = setupkvm()) == 0)
panic("userinit: out of memory?");
inituvm(p->pgdir, _binary_initcode_start, (uint64)_binary_initcode_size);
uvminit(p->pagetable, initcode, sizeof(initcode));
p->sz = PGSIZE;
memset(p->sf, 0, sizeof(*p->sf));
p->sf->r11 = FL_IF;
p->sf->rsp = PGSIZE;
p->sf->rcx = 0; // beginning of initcode.S
// prepare for the very first kernel->user.
p->tf->epc = 0;
p->tf->sp = PGSIZE;
safestrcpy(p->name, "initcode", sizeof(p->name));
p->cwd = namei("/");
// XXX riscv
//p->cwd = namei("/");
// this assignment to p->state lets other cores
// run this process. the acquire forces the above
@ -163,62 +160,65 @@ userinit(void)
release(&ptable.lock);
}
#if 0
// Grow current process's memory by n bytes.
// Return 0 on success, -1 on failure.
int
growproc(int n)
{
uint sz;
struct proc *curproc = myproc();
struct proc *p = myproc();
sz = curproc->sz;
sz = p->sz;
if(n > 0){
if((sz = allocuvm(curproc->pgdir, sz, sz + n)) == 0)
if((sz = allocuvm(p->pagetable, sz, sz + n)) == 0)
return -1;
} else if(n < 0){
if((sz = deallocuvm(curproc->pgdir, sz, sz + n)) == 0)
if((sz = uvmdealloc(p->pagetable, sz, sz + n)) == 0)
return -1;
}
curproc->sz = sz;
switchuvm(curproc);
p->sz = sz;
switchuvm(p);
return 0;
}
#endif
// Create a new process copying p as the parent.
// Sets up stack to return as if from system call.
// Caller must set state of returned proc to RUNNABLE.
// Create a new process, copying p as the parent.
// Sets up child kernel stack to return as if from system call.
int
fork(void)
{
int i, pid;
struct proc *np;
struct proc *curproc = myproc();
struct proc *p = myproc();
// Allocate process.
if((np = allocproc()) == 0){
return -1;
}
// Copy process state from proc.
if((np->pgdir = copyuvm(curproc->pgdir, curproc->sz)) == 0){
kfree(np->kstack);
np->kstack = 0;
np->state = UNUSED;
return -1;
}
np->sz = curproc->sz;
np->parent = curproc;
*np->sf = *curproc->sf;
// Copy user memory from parent to child.
uvmcopy(p->pagetable, np->pagetable, p->sz);
np->sz = p->sz;
// Clear %eax so that fork returns 0 in the child.
np->sf->rax = 0;
np->parent = p;
// copy saved user registers.
*(np->tf) = *(p->tf);
// Cause fork to return 0 in the child.
np->tf->a0 = 0;
#if 0 // XXX riscv
// increment reference counts on open file descriptors.
for(i = 0; i < NOFILE; i++)
if(curproc->ofile[i])
np->ofile[i] = filedup(curproc->ofile[i]);
np->cwd = idup(curproc->cwd);
if(p->ofile[i])
np->ofile[i] = filedup(p->ofile[i]);
np->cwd = idup(p->cwd);
#endif
safestrcpy(np->name, curproc->name, sizeof(curproc->name));
safestrcpy(np->name, p->name, sizeof(p->name));
pid = np->pid;
@ -233,46 +233,48 @@ fork(void)
// Exit the current process. Does not return.
// An exited process remains in the zombie state
// until its parent calls wait() to find out it exited.
// until its parent calls wait().
void
exit(void)
{
struct proc *curproc = myproc();
struct proc *p;
struct proc *p = myproc();
struct proc *pp;
int fd;
if(curproc == initproc)
if(p == initproc)
panic("init exiting");
#if 0 // XXX riscv
// Close all open files.
for(fd = 0; fd < NOFILE; fd++){
if(curproc->ofile[fd]){
fileclose(curproc->ofile[fd]);
curproc->ofile[fd] = 0;
if(p->ofile[fd]){
fileclose(p->ofile[fd]);
p->ofile[fd] = 0;
}
}
begin_op();
iput(curproc->cwd);
iput(p->cwd);
end_op();
curproc->cwd = 0;
#endif
p->cwd = 0;
acquire(&ptable.lock);
// Parent might be sleeping in wait().
wakeup1(curproc->parent);
wakeup1(p->parent);
// Pass abandoned children to init.
for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){
if(p->parent == curproc){
p->parent = initproc;
if(p->state == ZOMBIE)
for(pp = ptable.proc; pp < &ptable.proc[NPROC]; pp++){
if(pp->parent == p){
pp->parent = initproc;
if(pp->state == ZOMBIE)
wakeup1(initproc);
}
}
// Jump into the scheduler, never to return.
curproc->state = ZOMBIE;
p->state = ZOMBIE;
sched();
panic("zombie exit");
}
@ -282,42 +284,47 @@ exit(void)
int
wait(void)
{
struct proc *p;
struct proc *np;
int havekids, pid;
struct proc *curproc = myproc();
struct proc *p = myproc();
acquire(&ptable.lock);
for(;;){
// Scan through table looking for exited children.
havekids = 0;
for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){
if(p->parent != curproc)
for(np = ptable.proc; np < &ptable.proc[NPROC]; np++){
if(np->parent != p)
continue;
havekids = 1;
if(p->state == ZOMBIE){
if(np->state == ZOMBIE){
// Found one.
pid = p->pid;
kfree(p->kstack);
p->kstack = 0;
freevm(p->pgdir, p->sz);
p->pid = 0;
p->parent = 0;
p->name[0] = 0;
p->killed = 0;
p->state = UNUSED;
pid = np->pid;
kfree(np->kstack);
np->kstack = 0;
kfree((void*)np->tf);
np->tf = 0;
unmappages(np->pagetable, TRAMPOLINE, PGSIZE, 0);
unmappages(np->pagetable, TRAMPOLINE-PGSIZE, PGSIZE, 0);
uvmfree(np->pagetable, np->sz);
np->pagetable = 0;
np->pid = 0;
np->parent = 0;
np->name[0] = 0;
np->killed = 0;
np->state = UNUSED;
release(&ptable.lock);
return pid;
}
}
// No point waiting if we don't have any children.
if(!havekids || curproc->killed){
if(!havekids || p->killed){
release(&ptable.lock);
return -1;
}
// Wait for children to exit. (See wakeup1 call in proc_exit.)
sleep(curproc, &ptable.lock); //DOC: wait-sleep
sleep(p, &ptable.lock); //DOC: wait-sleep
}
}
@ -338,7 +345,8 @@ scheduler(void)
c->proc = 0;
for(;;){
// Enable interrupts on this processor.
sti();
// XXX riscv
//sti();
// Loop over process table looking for process to run.
acquire(&ptable.lock);
@ -350,11 +358,11 @@ scheduler(void)
// to release ptable.lock and then reacquire it
// before jumping back to us.
c->proc = p;
switchuvm(p);
p->state = RUNNING;
swtch(&(c->scheduler), p->context);
switchkvm();
printf("switch...\n");
swtch(&c->scheduler, &p->context);
printf("switch returned\n");
// Process is done running for now.
// It should have changed its p->state before coming back.
@ -380,14 +388,10 @@ sched(void)
if(!holding(&ptable.lock))
panic("sched ptable.lock");
if(mycpu()->ncli != 1)
panic("sched locks");
if(p->state == RUNNING)
panic("sched running");
if(readeflags()&FL_IF)
panic("sched interruptible");
intena = mycpu()->intena;
swtch(&p->context, mycpu()->scheduler);
swtch(&p->context, &mycpu()->scheduler);
mycpu()->intena = intena;
}
@ -402,24 +406,29 @@ yield(void)
}
// A fork child's very first scheduling by scheduler()
// will swtch here. "Return" to user space.
// will swtch to forkret.
void
forkret(void)
{
struct proc *p = myproc();
static int first = 1;
// Still holding ptable.lock from scheduler.
release(&ptable.lock);
printf("entering forkret\n");
if (first) {
// Some initialization functions must be run in the context
// of a regular process (e.g., they call sleep), and thus cannot
// be run from main().
first = 0;
iinit(ROOTDEV);
initlog(ROOTDEV);
// XXX riscv
//iinit(ROOTDEV);
//initlog(ROOTDEV);
}
// Return to "caller", actually trapret (see allocproc).
usertrapret();
}
// Atomically release lock and sleep on chan.
@ -483,6 +492,8 @@ wakeup(void *chan)
release(&ptable.lock);
}
#if 0
// Kill the process with the given pid.
// Process won't exit until it returns
// to user space (see trap in trap.c).
@ -533,12 +544,14 @@ procdump(void)
state = states[p->state];
else
state = "???";
cprintf("%d %s %s", p->pid, state, p->name);
printf("%d %s %s", p->pid, state, p->name);
if(p->state == SLEEPING){
getcallerpcs((uint64*)p->context->rbp+2, pc);
for(i=0; i<10 && pc[i] != 0; i++)
cprintf(" %p", pc[i]);
printf(" %p", pc[i]);
}
cprintf("\n");
printf("\n");
}
}
#endif

84
proc.h
View file

@ -1,13 +1,30 @@
// Saved registers for kernel context switches.
// swtch.S stores and loads these fields at fixed byte offsets from the
// struct pointer (ra at 0, sp at 8, s0 at 16, ... s11 at 104), so the
// order and size of the fields must stay in step with that file.
// allocproc() zeroes a new process's context and then sets ra to
// forkret and sp to the top of the process's kernel stack page.
struct context {
uint64 ra; // offset 0: where swtch's `ret` resumes execution
uint64 sp; // offset 8: kernel stack pointer
// callee-saved
uint64 s0;
uint64 s1;
uint64 s2;
uint64 s3;
uint64 s4;
uint64 s5;
uint64 s6;
uint64 s7;
uint64 s8;
uint64 s9;
uint64 s10;
uint64 s11; // offset 104: last slot touched by swtch
};
// Per-CPU state
struct cpu {
uint64 syscallno; // Temporary used by sysentry
uint64 usp; // Temporary used by sysentry
struct proc *proc; // The process running on this cpu or null
struct cpu *cpu; // XXX
uchar apicid; // Local APIC ID
struct context *scheduler; // swtch() here to enter scheduler
struct taskstate ts; // Used by x86 to find stack for interrupt
struct segdesc gdt[NSEGS]; // x86 global descriptor table
struct context scheduler; // swtch() here to enter scheduler
volatile uint started; // Has the CPU started?
int ncli; // Depth of pushcli nesting.
int intena; // Were interrupts enabled before pushcli?
@ -17,39 +34,52 @@ extern struct cpu cpus[NCPU];
extern int ncpu;
//PAGEBREAK: 17
// Saved registers for kernel context switches.
// Don't need to save all the segment registers (%cs, etc),
// because they are constant across kernel contexts.
// Don't need to save %eax, %ecx, %edx, because the
// x86 convention is that the caller has saved them.
// Contexts are stored at the bottom of the stack they
// describe; the stack pointer is the address of the context.
// The layout of the context matches the layout of the stack in swtch.S
// at the "Switch stacks" comment. Switch doesn't save eip explicitly,
// but it is on the stack and allocproc() manipulates it.
struct context {
uint64 r15;
uint64 r14;
uint64 r13;
uint64 r12;
uint64 r11;
uint64 rbx;
uint64 rbp;
uint64 rip;
// per-process data for the early trap handling code in trampoline.S.
// sits in a page by itself just under the trampoline page in the
// user page table. not specially mapped in the kernel page table.
// the sscratch register points here.
// trampoline.S saves user registers, then restores kernel_sp and
// kernel_satp.
// no need to save s0-s11 (callee-saved) since C code and swtch() save them.
//
// trampoline.S addresses these fields by the numeric byte offsets shown
// in the /* */ markers, so any change to the layout must be mirrored
// there. The three kernel_* fields are filled in by usertrapret() just
// before returning to user space.
struct trapframe {
/* 0 */ uint64 kernel_satp; // kernel page table, as a satp value
/* 8 */ uint64 kernel_sp; // top of the process's kernel stack
/* 16 */ uint64 kernel_trap; // address of usertrap()
/* 24 */ uint64 epc; // saved user program counter
/* 32 */ uint64 ra;
/* 40 */ uint64 sp;
/* 48 */ uint64 gp;
/* 56 */ uint64 tp;
/* 64 */ uint64 t0;
/* 72 */ uint64 t1;
/* 80 */ uint64 t2;
/* 88 */ uint64 a0; // first syscall argument; also holds the syscall return value
/* 96 */ uint64 a1;
/* 104 */ uint64 a2;
/* 112 */ uint64 a3;
/* 120 */ uint64 a4;
/* 128 */ uint64 a5;
/* 136 */ uint64 a6;
/* 144 */ uint64 a7; // system call number
/* 152 */ uint64 t3;
/* 160 */ uint64 t4;
/* 168 */ uint64 t5;
/* 176 */ uint64 t6;
};
enum procstate { UNUSED, EMBRYO, SLEEPING, RUNNABLE, RUNNING, ZOMBIE };
// Per-process state
struct proc {
char *kstack; // Bottom of kernel stack for this process, must be first entry
char *kstack; // Bottom of kernel stack for this process
uint64 sz; // Size of process memory (bytes)
pde_t* pgdir; // Page table
pagetable_t pagetable; // Page table
enum procstate state; // Process state
int pid; // Process ID
struct proc *parent; // Parent process
struct sysframe *sf; // Syscall frame for current syscall
struct context *context; // swtch() here to run process
struct trapframe *tf; // data page for trampoline.S
struct context context; // swtch() here to run process
void *chan; // If non-zero, sleeping on chan
int killed; // If non-zero, have been killed
struct file *ofile[NOFILE]; // Open files

172
riscv.h Normal file
View file

@ -0,0 +1,172 @@
// Machine Status Register, mstatus
#define MSTATUS_MPP_MASK (3L << 11)
#define MSTATUS_MPP_M (3L << 11)
#define MSTATUS_MPP_S (1L << 11)
#define MSTATUS_MPP_U (0L << 11)
static inline uint64
r_mstatus()
{
uint64 x;
asm("csrr %0, mstatus" : "=r" (x) );
return x;
}
static inline void
w_mstatus(uint64 x)
{
asm("csrw mstatus, %0" : : "r" (x));
}
// Machine Exception Program Counter, mepc.
// Holds the instruction address that a return from a machine-mode
// exception (mret) jumps to.
static inline void
w_mepc(uint64 x)
{
  asm volatile("csrw mepc, %0"
               : /* no outputs */
               : "r" (x));
}
// Supervisor Status Register, sstatus
#define SSTATUS_SPP (1L << 8) // 1=Supervisor, 0=User
static inline uint64
r_sstatus()
{
uint64 x;
asm("csrr %0, sstatus" : "=r" (x) );
return x;
}
static inline void
w_sstatus(uint64 x)
{
asm("csrw sstatus, %0" : : "r" (x));
}
// machine exception program counter, holds the
// instruction address to which a return from
// exception will go.
static inline void
w_sepc(uint64 x)
{
asm("csrw sepc, %0" : : "r" (x));
}
static inline uint64
r_sepc()
{
uint64 x;
asm("csrr %0, sepc" : "=r" (x) );
return x;
}
// Machine Exception Delegation
static inline uint64
r_medeleg()
{
uint64 x;
asm("csrr %0, medeleg" : "=r" (x) );
return x;
}
static inline void
w_medeleg(uint64 x)
{
asm("csrw medeleg, %0" : : "r" (x));
}
// Machine Interrupt Delegation
static inline uint64
r_mideleg()
{
uint64 x;
asm("csrr %0, mideleg" : "=r" (x) );
return x;
}
static inline void
w_mideleg(uint64 x)
{
asm("csrw mideleg, %0" : : "r" (x));
}
// Supervisor Trap-Vector Base Address, stvec.
// The low two bits select the mode, so the handler address written
// here must be at least 4-byte aligned.
static inline void
w_stvec(uint64 x)
{
  asm volatile("csrw stvec, %0"
               : /* no outputs */
               : "r" (x));
}
// use riscv's sv39 page table scheme.
#define SATP_SV39 (8L << 60)
#define MAKE_SATP(pagetable) (SATP_SV39 | (((uint64)pagetable) >> 12))
// supervisor address translation and protection;
// holds the address of the page table.
static inline void
w_satp(uint64 x)
{
asm("csrw satp, %0" : : "r" (x));
}
static inline uint64
r_satp()
{
uint64 x;
asm("csrr %0, satp" : "=r" (x) );
return x;
}
// Supervisor Scratch register, sscratch.
// The early trap handler in trampoline.S swaps it with a0 to obtain a
// pointer to the process's trapframe.
static inline void
w_sscratch(uint64 x)
{
  asm volatile("csrw sscratch, %0"
               : /* no outputs */
               : "r" (x));
}
// Supervisor trap cause
static inline uint64
r_scause()
{
uint64 x;
asm("csrr %0, scause" : "=r" (x) );
return x;
}
#define PGSIZE 4096 // bytes per page
#define PGSHIFT 12 // bits of offset within a page

// round an address/size up or down to a page boundary.
#define PGROUNDUP(sz) (((sz)+PGSIZE-1) & ~(PGSIZE-1))
#define PGROUNDDOWN(a) (((a)) & ~(PGSIZE-1))

// page table entry permission/status bits.
#define PTE_V (1L << 0) // valid
#define PTE_R (1L << 1)
#define PTE_W (1L << 2)
#define PTE_X (1L << 3)
#define PTE_U (1L << 4) // 1 -> user can access

// shift a physical address to the right place for a PTE.
// the argument is parenthesized before the cast so that an expression
// such as PA2PTE(p + 1) converts the whole sum, not just `p`.
#define PA2PTE(pa) ((((uint64)(pa)) >> 12) << 10)

// recover the physical address stored in a PTE.
#define PTE2PA(pte) (((pte) >> 10) << 12)

// keep only the V/R/W/X/U bits of a PTE.
#define PTE_FLAGS(pte) ((pte) & (PTE_V|PTE_R|PTE_W|PTE_X|PTE_U))

// extract the three 9-bit page table indices from a virtual address.
#define PXMASK 0x1FF // 9 bits
#define PXSHIFT(level) (PGSHIFT+(9*(level)))
#define PX(level, va) ((((uint64) (va)) >> PXSHIFT(level)) & PXMASK)

// one beyond the highest possible virtual address.
// MAXVA is actually one bit less than the max allowed by
// Sv39, to avoid having to sign-extend virtual addresses
// that have the high bit set.
#define MAXVA (1L << (9 + 9 + 9 + 12 - 1))
typedef uint64 pte_t;
typedef uint64 *pagetable_t; // 512 PTEs

View file

@ -1,13 +1,11 @@
// Mutual exclusion spin locks.
#include "types.h"
#include "defs.h"
#include "param.h"
#include "x86.h"
#include "memlayout.h"
#include "mmu.h"
#include "proc.h"
#include "spinlock.h"
#include "riscv.h"
#include "defs.h"
void
initlock(struct spinlock *lk, char *name)
@ -17,6 +15,27 @@ initlock(struct spinlock *lk, char *name)
lk->cpu = 0;
}
// Placeholder acquire: marks the lock as held and records the current
// cpu as its owner. It neither spins nor uses an atomic operation, and
// it does not check whether the lock is already held; the spinning
// implementation later in this file is compiled out with #if 0.
void
acquire(struct spinlock *lk)
{
lk->locked = 1;
lk->cpu = mycpu(); // recorded so that holding() can identify the owner
}
// Placeholder release: marks the lock as free and clears its owner.
// It does not check that the caller actually holds the lock.
void
release(struct spinlock *lk)
{
lk->locked = 0;
lk->cpu = 0;
}
// Return nonzero if lk is currently marked locked and its recorded
// owner is the cpu returned by mycpu(); zero otherwise.
int
holding(struct spinlock *lk)
{
return lk->locked && lk->cpu == mycpu();
}
#if 0
// Acquire the lock.
// Loops (spins) until the lock is acquired.
// Holding a lock for a long time may cause
@ -37,7 +56,7 @@ acquire(struct spinlock *lk)
// references happen after the lock is acquired.
__sync_synchronize();
// Record info about lock acquisition for debugging.
// Record info about lock acquisition for holding() and debugging.
lk->cpu = mycpu();
getcallerpcs(&lk, lk->pcs);
}
@ -87,11 +106,11 @@ getcallerpcs(void *v, uint64 pcs[])
// Check whether this cpu is holding the lock.
int
holding(struct spinlock *lock)
holding(struct spinlock *lk)
{
int r;
pushcli();
r = lock->locked && lock->cpu == mycpu();
r = lk->locked && lk->cpu == mycpu();
popcli();
return r;
}
@ -123,4 +142,4 @@ popcli(void)
if(mycpu()->ncli == 0 && mycpu()->intena)
sti();
}
#endif

34
start.c Normal file
View file

@ -0,0 +1,34 @@
#include "types.h"
#include "memlayout.h"
#include "riscv.h"
#include "defs.h"
void main();
// entry.S uses this as the initial stack.
// The RISC-V calling convention requires the stack pointer to be
// 16-byte aligned; a plain char array carries no such guarantee, so
// request the alignment explicitly.
__attribute__ ((aligned (16))) char stack0[4096];
// entry.S jumps here in machine mode on stack0.
// Prepares the machine-mode CSRs and then uses mret to continue in
// main() in supervisor mode.
void
mstart()
{
  // mret drops to the privilege mode stored in mstatus.MPP:
  // replace that field with Supervisor.
  unsigned long status = (r_mstatus() & ~MSTATUS_MPP_MASK) | MSTATUS_MPP_S;
  w_mstatus(status);

  // mret jumps to the address held in mepc: point it at main.
  // requires gcc -mcmodel=medany
  w_mepc((uint64)main);

  // disable paging for now.
  w_satp(0);

  // delegate all interrupts and exceptions to supervisor mode.
  w_medeleg(0xffff);
  w_mideleg(0xffff);

  // jump to main in supervisor mode.
  asm("mret");
}

View file

@ -1,14 +1,13 @@
#include "types.h"
#include "x86.h"
void*
memset(void *dst, int c, uint n)
{
if ((uint64)dst%4 == 0 && n%4 == 0){
c &= 0xFF;
stosl(dst, (c<<24)|(c<<16)|(c<<8)|c, n/4);
} else
stosb(dst, c, n);
char *cdst = (char *) dst;
int i;
for(i = 0; i < n; i++){
cdst[i] = c;
}
return dst;
}

55
swtch.S
View file

@ -1,34 +1,41 @@
# Context switch
#
# void swtch(struct context **old, struct context *new);
# void swtch(struct context *old, struct context *new);
#
# Save the current registers on the stack, creating
# a struct context, and save its address in *old.
# Switch stacks to new and pop previously-saved registers.
# Save current registers in old. Load from new.
.globl swtch
swtch:
# Save old callee-saved registers
push %rbp
push %rbx
push %r11
push %r12
push %r13
push %r14
push %r15
sd ra, 0(a0)
sd sp, 8(a0)
sd s0, 16(a0)
sd s1, 24(a0)
sd s2, 32(a0)
sd s3, 40(a0)
sd s4, 48(a0)
sd s5, 56(a0)
sd s6, 64(a0)
sd s7, 72(a0)
sd s8, 80(a0)
sd s9, 88(a0)
sd s10, 96(a0)
sd s11, 104(a0)
# Switch stacks
mov %rsp, (%rdi) # first arg of swtch is in rdi
mov %rsi, %rsp # second arg of swtch is in rsi
# Load new callee-saved registers
pop %r15
pop %r14
pop %r13
pop %r12
pop %r11
pop %rbx
pop %rbp
ld ra, 0(a1)
ld sp, 8(a1)
ld s0, 16(a1)
ld s1, 24(a1)
ld s2, 32(a1)
ld s3, 40(a1)
ld s4, 48(a1)
ld s5, 56(a1)
ld s6, 64(a1)
ld s7, 72(a1)
ld s8, 80(a1)
ld s9, 88(a1)
ld s10, 96(a1)
ld s11, 104(a1)
ret

View file

@ -1,11 +1,10 @@
#include "types.h"
#include "defs.h"
#include "param.h"
#include "memlayout.h"
#include "mmu.h"
#include "riscv.h"
#include "proc.h"
#include "x86.h"
#include "syscall.h"
#include "defs.h"
// User code makes a system call with the ecall instruction.
// System call number in a7.
@ -17,9 +16,9 @@
int
fetchint(uint64 addr, int *ip)
{
struct proc *curproc = myproc();
struct proc *p = myproc();
if(addr >= curproc->sz || addr+4 > curproc->sz)
if(addr >= p->sz || addr+4 > p->sz)
return -1;
*ip = *(uint64*)(addr);
return 0;
@ -29,8 +28,8 @@ fetchint(uint64 addr, int *ip)
int
fetchaddr(uint64 addr, uint64 *ip)
{
struct proc *curproc = myproc();
if(addr >= curproc->sz || addr+sizeof(uint64) > curproc->sz)
struct proc *p = myproc();
if(addr >= p->sz || addr+sizeof(uint64) > p->sz)
return -1;
*ip = *(uint64*)(addr);
return 0;
@ -43,12 +42,12 @@ int
fetchstr(uint64 addr, char **pp)
{
char *s, *ep;
struct proc *curproc = myproc();
struct proc *p = myproc();
if(addr >= curproc->sz)
if(addr >= p->sz)
return -1;
*pp = (char*)addr;
ep = (char*)curproc->sz;
ep = (char*)p->sz;
for(s = *pp; s < ep; s++){
if(*s == 0)
return s - *pp;
@ -59,20 +58,20 @@ fetchstr(uint64 addr, char **pp)
static uint64
fetcharg(int n)
{
struct proc *curproc = myproc();
struct proc *p = myproc();
switch (n) {
case 0:
return curproc->sf->rdi;
return p->tf->a0;
case 1:
return curproc->sf->rsi;
return p->tf->a1;
case 2:
return curproc->sf->rdx;
return p->tf->a2;
case 3:
return curproc->sf->r10;
return p->tf->a3;
case 4:
return curproc->sf->r8;
return p->tf->a4;
case 5:
return curproc->sf->r9;
return p->tf->a5;
}
panic("fetcharg");
return -1;
@ -100,11 +99,11 @@ int
argptr(int n, char **pp, int size)
{
uint64 i;
struct proc *curproc = myproc();
struct proc *p = myproc();
if(argaddr(n, &i) < 0)
return -1;
if(size < 0 || (uint)i >= curproc->sz || (uint)i+size > curproc->sz)
if(size < 0 || (uint)i >= p->sz || (uint)i+size > p->sz)
return -1;
*pp = (char*)i;
return 0;
@ -149,48 +148,47 @@ static int (*syscalls[])(void) = {
[SYS_fork] sys_fork,
[SYS_exit] sys_exit,
[SYS_wait] sys_wait,
[SYS_pipe] sys_pipe,
[SYS_read] sys_read,
[SYS_kill] sys_kill,
[SYS_exec] sys_exec,
[SYS_fstat] sys_fstat,
[SYS_chdir] sys_chdir,
[SYS_dup] sys_dup,
//[SYS_pipe] sys_pipe,
//[SYS_read] sys_read,
//[SYS_kill] sys_kill,
//[SYS_exec] sys_exec,
//[SYS_fstat] sys_fstat,
//[SYS_chdir] sys_chdir,
//[SYS_dup] sys_dup,
[SYS_getpid] sys_getpid,
[SYS_sbrk] sys_sbrk,
[SYS_sleep] sys_sleep,
[SYS_uptime] sys_uptime,
[SYS_open] sys_open,
[SYS_write] sys_write,
[SYS_mknod] sys_mknod,
[SYS_unlink] sys_unlink,
[SYS_link] sys_link,
[SYS_mkdir] sys_mkdir,
[SYS_close] sys_close,
//[SYS_sbrk] sys_sbrk,
//[SYS_sleep] sys_sleep,
//[SYS_uptime] sys_uptime,
//[SYS_open] sys_open,
//[SYS_write] sys_write,
//[SYS_mknod] sys_mknod,
//[SYS_unlink] sys_unlink,
//[SYS_link] sys_link,
//[SYS_mkdir] sys_mkdir,
//[SYS_close] sys_close,
};
static void
dosyscall(void)
{
int num;
struct proc *curproc = myproc();
struct proc *p = myproc();
num = curproc->sf->rax;
num = p->tf->a7;
if(num > 0 && num < NELEM(syscalls) && syscalls[num]) {
curproc->sf->rax = syscalls[num]();
p->tf->a0 = syscalls[num]();
} else {
cprintf("%d %s: unknown sys call %d\n",
curproc->pid, curproc->name, num);
curproc->sf->rax = -1;
printf("%d %s: unknown sys call %d\n",
p->pid, p->name, num);
p->tf->a0 = -1;
}
}
void
syscall(struct sysframe *sf)
syscall()
{
if(myproc()->killed)
exit();
myproc()->sf = sf;
dosyscall();
if(myproc()->killed)
exit();

View file

@ -41,11 +41,11 @@ static int
fdalloc(struct file *f)
{
int fd;
struct proc *curproc = myproc();
struct proc *p = myproc();
for(fd = 0; fd < NOFILE; fd++){
if(curproc->ofile[fd] == 0){
curproc->ofile[fd] = f;
if(p->ofile[fd] == 0){
p->ofile[fd] = f;
return fd;
}
}
@ -374,7 +374,7 @@ sys_chdir(void)
{
char *path;
struct inode *ip;
struct proc *curproc = myproc();
struct proc *p = myproc();
begin_op();
if(argstr(0, &path) < 0 || (ip = namei(path)) == 0){
@ -388,9 +388,9 @@ sys_chdir(void)
return -1;
}
iunlock(ip);
iput(curproc->cwd);
iput(p->cwd);
end_op();
curproc->cwd = ip;
p->cwd = ip;
return 0;
}

View file

@ -1,18 +1,11 @@
#include "types.h"
#include "x86.h"
#include "riscv.h"
#include "defs.h"
#include "date.h"
#include "param.h"
#include "memlayout.h"
#include "mmu.h"
#include "proc.h"
int
sys_fork(void)
{
return fork();
}
int
sys_exit(void)
{
@ -20,12 +13,25 @@ sys_exit(void)
return 0; // not reached
}
// System call: return the pid of the calling process.
int
sys_getpid(void)
{
return myproc()->pid;
}
// System call: thin wrapper that returns whatever fork() returns.
int
sys_fork(void)
{
return fork();
}
// System call: thin wrapper that returns whatever wait() returns.
int
sys_wait(void)
{
return wait();
}
#if 0
int
sys_kill(void)
{
@ -36,12 +42,6 @@ sys_kill(void)
return kill(pid);
}
int
sys_getpid(void)
{
return myproc()->pid;
}
int
sys_sbrk(void)
{
@ -89,3 +89,4 @@ sys_uptime(void)
release(&tickslock);
return xticks;
}
#endif

108
trampoline.S Normal file
View file

@ -0,0 +1,108 @@
#
# code to switch between user and kernel space.
#
# this code is mapped at the same virtual address
# in user and kernel space so that it can switch
# page tables.
#
# kernel.ld causes trampstart to be aligned
# to a page boundary.
#
.globl usertrap
.section trampoline
.globl trampstart
trampstart:
# switch from kernel to user.
# a0: p->tf in user page table
# a1: new value for satp, for user page table
# switch to user page table
csrw satp, a1
# put the saved user a0 in sscratch, so we
# can swap it with our a0 (p->tf) in the last step.
ld t0, 80(a0)
csrw sscratch, t0
# restore all but a0 from p->tf
ld ra, 32(a0)
ld sp, 40(a0)
ld gp, 48(a0)
ld tp, 56(a0)
ld t0, 64(a0)
ld t1, 72(a0)
ld t2, 80(a0)
ld a1, 96(a0)
ld a2, 104(a0)
ld a3, 112(a0)
ld a4, 120(a0)
ld a5, 128(a0)
ld a6, 136(a0)
ld a7, 144(a0)
ld t3, 152(a0)
ld t4, 160(a0)
ld t5, 168(a0)
ld t6, 176(a0)
# restore user a0, and save p->tf
csrrw a0, sscratch, a0
# return to user mode and user pc.
# caller has set up sstatus and sepc.
sret
#
# trap.c set stvec to point here, so
# interrupts and exceptions start here,
# in supervisor mode, but with a
# user page table.
#
# sscratch points to where the process's p->tf is
# mapped into user space (TRAMPOLINE - 4096).
#
.align 4
.globl trampvec
trampvec:
# swap a0 and sscratch
# so that a0 is p->tf
csrrw a0, sscratch, a0
# save the user registers in p->tf
sd ra, 32(a0)
sd sp, 40(a0)
sd gp, 48(a0)
sd tp, 56(a0)
sd t0, 64(a0)
sd t1, 72(a0)
sd t2, 80(a0)
sd a1, 96(a0)
sd a2, 104(a0)
sd a3, 112(a0)
sd a4, 120(a0)
sd a5, 128(a0)
sd a6, 136(a0)
sd a7, 144(a0)
sd t3, 152(a0)
sd t4, 160(a0)
sd t5, 168(a0)
sd t6, 176(a0)
# save the user a0 in p->tf->a0
csrr t0, sscratch
sd t0, 80(a0)
# restore kernel stack pointer from p->tf->kernel_sp
ld sp, 8(a0)
# remember the address of usertrap(), p->tf->kernel_trap
ld t0, 16(a0)
# restore kernel page table from p->tf->kernel_satp
ld t1, 0(a0)
csrw satp, t1
# a0 is no longer valid, since the kernel page
# table does not specially map p->td.
# jump to usertrap(), which does not return
jr t0

160
trap.c
View file

@ -1,109 +1,113 @@
#include "types.h"
#include "defs.h"
#include "param.h"
#include "memlayout.h"
#include "mmu.h"
#include "riscv.h"
#include "proc.h"
#include "x86.h"
#include "traps.h"
#include "spinlock.h"
#include "defs.h"
// Interrupt descriptor table (shared by all CPUs).
struct intgate idt[256];
extern uint64 vectors[]; // in vectors.S: array of 256 entry pointers
struct spinlock tickslock;
uint ticks;
extern char trampstart[], trampvec[];
void kerneltrap();
void
tvinit(void)
trapinit(void)
{
int i;
for(i=0; i<256; i++) {
idt[i] = INTDESC(SEG_KCODE, vectors[i], INT_P | SEG_INTR64);
}
idtinit();
// send interrupts and exceptions to kerneltrap().
w_stvec((uint64)kerneltrap);
initlock(&tickslock, "time");
}
//
// handle an interrupt, exception, or system call from user space.
// called from trampoline.S
//
void
idtinit(void)
usertrap(void)
{
struct desctr dtr;
if((r_sstatus() & SSTATUS_SPP) != 0)
panic("usertrap: not from user mode");
dtr.limit = sizeof(idt) - 1;
dtr.base = (uint64)idt;
lidt((void *)&dtr.limit);
// send interrupts and exceptions to kerneltrap(),
// since we're now in the kernel.
w_stvec((uint64)kerneltrap);
struct proc *p = myproc();
// save user program counter.
p->tf->epc = r_sepc();
if(r_scause() == 8){
// system call
printf("usertrap(): system call pid=%d syscall=%d\n", p->pid, p->tf->a7);
// sepc points to the ecall instruction,
// but we want to return to the next instruction.
p->tf->epc += 4;
syscall();
} else {
printf("usertrap(): unexpected scause 0x%x pid=%d\n", r_scause(), p->pid);
panic("usertrap");
}
usertrapret();
}
//PAGEBREAK: 41
//
// return to user space
//
void
trap(struct trapframe *tf)
usertrapret(void)
{
switch(tf->trapno){
case T_IRQ0 + IRQ_TIMER:
if(cpuid() == 0){
acquire(&tickslock);
ticks++;
wakeup(&ticks);
release(&tickslock);
}
lapiceoi();
break;
case T_IRQ0 + IRQ_IDE:
ideintr();
lapiceoi();
break;
case T_IRQ0 + IRQ_IDE+1:
// Bochs generates spurious IDE1 interrupts.
break;
case T_IRQ0 + IRQ_KBD:
kbdintr();
lapiceoi();
break;
case T_IRQ0 + IRQ_COM1:
uartintr();
lapiceoi();
break;
case T_IRQ0 + 7:
case T_IRQ0 + IRQ_SPURIOUS:
cprintf("cpu%d: spurious interrupt at %x:%x\n",
cpuid(), tf->cs, tf->rip);
lapiceoi();
break;
struct proc *p = myproc();
//PAGEBREAK: 13
default:
if(myproc() == 0 || (tf->cs&3) == 0){
// In kernel, it must be our mistake.
cprintf("unexpected trap %d from cpu %d rip %x (cr2=0x%x)\n",
tf->trapno, cpuid(), tf->rip, rcr2());
panic("trap");
}
// In user space, assume process misbehaved.
cprintf("pid %d %s: trap %d err %d on cpu %d "
"rip 0x%x addr 0x%x--kill proc\n",
myproc()->pid, myproc()->name, tf->trapno,
tf->err, cpuid(), tf->rip, rcr2());
myproc()->killed = 1;
}
// XXX turn off interrupts, since we're switching
// now from kerneltrap() to usertrap().
// Force process exit if it has been killed and is in user space.
// (If it is still executing in the kernel, let it keep running
// until it gets to the regular system call return.)
if(myproc() && myproc()->killed && (tf->cs&3) == DPL_USER)
exit();
// send interrupts and exceptions to trampoline.S
w_stvec(TRAMPOLINE + (trampvec - trampstart));
// Force process to give up CPU on clock tick.
// If interrupts were on while locks held, would need to check nlock.
if(myproc() && myproc()->state == RUNNING &&
tf->trapno == T_IRQ0+IRQ_TIMER)
yield();
// set up values that trampoline.S will need when
// the process next re-enters the kernel.
p->tf->kernel_satp = r_satp();
p->tf->kernel_sp = (uint64)p->kstack + PGSIZE;
p->tf->kernel_trap = (uint64)usertrap;
// Check if the process has been killed since we yielded
if(myproc() && myproc()->killed && (tf->cs&3) == DPL_USER)
exit();
// set up the registers that trampoline.S's sret will use
// to get to user space.
// set S Previous Privilege mode to User.
unsigned long x = r_sstatus();
x &= ~SSTATUS_SPP; // clear SPP to 0 for user mode
w_sstatus(x);
// set S Exception Program Counter to the saved user pc.
w_sepc(p->tf->epc);
// tell trampoline.S the user page table to switch to.
uint64 satp = MAKE_SATP(p->pagetable);
// jump to trampoline.S at the top of memory, which
// switches to the user page table, restores user registers,
// and switches to user mode with sret.
((void (*)(uint64,uint64))TRAMPOLINE)(TRAMPOLINE - PGSIZE, satp);
}
// Trap handler for interrupts and exceptions taken while running
// kernel code; it runs on whichever kernel stack is current.
// Aligned to 4 bytes so that its address fits in stvec.
void __attribute__ ((aligned (4)))
kerneltrap()
{
  unsigned long status = r_sstatus();

  // SPP clear means the trap did not come from supervisor mode.
  if(!(status & SSTATUS_SPP))
    panic("kerneltrap: not from supervisor mode");

  // no kernel-mode trap is handled yet.
  panic("kerneltrap");
}

36
traps.h
View file

@ -1,36 +0,0 @@
// x86 trap and interrupt constants.
// Processor-defined:
#define T_DIVIDE 0 // divide error
#define T_DEBUG 1 // debug exception
#define T_NMI 2 // non-maskable interrupt
#define T_BRKPT 3 // breakpoint
#define T_OFLOW 4 // overflow
#define T_BOUND 5 // bounds check
#define T_ILLOP 6 // illegal opcode
#define T_DEVICE 7 // device not available
#define T_DBLFLT 8 // double fault
// #define T_COPROC 9 // reserved (not used since 486)
#define T_TSS 10 // invalid task switch segment
#define T_SEGNP 11 // segment not present
#define T_STACK 12 // stack exception
#define T_GPFLT 13 // general protection fault
#define T_PGFLT 14 // page fault
// #define T_RES 15 // reserved
#define T_FPERR 16 // floating point error
#define T_ALIGN 17 // alignment check
#define T_MCHK 18 // machine check
#define T_SIMDERR 19 // SIMD floating point error
#define T_DEFAULT 500 // catchall
#define T_IRQ0 32 // IRQ 0 corresponds to int T_IRQ0
// Hardware IRQ numbers; a trap number is formed as T_IRQ0 + IRQ_*.
#define IRQ_TIMER 0
#define IRQ_KBD 1
#define IRQ_COM1 4
#define IRQ_IDE 14
#define IRQ_ERROR 19
#define IRQ_SPURIOUS 31

74
uart.c
View file

@ -1,77 +1,51 @@
// Intel 8250 serial port (UART).
#include "memlayout.h"
#include "types.h"
#include "defs.h"
#include "param.h"
#include "traps.h"
#include "spinlock.h"
#include "sleeplock.h"
#include "fs.h"
#include "file.h"
#include "mmu.h"
#include "proc.h"
#include "x86.h"
//
// qemu -machine virt has a 16550a UART
// qemu/hw/riscv/virt.c
// http://byterunner.com/16550.html
//
// caller should lock.
//
#define COM1 0x3f8
static int uart; // is there a uart?
// Address of UART control register number reg.
// The 16550a registers on qemu's virt machine are one byte wide and
// sit at consecutive byte addresses starting at UART0, so the stride
// is 1 and accesses are byte-sized. The pointer is volatile so the
// compiler neither drops nor merges device accesses (for example two
// back-to-back stores to the same register while setting the baud rate).
#define R(reg) ((volatile unsigned char*)(UART0 + (reg)))
void
uartinit(void)
{
char *p;
// disable interrupts
*R(1) = 0x00;
// Turn off the FIFO
outb(COM1+2, 0);
// special mode to set baud rate
*R(3) = 0x80;
// 9600 baud, 8 data bits, 1 stop bit, parity off.
outb(COM1+3, 0x80); // Unlock divisor
outb(COM1+0, 115200/9600);
outb(COM1+1, 0);
outb(COM1+3, 0x03); // Lock divisor, 8 data bits.
outb(COM1+4, 0);
outb(COM1+1, 0x01); // Enable receive interrupts.
// LSB for baud rate of 38.4K
*R(0) = 0x03;
// If status is 0xFF, no serial port.
if(inb(COM1+5) == 0xFF)
return;
uart = 1;
// MSB for baud rate of 38.4K
*R(1) = 0x00;
// Acknowledge pre-existing interrupt conditions;
// enable interrupts.
inb(COM1+2);
inb(COM1+0);
ioapicenable(IRQ_COM1, 0);
// leave set-baud mode,
// and set word length to 8 bits, no parity.
*R(3) = 0x03;
// Announce that we're here.
for(p="xv6...\n"; *p; p++)
uartputc(*p);
// reset and enable FIFOs.
*R(2) = 0x07;
}
void
uartputc(int c)
{
int i;
if(!uart)
return;
for(i = 0; i < 128 && !(inb(COM1+5) & 0x20); i++)
microdelay(10);
outb(COM1+0, c);
*R(0) = c;
}
// Fetch one received byte from the serial port without waiting.
// Returns -1 if no UART was detected or if no input is pending.
static int
uartgetc(void)
{
  // bit 0 of the status register at COM1+5 must be set before
  // the data register at COM1+0 is read.
  if(uart && (inb(COM1+5) & 0x01))
    return inb(COM1+0);
  return -1;
}
// UART interrupt handler: hands uartgetc to consoleintr() as the
// function to call for input.
// NOTE(review): consoleintr() is defined elsewhere; presumably it calls
// the callback until it returns -1 -- confirm against console.c.
void
uartintr(void)
{
consoleintr(uartgetc);
}

496
vm.c
View file

@ -1,230 +1,162 @@
#include "param.h"
#include "types.h"
#include "defs.h"
#include "x86.h"
#include "msr.h"
#include "memlayout.h"
#include "mmu.h"
#include "proc.h"
#include "elf.h"
#include "traps.h"
#include "riscv.h"
#include "defs.h"
extern char data[]; // defined by kernel.ld
void sysentry(void);
/*
* the kernel's page table.
*/
pagetable_t kernel_pagetable;
static pde_t *kpml4; // kernel address space, used by scheduler and bootup
extern char etext[]; // kernel.ld sets this to end of kernel code.
// Bootstrap GDT. Used by boot.S but defined in C
// Map "logical" addresses to virtual addresses using identity map.
// Cannot share a CODE descriptor for both kernel and user
// because it would have to have DPL_USR, but the CPU forbids
// an interrupt from CPL=0 to DPL=3.
struct segdesc bootgdt[NSEGS] = {
[0] = SEGDESC(0, 0, 0), // null
[1] = SEGDESC(0, 0xfffff, SEG_R|SEG_CODE|SEG_S|SEG_DPL(0)|SEG_P|SEG_D|SEG_G), // 32-bit kernel code
[2] = SEGDESC(0, 0, SEG_R|SEG_CODE|SEG_S|SEG_DPL(0)|SEG_P|SEG_L|SEG_G), // 64-bit kernel code
[3] = SEGDESC(0, 0xfffff, SEG_W|SEG_S|SEG_DPL(0)|SEG_P|SEG_D|SEG_G), // kernel data
// The order of the user data and user code segments is
// important for syscall instructions. See initseg.
[6] = SEGDESC(0, 0xfffff, SEG_W|SEG_S|SEG_DPL(3)|SEG_P|SEG_D|SEG_G), // 64-bit user data
[7] = SEGDESC(0, 0, SEG_R|SEG_CODE|SEG_S|SEG_DPL(3)|SEG_P|SEG_L|SEG_G), // 64-bit user code
};
extern char trampstart[]; // trampoline.S
// Set up CPU's kernel segment descriptors.
// Run once on entry on each CPU.
/*
* create a direct-map page table for the kernel and
* turn on paging. called early, in supervisor mode.
* the page allocator is already initialized.
*/
void
seginit(void)
kvminit()
{
struct cpu *c;
struct desctr dtr;
kernel_pagetable = (pagetable_t) kalloc();
memset(kernel_pagetable, 0, PGSIZE);
c = getmycpu();
// uart registers
mappages(kernel_pagetable, UART0, PGSIZE,
UART0, PTE_R | PTE_W);
memmove(c->gdt, bootgdt, sizeof bootgdt);
dtr.limit = sizeof(c->gdt)-1;
dtr.base = (uint64) c->gdt;
lgdt((void *)&dtr.limit);
// map kernel text executable and read-only.
mappages(kernel_pagetable, KERNBASE, (uint64)etext-KERNBASE,
KERNBASE, PTE_R | PTE_X);
// When executing a syscall instruction the CPU sets the SS selector
// to (star >> 32) + 8 and the CS selector to (star >> 32).
// When executing a sysret instruction the CPU sets the SS selector
// to (star >> 48) + 8 and the CS selector to (star >> 48) + 16.
uint64 star = ((((uint64)SEG_UCODE|0x3)- 16)<<48)|((uint64)(SEG_KCODE)<<32);
writemsr(MSR_STAR, star);
writemsr(MSR_LSTAR, (uint64)&sysentry);
writemsr(MSR_SFMASK, FL_TF | FL_IF);
// map kernel data and the physical RAM we'll make use of.
mappages(kernel_pagetable, (uint64)etext, PHYSTOP-(uint64)etext,
(uint64)etext, PTE_R | PTE_W);
// Initialize cpu-local storage so that each core can easily
// find its struct cpu using %gs.
writegs(SEG_KDATA);
writemsr(MSR_GS_BASE, (uint64)c);
writemsr(MSR_GS_KERNBASE, (uint64)c);
c->cpu = c;
// map the trampoline for trap entry/exit to
// the highest virtual address in the kernel.
mappages(kernel_pagetable, TRAMPOLINE, PGSIZE,
(uint64)trampstart, PTE_R | PTE_X);
kvmswitch();
}
// Return the address of the PTE in page table pgdir
// Switch h/w page table register to the kernel's page table,
// and enable paging.
// Writes satp with the value MAKE_SATP builds from kernel_pagetable.
// NOTE(review): no sfence.vma is issued after the satp write; confirm
// whether stale address-translation entries need flushing here.
void
kvmswitch(void)
{
w_satp(MAKE_SATP(kernel_pagetable));
}
// Return the address of the PTE in page table pagetable
// that corresponds to virtual address va. If alloc!=0,
// create any required page table pages.
//
// The risc-v Sv39 scheme has three levels of page table
// pages. A page table page contains 512 64-bit PTEs.
// A 64-bit virtual address is split into five fields:
// 39..63 -- must be zero.
// 30..38 -- 9 bits of level-2 index.
// 21..29 -- 9 bits of level-1 index.
// 12..20 -- 9 bits of level-0 index.
// 0..11 -- 12 bits of byte offset within the page.
static pte_t *
walkpgdir(pde_t *pml4, const void *va, int alloc)
walk(pagetable_t pagetable, const void *va, int alloc)
{
pde_t *pgdir = pml4;
pde_t *pde;
int level;
if((uint64)va >= MAXVA)
panic("walk");
for (level = L_PML4; level > 0; level--) {
pde = &pgdir[PX(level, va)];
if(*pde & PTE_P)
pgdir = (pte_t*)P2V(PTE_ADDR(*pde));
else {
if(!alloc || (pgdir = (pde_t*)kalloc()) == 0)
for(int level = 2; level > 0; level--) {
pte_t *pte = &pagetable[PX(level, va)];
if(*pte & PTE_V) {
pagetable = (pagetable_t)PTE2PA(*pte);
} else {
if(!alloc || (pagetable = (pde_t*)kalloc()) == 0)
return 0;
memset(pgdir, 0, PGSIZE);
*pde = V2P(pgdir) | PTE_P | PTE_W | PTE_U;
memset(pagetable, 0, PGSIZE);
*pte = PA2PTE(pagetable) | PTE_V;
}
}
return &pgdir[PX(level, va)];
return &pagetable[PX(0, va)];
}
// Create PTEs for virtual addresses starting at va that refer to
// physical addresses starting at pa. va and size might not
// be page-aligned.
static int
mappages(pde_t *pgdir, void *va, uint64 size, uint64 pa, int perm)
void
mappages(pagetable_t pagetable, uint64 va, uint64 size, uint64 pa, int perm)
{
char *a, *last;
pte_t *pte;
a = (char*)PGROUNDDOWN((uint64)va);
last = (char*)PGROUNDDOWN(((uint64)va) + size - 1);
a = (char*)PGROUNDDOWN(va);
last = (char*)PGROUNDDOWN(va + size - 1);
for(;;){
if((pte = walkpgdir(pgdir, a, 1)) == 0)
return -1;
if(*pte & PTE_P)
if((pte = walk(pagetable, a, 1)) == 0)
panic("mappages: walk");
if(*pte & PTE_V)
panic("remap");
*pte = pa | perm | PTE_P;
*pte = PA2PTE(pa) | perm | PTE_V;
if(a == last)
break;
a += PGSIZE;
pa += PGSIZE;
}
return 0;
}
// There is one page table per process, plus one that's used when
// a CPU is not running any process (kpml4). The kernel uses the
// current process's page table during system calls and interrupts;
// page protection bits prevent user code from using the kernel's
// mappings.
//
// setupkvm() and exec() set up every page table like this:
//
// 0..KERNBASE: user memory (text+data+stack+heap), mapped to
// phys memory allocated by the kernel
// KERNBASE..KERNBASE+EXTMEM: mapped to 0..EXTMEM (for I/O space)
// KERNBASE+EXTMEM..data: mapped to EXTMEM..V2P(data)
// for the kernel's instructions and r/o data
// data..KERNBASE+PHYSTOP: mapped to V2P(data)..PHYSTOP,
// rw data + free physical memory
// 0xfe000000..0: mapped direct (devices such as ioapic)
//
// The kernel allocates physical memory for its heap and for user memory
// between V2P(end) and the end of physical memory (PHYSTOP)
// (directly addressable from end..P2V(PHYSTOP)).
// This table defines the kernel's mappings, which are present in
// every process's page table.
static struct kmap {
void *virt;
uint64 phys_start;
uint64 phys_end;
int perm;
} kmap[] = {
{ (void*)KERNBASE, 0, EXTMEM, PTE_W}, // I/O space
{ (void*)KERNLINK, V2P(KERNLINK), V2P(data), 0}, // kern text+rodata
{ (void*)data, V2P(data), PHYSTOP, PTE_W}, // kern data+memory
{ (void*)P2V(DEVSPACE), DEVSPACE, DEVSPACETOP, PTE_W}, // more devices
};
// Set up kernel part of a page table.
pde_t*
setupkvm(void)
// Remove mappings from a page table. The mappings in
// the given range must exist. Optionally free the
// physical memory.
void
unmappages(pagetable_t pagetable, uint64 va, uint64 size, int do_free)
{
pde_t *pml4;
struct kmap *k;
char *a, *last;
pte_t *pte;
uint64 pa;
if((pml4 = (pde_t*)kalloc()) == 0)
return 0;
memset(pml4, 0, PGSIZE);
if (PHYSTOP > DEVSPACE)
panic("PHYSTOP too high");
for(k = kmap; k < &kmap[NELEM(kmap)]; k++) {
if(mappages(pml4, k->virt, k->phys_end - k->phys_start,
(uint)k->phys_start, k->perm) < 0) {
freevm(pml4, 0);
return 0;
a = (char*)PGROUNDDOWN(va);
last = (char*)PGROUNDDOWN(va + size - 1);
for(;;){
if((pte = walk(pagetable, a, 0)) == 0)
panic("unmappages: walk");
if((*pte & PTE_V) == 0)
panic("unmappages: not mapped");
if(PTE_FLAGS(*pte) == PTE_V)
panic("unmappages: not a leaf");
if(do_free){
pa = PTE2PA(*pte);
kfree((void*)pa);
}
*pte = 0;
if(a == last)
break;
a += PGSIZE;
pa += PGSIZE;
}
return pml4;
}
// Allocate one page table for the machine for the kernel address
// space for scheduler processes.
void
kvmalloc(void)
// create an empty user page table.
pagetable_t
uvmcreate()
{
kpml4 = setupkvm();
switchkvm();
pagetable_t pagetable;
pagetable = (pagetable_t) kalloc();
if(pagetable == 0)
panic("uvmcreate: out of memory");
memset(pagetable, 0, PGSIZE);
return pagetable;
}
// Switch h/w page table register to the kernel-only page table,
// for when no process is running.
// Loads cr3 with the physical address (V2P) of kpml4.
void
switchkvm(void)
{
lcr3(V2P(kpml4)); // switch to the kernel page table
}
// Switch TSS and h/w page table to correspond to process p.
// Panics if p, its kernel stack, or its page table is missing.
void
switchuvm(struct proc *p)
{
struct desctr dtr;
struct cpu *c;
if(p == 0)
panic("switchuvm: no process");
if(p->kstack == 0)
panic("switchuvm: no kstack");
if(p->pgdir == 0)
panic("switchuvm: no pgdir");
// NOTE(review): pushcli()/popcli() presumably keep interrupts off
// while per-CPU state is rewritten -- confirm in spinlock.c.
pushcli();
c = mycpu();
// the TSS descriptor takes two consecutive GDT slots:
// the low half and the high part of the 64-bit base.
uint64 base = (uint64) &(c->ts);
c->gdt[SEG_TSS>>3] = SEGDESC(base, (sizeof(c->ts)-1), SEG_P|SEG_TSS64A);
c->gdt[(SEG_TSS>>3)+1] = SEGDESCHI(base);
// rsp[0] is set to the top of this process's kernel stack.
c->ts.rsp[0] = (uint64) p->kstack + KSTACKSIZE;
c->ts.iomba = (ushort) 0xFFFF;
// reload the GDT and task register so the new TSS takes effect.
dtr.limit = sizeof(c->gdt) - 1;
dtr.base = (uint64)c->gdt;
lgdt((void *)&dtr.limit);
ltr(SEG_TSS);
lcr3(V2P(p->pgdir)); // switch to process's address space
popcli();
}
// Load the initcode into address 0 of pgdir.
// Load the user initcode into address 0 of pagetable,
// for the very first process.
// sz must be less than a page.
void
inituvm(pde_t *pgdir, char *init, uint sz)
uvminit(pagetable_t pagetable, char *src, uint sz)
{
char *mem;
@ -232,63 +164,8 @@ inituvm(pde_t *pgdir, char *init, uint sz)
panic("inituvm: more than a page");
mem = kalloc();
memset(mem, 0, PGSIZE);
mappages(pgdir, 0, PGSIZE, V2P(mem), PTE_W|PTE_U);
memmove(mem, init, sz);
}
// Read sz bytes of a program segment from inode ip, starting at file
// offset `offset`, into the pages mapped at addr in pgdir.
// addr must be page-aligned and every page in addr..addr+sz must
// already be mapped. Returns 0 on success, -1 if a read comes up short.
int
loaduvm(pde_t *pgdir, char *addr, struct inode *ip, uint offset, uint sz)
{
  if((uint64) addr % PGSIZE != 0)
    panic("loaduvm: addr must be page aligned");

  for(uint done = 0; done < sz; done += PGSIZE){
    pte_t *pte = walkpgdir(pgdir, addr+done, 0);
    if(pte == 0)
      panic("loaduvm: address should exist");

    // read a whole page, or whatever is left of the segment.
    uint chunk = sz - done;
    if(chunk >= PGSIZE)
      chunk = PGSIZE;

    uint64 pa = PTE_ADDR(*pte);
    if(readi(ip, P2V(pa), offset+done, chunk) != chunk)
      return -1;
  }
  return 0;
}
// Allocate page tables and physical memory to grow process from oldsz to
// newsz, which need not be page aligned. Returns new size or 0 on error.
int
allocuvm(pde_t *pgdir, uint oldsz, uint newsz)
{
char *mem;
uint64 a;
if(newsz >= KERNBASE)
return 0;
if(newsz < oldsz)
return oldsz;
a = PGROUNDUP(oldsz);
for(; a < newsz; a += PGSIZE){
mem = kalloc();
if(mem == 0){
deallocuvm(pgdir, newsz, oldsz);
return 0;
}
memset(mem, 0, PGSIZE);
if(mappages(pgdir, (char*)a, PGSIZE, V2P(mem), PTE_W|PTE_U) < 0){
deallocuvm(pgdir, newsz, oldsz);
kfree(mem);
return 0;
}
}
return newsz;
mappages(pagetable, 0, PGSIZE, (uint64)mem, PTE_W|PTE_R|PTE_X|PTE_U);
memmove(mem, src, sz);
}
// Deallocate user pages to bring the process size from oldsz to
@ -296,153 +173,66 @@ allocuvm(pde_t *pgdir, uint oldsz, uint newsz)
// need to be less than oldsz. oldsz can be larger than the actual
// process size. Returns the new process size.
int
deallocuvm(pde_t *pml4, uint64 oldsz, uint64 newsz)
uvmdealloc(pagetable_t pagetable, uint64 oldsz, uint64 newsz)
{
pte_t *pte;
uint64 a, pa;
if(newsz >= oldsz)
return oldsz;
a = PGROUNDUP(newsz);
for(; a < oldsz; a += PGSIZE){
pte = walkpgdir(pml4, (char*)a, 0);
if(!pte)
continue;
else if((*pte & PTE_P) != 0){
pa = PTE_ADDR(*pte);
if(pa == 0)
panic("kfree");
char *v = P2V(pa);
kfree(v);
*pte = 0;
}
}
unmappages(pagetable, newsz, oldsz - newsz, 1);
return newsz;
}
// Recursively free a page table
void
freelevel(pde_t *pgtab, int level) {
int i;
pde_t *pd;
if (level > 0) {
for(i = 0; i < NPDENTRIES; i++) {
if(pgtab[i] & PTE_P){
pd = (pde_t*)P2V(PTE_ADDR(pgtab[i]));
freelevel(pd, level-1);
// Recursively free page table pages.
// All leaf mappings must already have been removed.
static void
freewalk(pagetable_t pagetable)
{
// there are 2^9 = 512 PTEs in a page table.
for(int i = 0; i < 512; i++){
pte_t pte = pagetable[i];
if((pte & PTE_V) && (pte & (PTE_R|PTE_W|PTE_X)) == 0){
// this PTE points to a lower-level page table.
uint64 child = PTE2PA(pte);
freewalk((pagetable_t)child);
pagetable[i] = 0;
} else if(pte & PTE_V){
// XXX trampoline pages...
panic("freewalk: leaf");
}
}
}
kfree((char*)pgtab);
kfree((void*)pagetable);
}
// Free all the physical memory pages
// in the user part and page table
// Free user memory pages,
// then free page table pages.
void
freevm(pde_t *pml4, uint64 sz)
uvmfree(pagetable_t pagetable, uint64 sz)
{
if(pml4 == 0)
panic("freevm: no pgdir");
deallocuvm(pml4, sz, 0);
freelevel(pml4, L_PML4);
unmappages(pagetable, 0, sz, 1);
freewalk(pagetable);
}
// Clear PTE_U on a page. Used to create an inaccessible
// page beneath the user stack.
// Given a parent process's page table, copy
// its memory into a child's page table.
// Copies both the page table and the
// physical memory.
void
clearpteu(pde_t *pgdir, char *uva)
uvmcopy(pagetable_t old, pagetable_t new, uint64 sz)
{
pte_t *pte;
pte = walkpgdir(pgdir, uva, 0);
if(pte == 0)
panic("clearpteu");
*pte &= ~PTE_U;
}
// Given a parent process's page table, create a copy
// of it for a child.
pde_t*
copyuvm(pde_t *pgdir, uint sz)
{
pde_t *d;
pte_t *pte;
uint64 pa, i;
uint flags;
char *mem;
if((d = setupkvm()) == 0)
return 0;
for(i = 0; i < sz; i += PGSIZE){
if((pte = walkpgdir(pgdir, (void *) i, 0)) == 0)
if((pte = walk(old, (void *) i, 0)) == 0)
panic("copyuvm: pte should exist");
if(!(*pte & PTE_P))
if((*pte & PTE_V) == 0)
panic("copyuvm: page not present");
pa = PTE_ADDR(*pte);
pa = PTE2PA(*pte);
flags = PTE_FLAGS(*pte);
if((mem = kalloc()) == 0)
goto bad;
memmove(mem, (char*)P2V(pa), PGSIZE);
if(mappages(d, (void*)i, PGSIZE, V2P(mem), flags) < 0) {
kfree(mem);
goto bad;
panic("uvmcopy: kalloc failed");
memmove(mem, (char*)pa, PGSIZE);
mappages(new, i, PGSIZE, (uint64)mem, flags);
}
}
return d;
bad:
freevm(d, sz);
return 0;
}
//PAGEBREAK!
// Map user virtual address uva in page table pgdir to the kernel
// address of the page that backs it.
// Returns 0 if no PTE exists for uva, or if the page is not present
// (PTE_P clear) or not user-accessible (PTE_U clear).
char*
uva2ka(pde_t *pgdir, char *uva)
{
  pte_t *pte;

  pte = walkpgdir(pgdir, uva, 0);
  // with alloc == 0, walkpgdir returns 0 when an intermediate
  // page-table page is missing; don't dereference that.
  if(pte == 0)
    return 0;
  if((*pte & PTE_P) == 0)
    return 0;
  if((*pte & PTE_U) == 0)
    return 0;
  return (char*)P2V(PTE_ADDR(*pte));
}
// Copy len bytes from kernel buffer p to user virtual address va,
// as mapped by page table pgdir (which need not be the current one).
// The copy proceeds one page at a time; uva2ka() translates each page,
// so only present, PTE_U pages are written.
// Returns 0 on success, -1 if any page cannot be translated.
int
copyout(pde_t *pgdir, uint va, void *p, uint len)
{
  char *src = (char*)p;

  while(len > 0){
    uint64 page_va = (uint)PGROUNDDOWN(va);
    char *kpage = uva2ka(pgdir, (char*)page_va);
    if(kpage == 0)
      return -1;

    // copy up to the end of this page, or fewer bytes if len runs out.
    uint64 off = va - page_va;
    uint64 chunk = PGSIZE - off;
    if(chunk > len)
      chunk = len;
    memmove(kpage + off, src, chunk);

    len -= chunk;
    src += chunk;
    va = page_va + PGSIZE;
  }
  return 0;
}
//PAGEBREAK!
// Blank page.
//PAGEBREAK!
// Blank page.
//PAGEBREAK!
// Blank page.

198
x86.h
View file

@ -1,198 +0,0 @@
// Routines to let C code use special x86 instructions.
#ifndef __ASSEMBLER__
// Read one byte from I/O port `port` with the x86 `in` instruction
// and return it.
static inline uchar
inb(ushort port)
{
uchar data;
// "=a": the result comes back in the a register; "d": the port number
// is passed in the d register.
asm volatile("in %1,%0" : "=a" (data) : "d" (port));
return data;
}
// Read cnt 32-bit words from I/O port `port` into memory at addr,
// using `rep insl`. `cld` clears the direction flag first so the
// destination address advances upward.
static inline void
insl(int port, void *addr, int cnt)
{
// addr and cnt are both inputs and outputs ("0"/"1" tie them to
// "=D"/"=c") because the instruction updates those registers.
// "memory" tells the compiler the asm writes memory.
asm volatile("cld; rep insl" :
"=D" (addr), "=c" (cnt) :
"d" (port), "0" (addr), "1" (cnt) :
"memory", "cc");
}
// Write the byte `data` to I/O port `port` with the x86 `out` instruction.
static inline void
outb(ushort port, uchar data)
{
asm volatile("out %0,%1" : : "a" (data), "d" (port));
}
// Write the 16-bit value `data` to I/O port `port` with the x86 `out`
// instruction.
static inline void
outw(ushort port, ushort data)
{
asm volatile("out %0,%1" : : "a" (data), "d" (port));
}
// Write cnt 32-bit words from memory at addr to I/O port `port`,
// using `rep outsl`. `cld` clears the direction flag first so the
// source address advances upward.
// NOTE(review): unlike insl() there is no "memory" clobber, so the
// compiler is not told that the asm reads the buffer -- confirm that
// callers cannot have pending stores to it reordered past this call.
static inline void
outsl(int port, const void *addr, int cnt)
{
asm volatile("cld; rep outsl" :
"=S" (addr), "=c" (cnt) :
"d" (port), "0" (addr), "1" (cnt) :
"cc");
}
// Store the low byte of `data` into cnt consecutive bytes starting at
// addr, using `rep stosb` (a byte-wise fill).
static inline void
stosb(void *addr, int data, int cnt)
{
// addr/cnt are tied in and out because the instruction updates the
// D and c registers; "memory" marks the write to *addr.
asm volatile("cld; rep stosb" :
"=D" (addr), "=c" (cnt) :
"0" (addr), "1" (cnt), "a" (data) :
"memory", "cc");
}
// Store the 32-bit value `data` into cnt consecutive 32-bit words
// starting at addr, using `rep stosl`. Note cnt counts words, not bytes.
static inline void
stosl(void *addr, int data, int cnt)
{
asm volatile("cld; rep stosl" :
"=D" (addr), "=c" (cnt) :
"0" (addr), "1" (cnt), "a" (data) :
"memory", "cc");
}
// Execute `lgdt` on the descriptor-table pointer stored at p
// (a limit followed by a base address).
static inline void
lgdt(void *p)
{
asm volatile("lgdt (%0)" : : "r" (p) : "memory");
}
// Execute `lidt` on the descriptor-table pointer stored at p
// (a limit followed by a base address).
static inline void
lidt(void *p)
{
asm volatile("lidt (%0)" : : "r" (p) : "memory");
}
// Load the task register with segment selector sel (`ltr`).
static inline void
ltr(ushort sel)
{
asm volatile("ltr %0" : : "r" (sel));
}
// Return the current flags register: `pushf` pushes it on the stack
// and `pop` moves it into the result.
static inline uint64
readeflags(void)
{
uint64 eflags;
asm volatile("pushf; pop %0" : "=r" (eflags));
return eflags;
}
// Load the %gs segment register with selector v.
// Executes the same instruction as writegs() later in this file.
static inline void
loadgs(ushort v)
{
asm volatile("movw %0, %%gs" : : "r" (v));
}
// Execute `cli`, which clears the interrupt flag (interrupts disabled
// on this CPU).
static inline void
cli(void)
{
asm volatile("cli");
}
// Execute `sti`, which sets the interrupt flag (interrupts enabled
// on this CPU).
static inline void
sti(void)
{
asm volatile("sti");
}
// Atomically swap newval into *addr and return the value that was
// stored there before (`lock; xchgl`).
static inline uint
xchg(volatile uint *addr, uint newval)
{
uint result;
// The + in "+m" denotes a read-modify-write operand.
// "1" (newval) places newval in the same register as result, so the
// exchange leaves the old memory value in result.
asm volatile("lock; xchgl %0, %1" :
"+m" (*addr), "=a" (result) :
"1" (newval) :
"cc");
return result;
}
// Read control register cr2.
// NOTE(review): cr2 is read into a uint64 but the function returns
// uint, so the value is narrowed on return -- confirm whether callers
// need the full 64-bit value.
static inline uint
rcr2(void)
{
uint64 val;
asm volatile("mov %%cr2,%0" : "=r" (val));
return val;
}
// Load control register cr3 with val. Callers in vm.c pass the
// physical address of a top-level page table to switch address spaces.
static inline void
lcr3(uint64 val)
{
asm volatile("mov %0,%%cr3" : : "r" (val));
}
// Load the %gs segment register with selector v.
// Executes the same instruction as loadgs() earlier in this file;
// only the parameter type spelling and the asm keyword differ.
static inline void
writegs(uint16 v)
{
__asm volatile("movw %0, %%gs" : : "r" (v));
}
//PAGEBREAK: 36
// Layout of the trap frame built on the stack by the
// hardware and by trapasm.S, and passed to trap().
// The struct is packed, so each field's offset is just the sum of the
// sizes before it; cs lands at byte 144, which is what TF_CS encodes.
struct trapframe {
// general-purpose registers
uint64 rax;
uint64 rbx;
uint64 rcx;
uint64 rdx;
uint64 rbp;
uint64 rsi;
uint64 rdi;
uint64 r8;
uint64 r9;
uint64 r10;
uint64 r11;
uint64 r12;
uint64 r13;
uint64 r14;
uint64 r15;
// trap number and error code
uint64 trapno;
uint64 err;
// saved instruction pointer, code segment, flags, stack pointer
// and stack segment
uint64 rip;
uint16 cs;
uint16 padding[3]; // pads cs out to 8 bytes
uint64 rflags;
uint64 rsp;
uint64 ss;
}__attribute__((packed));
// Register frame for the system-call path, grouped by role.
// NOTE(review): presumably filled in by the syscall entry assembly
// (sysentry) -- confirm the field order against that code, since the
// struct is packed and the order is significant.
struct sysframe {
// arguments
uint64 rdi;
uint64 rsi;
uint64 rdx;
uint64 r10;
uint64 r8;
uint64 r9;
// callee-saved registers
uint64 r15;
uint64 r14;
uint64 r13;
uint64 r12;
uint64 rbx;
uint64 rbp;
// return value
uint64 rax;
// syscall registers
uint64 r11; // eflags
uint64 rcx; // rip
uint64 rsp;
}__attribute__((packed));
#endif
#define TF_CS 144 // offset in trapframe for saved cs