Checkpoint port of xv6 to x86-64. Passed usertests on 2 processors a few times.
The x86-64 doesn't just add two levels to page tables to support 64-bit addresses, but is a different processor. For example, calling conventions, system calls, and segmentation are different from 32-bit x86. Segmentation is basically gone, but gs/fs in combination with MSRs can be used to hold a per-core pointer. In general, x86-64 is more straightforward than 32-bit x86. The port uses code from sv6 and the xv6 "rsc-amd64" branch. A summary of the changes is as follows: - Booting: switch to grub instead of xv6's bootloader (pass -kernel to qemu), because xv6's boot loader doesn't understand 64bit ELF files. And, we don't care anymore about booting. - Makefile: use -m64 instead of -m32 flag for gcc, delete boot loader, xv6.img, bochs, and memfs. For now don't use -O2, since usertests with -O2 is bigger than MAXFILE! - Update gdb.tmpl to be for i386 or x86-64 - Console/printf: use stdarg.h and treat 64-bit addresses differently from ints (32-bit) - Update elfhdr to be 64 bit - entry.S/entryother.S: add code to switch to 64-bit mode: build a simple page table in 32-bit mode before switching to 64-bit mode, share code for entering boot processor and APs, and tweak boot gdt. The boot gdt is the gdt that the kernel proper also uses. (In 64-bit mode, the gdt/segmentation and task state mostly disappear.) - exec.c: fix passing argv (64-bit now instead of 32-bit). - initcode.c: use syscall instead of int. - kernel.ld: load kernel very high, in top terabyte. 64 bits is a lot of address space! - proc.c: initial return is through new syscall path instead of trapret. - proc.h: update struct cpu to have some scratch space since syscall saves less state than int, update struct context to reflect x86-64 calling conventions. - swtch: simplify for x86-64 calling conventions. - syscall: add fetcharg to handle x86-64 calling conventions (6 arguments are passed through registers), and fetchaddr to read a 64-bit value from user space.
- sysfile: update to handle pointers from user space (e.g., sys_exec), which are 64 bits. - trap.c: no special trap vector for sys calls, because x86-64 has a different plan for system calls. - trapasm: one plan for syscalls and one plan for traps (interrupt and exceptions). On x86-64, the kernel is responsible for switching user/kernel stacks. To do so, xv6 keeps some scratch space in the cpu structure, and uses MSR GS_KERN_BASE to point to the core's cpu structure (using swapgs). - types.h: add uint64, and change pde_t to uint64 - usertests: exit() when fork fails, which helped in tracking down one of the bugs in the switch from 32-bit to 64-bit - vectors: update to make them 64 bits - vm.c: use bootgdt in kernel too, program MSRs for syscalls and core-local state (for swapgs), walk 4 levels in walkpgdir, add DEVSPACETOP, use task segment to set kernel stack for interrupts (but simpler than in 32-bit mode), add an extra argument to freevm (size of user part of address space) to avoid checking all entries till KERNBASE (there are MANY TB before the top 1TB). - x86: update trapframe to have 64-bit entries, which is what the processor pushes on syscalls and traps. simplify lgdt and lidt, using struct desctr, which needs the gcc directives packed and aligned. TODO: - use int32 instead of int? - simplify curproc(). xv6 has per-cpu state again, but this time it must have it. - avoid repetition in walkpgdir - fix validateint() in usertests.c - fix bugs (e.g., observed one case of entering the kernel with an invalid gs or proc)
This commit is contained in:
parent
b818915f79
commit
ab0db651af
|
@ -1,27 +0,0 @@
|
||||||
set $lastcs = -1
|
|
||||||
|
|
||||||
define hook-stop
|
|
||||||
# There doesn't seem to be a good way to detect if we're in 16- or
|
|
||||||
# 32-bit mode, but in 32-bit mode we always run with CS == 8 in the
|
|
||||||
# kernel and CS == 35 in user space
|
|
||||||
if $cs == 8 || $cs == 35
|
|
||||||
if $lastcs != 8 && $lastcs != 35
|
|
||||||
set architecture i386
|
|
||||||
end
|
|
||||||
x/i $pc
|
|
||||||
else
|
|
||||||
if $lastcs == -1 || $lastcs == 8 || $lastcs == 35
|
|
||||||
set architecture i8086
|
|
||||||
end
|
|
||||||
# Translate the segment:offset into a physical address
|
|
||||||
printf "[%4x:%4x] ", $cs, $eip
|
|
||||||
x/i $cs*16+$eip
|
|
||||||
end
|
|
||||||
set $lastcs = $cs
|
|
||||||
end
|
|
||||||
|
|
||||||
echo + target remote localhost:1234\n
|
|
||||||
target remote localhost:1234
|
|
||||||
|
|
||||||
echo + symbol-file kernel\n
|
|
||||||
symbol-file kernel
|
|
5
.gdbinit.tmpl-i386
Normal file
5
.gdbinit.tmpl-i386
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
python
|
||||||
|
gdb.execute("target remote localhost:26000")
|
||||||
|
gdb.execute("set architecture i386")
|
||||||
|
gdb.execute("symbol-file kernel")
|
||||||
|
gdb.execute("break *0x7c00")
|
18
.gdbinit.tmpl-x64
Normal file
18
.gdbinit.tmpl-x64
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
#if you would like to use gdb in 32bit mode, comment out lines 8 and 15, then uncomment
|
||||||
|
#the lines after. Note this will only work properly until 64bit mode is enabled in entry.S
|
||||||
|
|
||||||
|
python
|
||||||
|
gdb.execute("set architecture i386:x86-64:intel")
|
||||||
|
gdb.execute("target remote localhost:26000")
|
||||||
|
gdb.execute("symbol-file kernel")
|
||||||
|
gdb.execute("break start64")
|
||||||
|
#gdb.execute("break *0x7c00")
|
||||||
|
try:
|
||||||
|
gdb.execute("continue")
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
gdb.execute("disconnect")
|
||||||
|
gdb.execute("set architecture i386:x86-64")
|
||||||
|
#gdb.execute("set architecture i386")
|
||||||
|
gdb.execute("target remote localhost:26000")
|
||||||
|
gdb.execute("delete break 1")
|
79
Makefile
79
Makefile
|
@ -51,7 +51,7 @@ TOOLPREFIX := $(shell if i386-jos-elf-objdump -i 2>&1 | grep '^elf32-i386$$' >/d
|
||||||
endif
|
endif
|
||||||
|
|
||||||
# If the makefile can't find QEMU, specify its path here
|
# If the makefile can't find QEMU, specify its path here
|
||||||
# QEMU = qemu-system-i386
|
QEMU = qemu-system-x86_64
|
||||||
|
|
||||||
# Try to infer the correct QEMU
|
# Try to infer the correct QEMU
|
||||||
ifndef QEMU
|
ifndef QEMU
|
||||||
|
@ -76,11 +76,16 @@ AS = $(TOOLPREFIX)gas
|
||||||
LD = $(TOOLPREFIX)ld
|
LD = $(TOOLPREFIX)ld
|
||||||
OBJCOPY = $(TOOLPREFIX)objcopy
|
OBJCOPY = $(TOOLPREFIX)objcopy
|
||||||
OBJDUMP = $(TOOLPREFIX)objdump
|
OBJDUMP = $(TOOLPREFIX)objdump
|
||||||
CFLAGS = -fno-pic -static -fno-builtin -fno-strict-aliasing -O2 -Wall -MD -ggdb -m32 -Werror -fno-omit-frame-pointer
|
|
||||||
|
XFLAGS = -m64 -mcmodel=large -ggdb
|
||||||
|
# CFLAGS = -fno-pic -static -fno-builtin -fno-strict-aliasing -O2 -Wall -MD -ggdb -Werror -fno-omit-frame-pointer
|
||||||
|
CFLAGS = -fno-pic -static -fno-builtin -fno-strict-aliasing -Wall -MD -ggdb -Werror -fno-omit-frame-pointer
|
||||||
|
CFLAGS += -ffreestanding -fno-common -nostdlib $(XFLAGS)
|
||||||
CFLAGS += $(shell $(CC) -fno-stack-protector -E -x c /dev/null >/dev/null 2>&1 && echo -fno-stack-protector)
|
CFLAGS += $(shell $(CC) -fno-stack-protector -E -x c /dev/null >/dev/null 2>&1 && echo -fno-stack-protector)
|
||||||
ASFLAGS = -m32 -gdwarf-2 -Wa,-divide
|
ASFLAGS = -gdwarf-2 -Wa,-divide $(XFLAGS)
|
||||||
# FreeBSD ld wants ``elf_i386_fbsd''
|
# FreeBSD ld wants ``elf_i386_fbsd''
|
||||||
LDFLAGS += -m $(shell $(LD) -V | grep elf_i386 2>/dev/null | head -n 1)
|
LDFLAGS += -m $(shell $(LD) -V | grep elf_x86_64 2>/dev/null | head -n 1)
|
||||||
|
LDFLAGS += -z max-page-size=4096
|
||||||
|
|
||||||
# Disable PIE when possible (for Ubuntu 16.10 toolchain)
|
# Disable PIE when possible (for Ubuntu 16.10 toolchain)
|
||||||
ifneq ($(shell $(CC) -dumpspecs 2>/dev/null | grep -e '[^f]no-pie'),)
|
ifneq ($(shell $(CC) -dumpspecs 2>/dev/null | grep -e '[^f]no-pie'),)
|
||||||
|
@ -90,23 +95,10 @@ ifneq ($(shell $(CC) -dumpspecs 2>/dev/null | grep -e '[^f]nopie'),)
|
||||||
CFLAGS += -fno-pie -nopie
|
CFLAGS += -fno-pie -nopie
|
||||||
endif
|
endif
|
||||||
|
|
||||||
xv6.img: bootblock kernel
|
kernel: $(OBJS) entry.o entryother initcode kernel.ld
|
||||||
dd if=/dev/zero of=xv6.img count=10000
|
$(LD) $(LDFLAGS) -T kernel.ld -o kernel entry.o $(OBJS) -b binary initcode entryother
|
||||||
dd if=bootblock of=xv6.img conv=notrunc
|
$(OBJDUMP) -S kernel > kernel.asm
|
||||||
dd if=kernel of=xv6.img seek=1 conv=notrunc
|
$(OBJDUMP) -t kernel | sed '1,/SYMBOL TABLE/d; s/ .* / /; /^$$/d' > kernel.sym
|
||||||
|
|
||||||
xv6memfs.img: bootblock kernelmemfs
|
|
||||||
dd if=/dev/zero of=xv6memfs.img count=10000
|
|
||||||
dd if=bootblock of=xv6memfs.img conv=notrunc
|
|
||||||
dd if=kernelmemfs of=xv6memfs.img seek=1 conv=notrunc
|
|
||||||
|
|
||||||
bootblock: bootasm.S bootmain.c
|
|
||||||
$(CC) $(CFLAGS) -fno-pic -O -nostdinc -I. -c bootmain.c
|
|
||||||
$(CC) $(CFLAGS) -fno-pic -nostdinc -I. -c bootasm.S
|
|
||||||
$(LD) $(LDFLAGS) -N -e start -Ttext 0x7C00 -o bootblock.o bootasm.o bootmain.o
|
|
||||||
$(OBJDUMP) -S bootblock.o > bootblock.asm
|
|
||||||
$(OBJCOPY) -S -O binary -j .text bootblock.o bootblock
|
|
||||||
./sign.pl bootblock
|
|
||||||
|
|
||||||
entryother: entryother.S
|
entryother: entryother.S
|
||||||
$(CC) $(CFLAGS) -fno-pic -nostdinc -I. -c entryother.S
|
$(CC) $(CFLAGS) -fno-pic -nostdinc -I. -c entryother.S
|
||||||
|
@ -120,23 +112,6 @@ initcode: initcode.S
|
||||||
$(OBJCOPY) -S -O binary initcode.out initcode
|
$(OBJCOPY) -S -O binary initcode.out initcode
|
||||||
$(OBJDUMP) -S initcode.o > initcode.asm
|
$(OBJDUMP) -S initcode.o > initcode.asm
|
||||||
|
|
||||||
kernel: $(OBJS) entry.o entryother initcode kernel.ld
|
|
||||||
$(LD) $(LDFLAGS) -T kernel.ld -o kernel entry.o $(OBJS) -b binary initcode entryother
|
|
||||||
$(OBJDUMP) -S kernel > kernel.asm
|
|
||||||
$(OBJDUMP) -t kernel | sed '1,/SYMBOL TABLE/d; s/ .* / /; /^$$/d' > kernel.sym
|
|
||||||
|
|
||||||
# kernelmemfs is a copy of kernel that maintains the
|
|
||||||
# disk image in memory instead of writing to a disk.
|
|
||||||
# This is not so useful for testing persistent storage or
|
|
||||||
# exploring disk buffering implementations, but it is
|
|
||||||
# great for testing the kernel on real hardware without
|
|
||||||
# needing a scratch disk.
|
|
||||||
MEMFSOBJS = $(filter-out ide.o,$(OBJS)) memide.o
|
|
||||||
kernelmemfs: $(MEMFSOBJS) entry.o entryother initcode kernel.ld fs.img
|
|
||||||
$(LD) $(LDFLAGS) -T kernel.ld -o kernelmemfs entry.o $(MEMFSOBJS) -b binary initcode entryother fs.img
|
|
||||||
$(OBJDUMP) -S kernelmemfs > kernelmemfs.asm
|
|
||||||
$(OBJDUMP) -t kernelmemfs | sed '1,/SYMBOL TABLE/d; s/ .* / /; /^$$/d' > kernelmemfs.sym
|
|
||||||
|
|
||||||
tags: $(OBJS) entryother.S _init
|
tags: $(OBJS) entryother.S _init
|
||||||
etags *.S *.c
|
etags *.S *.c
|
||||||
|
|
||||||
|
@ -190,8 +165,8 @@ fs.img: mkfs README $(UPROGS)
|
||||||
clean:
|
clean:
|
||||||
rm -f *.tex *.dvi *.idx *.aux *.log *.ind *.ilg \
|
rm -f *.tex *.dvi *.idx *.aux *.log *.ind *.ilg \
|
||||||
*.o *.d *.asm *.sym vectors.S bootblock entryother \
|
*.o *.d *.asm *.sym vectors.S bootblock entryother \
|
||||||
initcode initcode.out kernel xv6.img fs.img kernelmemfs \
|
initcode initcode.out kernel fs.img kernelmemfs \
|
||||||
xv6memfs.img mkfs .gdbinit \
|
mkfs .gdbinit \
|
||||||
$(UPROGS)
|
$(UPROGS)
|
||||||
|
|
||||||
# make a printout
|
# make a printout
|
||||||
|
@ -204,12 +179,6 @@ xv6.pdf: $(PRINT)
|
||||||
|
|
||||||
print: xv6.pdf
|
print: xv6.pdf
|
||||||
|
|
||||||
# run in emulators
|
|
||||||
|
|
||||||
bochs : fs.img xv6.img
|
|
||||||
if [ ! -e .bochsrc ]; then ln -s dot-bochsrc .bochsrc; fi
|
|
||||||
bochs -q
|
|
||||||
|
|
||||||
# try to generate a unique GDB port
|
# try to generate a unique GDB port
|
||||||
GDBPORT = $(shell expr `id -u` % 5000 + 25000)
|
GDBPORT = $(shell expr `id -u` % 5000 + 25000)
|
||||||
# QEMU's gdb stub command line changed in 0.11
|
# QEMU's gdb stub command line changed in 0.11
|
||||||
|
@ -219,25 +188,21 @@ QEMUGDB = $(shell if $(QEMU) -help | grep -q '^-gdb'; \
|
||||||
ifndef CPUS
|
ifndef CPUS
|
||||||
CPUS := 2
|
CPUS := 2
|
||||||
endif
|
endif
|
||||||
QEMUOPTS = -drive file=fs.img,index=1,media=disk,format=raw -drive file=xv6.img,index=0,media=disk,format=raw -smp $(CPUS) -m 512 $(QEMUEXTRA)
|
QEMUOPTS = -kernel kernel -drive file=fs.img,index=1,media=disk,format=raw -smp $(CPUS) -m 512 $(QEMUEXTRA)
|
||||||
|
qemu: fs.img
|
||||||
qemu: fs.img xv6.img
|
|
||||||
$(QEMU) -serial mon:stdio $(QEMUOPTS)
|
$(QEMU) -serial mon:stdio $(QEMUOPTS)
|
||||||
|
|
||||||
qemu-memfs: xv6memfs.img
|
qemu-nox: fs.img kernel
|
||||||
$(QEMU) -drive file=xv6memfs.img,index=0,media=disk,format=raw -smp $(CPUS) -m 256
|
|
||||||
|
|
||||||
qemu-nox: fs.img xv6.img
|
|
||||||
$(QEMU) -nographic $(QEMUOPTS)
|
$(QEMU) -nographic $(QEMUOPTS)
|
||||||
|
|
||||||
.gdbinit: .gdbinit.tmpl
|
.gdbinit: .gdbinit.tmpl-x64
|
||||||
sed "s/localhost:1234/localhost:$(GDBPORT)/" < $^ > $@
|
sed "s/localhost:1234/localhost:$(GDBPORT)/" < $^ > $@
|
||||||
|
|
||||||
qemu-gdb: fs.img xv6.img .gdbinit
|
qemu-gdb: fs.img kernel .gdbinit
|
||||||
@echo "*** Now run 'gdb'." 1>&2
|
@echo "*** Now run 'gdb'." 1>&2
|
||||||
$(QEMU) -serial mon:stdio $(QEMUOPTS) -S $(QEMUGDB)
|
$(QEMU) $(QEMUOPTS) -S $(QEMUGDB)
|
||||||
|
|
||||||
qemu-nox-gdb: fs.img xv6.img .gdbinit
|
qemu-nox-gdb: fs.img kernel .gdbinit
|
||||||
@echo "*** Now run 'gdb'." 1>&2
|
@echo "*** Now run 'gdb'." 1>&2
|
||||||
$(QEMU) -nographic $(QEMUOPTS) -S $(QEMUGDB)
|
$(QEMU) -nographic $(QEMUOPTS) -S $(QEMUGDB)
|
||||||
|
|
||||||
|
|
88
bootasm.S
88
bootasm.S
|
@ -1,88 +0,0 @@
|
||||||
#include "asm.h"
|
|
||||||
#include "memlayout.h"
|
|
||||||
#include "mmu.h"
|
|
||||||
|
|
||||||
# Start the first CPU: switch to 32-bit protected mode, jump into C.
|
|
||||||
# The BIOS loads this code from the first sector of the hard disk into
|
|
||||||
# memory at physical address 0x7c00 and starts executing in real mode
|
|
||||||
# with %cs=0 %ip=7c00.
|
|
||||||
|
|
||||||
.code16 # Assemble for 16-bit mode
|
|
||||||
.globl start
|
|
||||||
start:
|
|
||||||
cli # BIOS enabled interrupts; disable
|
|
||||||
|
|
||||||
# Zero data segment registers DS, ES, and SS.
|
|
||||||
xorw %ax,%ax # Set %ax to zero
|
|
||||||
movw %ax,%ds # -> Data Segment
|
|
||||||
movw %ax,%es # -> Extra Segment
|
|
||||||
movw %ax,%ss # -> Stack Segment
|
|
||||||
|
|
||||||
# Physical address line A20 is tied to zero so that the first PCs
|
|
||||||
# with 2 MB would run software that assumed 1 MB. Undo that.
|
|
||||||
seta20.1:
|
|
||||||
inb $0x64,%al # Wait for not busy
|
|
||||||
testb $0x2,%al
|
|
||||||
jnz seta20.1
|
|
||||||
|
|
||||||
movb $0xd1,%al # 0xd1 -> port 0x64
|
|
||||||
outb %al,$0x64
|
|
||||||
|
|
||||||
seta20.2:
|
|
||||||
inb $0x64,%al # Wait for not busy
|
|
||||||
testb $0x2,%al
|
|
||||||
jnz seta20.2
|
|
||||||
|
|
||||||
movb $0xdf,%al # 0xdf -> port 0x60
|
|
||||||
outb %al,$0x60
|
|
||||||
|
|
||||||
# Switch from real to protected mode. Use a bootstrap GDT that makes
|
|
||||||
# virtual addresses map directly to physical addresses so that the
|
|
||||||
# effective memory map doesn't change during the transition.
|
|
||||||
lgdt gdtdesc
|
|
||||||
movl %cr0, %eax
|
|
||||||
orl $CR0_PE, %eax
|
|
||||||
movl %eax, %cr0
|
|
||||||
|
|
||||||
//PAGEBREAK!
|
|
||||||
# Complete the transition to 32-bit protected mode by using a long jmp
|
|
||||||
# to reload %cs and %eip. The segment descriptors are set up with no
|
|
||||||
# translation, so that the mapping is still the identity mapping.
|
|
||||||
ljmp $(SEG_KCODE<<3), $start32
|
|
||||||
|
|
||||||
.code32 # Tell assembler to generate 32-bit code now.
|
|
||||||
start32:
|
|
||||||
# Set up the protected-mode data segment registers
|
|
||||||
movw $(SEG_KDATA<<3), %ax # Our data segment selector
|
|
||||||
movw %ax, %ds # -> DS: Data Segment
|
|
||||||
movw %ax, %es # -> ES: Extra Segment
|
|
||||||
movw %ax, %ss # -> SS: Stack Segment
|
|
||||||
movw $0, %ax # Zero segments not ready for use
|
|
||||||
movw %ax, %fs # -> FS
|
|
||||||
movw %ax, %gs # -> GS
|
|
||||||
|
|
||||||
# Set up the stack pointer and call into C.
|
|
||||||
movl $start, %esp
|
|
||||||
call bootmain
|
|
||||||
|
|
||||||
# If bootmain returns (it shouldn't), trigger a Bochs
|
|
||||||
# breakpoint if running under Bochs, then loop.
|
|
||||||
movw $0x8a00, %ax # 0x8a00 -> port 0x8a00
|
|
||||||
movw %ax, %dx
|
|
||||||
outw %ax, %dx
|
|
||||||
movw $0x8ae0, %ax # 0x8ae0 -> port 0x8a00
|
|
||||||
outw %ax, %dx
|
|
||||||
spin:
|
|
||||||
jmp spin
|
|
||||||
|
|
||||||
# Bootstrap GDT
|
|
||||||
.p2align 2 # force 4 byte alignment
|
|
||||||
gdt:
|
|
||||||
SEG_NULLASM # null seg
|
|
||||||
SEG_ASM(STA_X|STA_R, 0x0, 0xffffffff) # code seg
|
|
||||||
SEG_ASM(STA_W, 0x0, 0xffffffff) # data seg
|
|
||||||
|
|
||||||
gdtdesc:
|
|
||||||
.word (gdtdesc - gdt - 1) # sizeof(gdt) - 1
|
|
||||||
.long gdt # address gdt
|
|
||||||
|
|
30
console.c
30
console.c
|
@ -2,6 +2,8 @@
|
||||||
// Input is from the keyboard or serial port.
|
// Input is from the keyboard or serial port.
|
||||||
// Output is written to the screen and serial port.
|
// Output is written to the screen and serial port.
|
||||||
|
|
||||||
|
#include <stdarg.h>
|
||||||
|
|
||||||
#include "types.h"
|
#include "types.h"
|
||||||
#include "defs.h"
|
#include "defs.h"
|
||||||
#include "param.h"
|
#include "param.h"
|
||||||
|
@ -24,10 +26,11 @@ static struct {
|
||||||
int locking;
|
int locking;
|
||||||
} cons;
|
} cons;
|
||||||
|
|
||||||
|
static char digits[] = "0123456789abcdef";
|
||||||
|
|
||||||
static void
|
static void
|
||||||
printint(int xx, int base, int sign)
|
printint(int xx, int base, int sign)
|
||||||
{
|
{
|
||||||
static char digits[] = "0123456789abcdef";
|
|
||||||
char buf[16];
|
char buf[16];
|
||||||
int i;
|
int i;
|
||||||
uint x;
|
uint x;
|
||||||
|
@ -48,14 +51,25 @@ printint(int xx, int base, int sign)
|
||||||
while(--i >= 0)
|
while(--i >= 0)
|
||||||
consputc(buf[i]);
|
consputc(buf[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
printptr(uint64 x) {
|
||||||
|
int i;
|
||||||
|
consputc('0');
|
||||||
|
consputc('x');
|
||||||
|
for (i = 0; i < (sizeof(uint64) * 2); i++, x <<= 4)
|
||||||
|
consputc(digits[x >> (sizeof(uint64) * 8 - 4)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
//PAGEBREAK: 50
|
//PAGEBREAK: 50
|
||||||
|
|
||||||
// Print to the console. only understands %d, %x, %p, %s.
|
// Print to the console. only understands %d, %x, %p, %s.
|
||||||
void
|
void
|
||||||
cprintf(char *fmt, ...)
|
cprintf(char *fmt, ...)
|
||||||
{
|
{
|
||||||
|
va_list ap;
|
||||||
int i, c, locking;
|
int i, c, locking;
|
||||||
uint *argp;
|
|
||||||
char *s;
|
char *s;
|
||||||
|
|
||||||
locking = cons.locking;
|
locking = cons.locking;
|
||||||
|
@ -65,7 +79,7 @@ cprintf(char *fmt, ...)
|
||||||
if (fmt == 0)
|
if (fmt == 0)
|
||||||
panic("null fmt");
|
panic("null fmt");
|
||||||
|
|
||||||
argp = (uint*)(void*)(&fmt + 1);
|
va_start(ap, fmt);
|
||||||
for(i = 0; (c = fmt[i] & 0xff) != 0; i++){
|
for(i = 0; (c = fmt[i] & 0xff) != 0; i++){
|
||||||
if(c != '%'){
|
if(c != '%'){
|
||||||
consputc(c);
|
consputc(c);
|
||||||
|
@ -76,14 +90,16 @@ cprintf(char *fmt, ...)
|
||||||
break;
|
break;
|
||||||
switch(c){
|
switch(c){
|
||||||
case 'd':
|
case 'd':
|
||||||
printint(*argp++, 10, 1);
|
printint(va_arg(ap, int), 10, 1);
|
||||||
break;
|
break;
|
||||||
case 'x':
|
case 'x':
|
||||||
|
printint(va_arg(ap, int), 16, 1);
|
||||||
|
break;
|
||||||
case 'p':
|
case 'p':
|
||||||
printint(*argp++, 16, 0);
|
printptr(va_arg(ap, uint64));
|
||||||
break;
|
break;
|
||||||
case 's':
|
case 's':
|
||||||
if((s = (char*)*argp++) == 0)
|
if((s = va_arg(ap, char*)) == 0)
|
||||||
s = "(null)";
|
s = "(null)";
|
||||||
for(; *s; s++)
|
for(; *s; s++)
|
||||||
consputc(*s);
|
consputc(*s);
|
||||||
|
@ -107,7 +123,7 @@ void
|
||||||
panic(char *s)
|
panic(char *s)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
uint pcs[10];
|
uint64 pcs[10];
|
||||||
|
|
||||||
cli();
|
cli();
|
||||||
cons.locking = 0;
|
cons.locking = 0;
|
||||||
|
|
12
defs.h
12
defs.h
|
@ -126,7 +126,7 @@ void swtch(struct context**, struct context*);
|
||||||
|
|
||||||
// spinlock.c
|
// spinlock.c
|
||||||
void acquire(struct spinlock*);
|
void acquire(struct spinlock*);
|
||||||
void getcallerpcs(void*, uint*);
|
void getcallerpcs(void*, uint64*);
|
||||||
int holding(struct spinlock*);
|
int holding(struct spinlock*);
|
||||||
void initlock(struct spinlock*, char*);
|
void initlock(struct spinlock*, char*);
|
||||||
void release(struct spinlock*);
|
void release(struct spinlock*);
|
||||||
|
@ -152,8 +152,10 @@ char* strncpy(char*, const char*, int);
|
||||||
int argint(int, int*);
|
int argint(int, int*);
|
||||||
int argptr(int, char**, int);
|
int argptr(int, char**, int);
|
||||||
int argstr(int, char**);
|
int argstr(int, char**);
|
||||||
int fetchint(uint, int*);
|
int argaddr(int, uint64 *);
|
||||||
int fetchstr(uint, char**);
|
int fetchint(uint64, int*);
|
||||||
|
int fetchstr(uint64, char**);
|
||||||
|
int fetchaddr(uint64, uint64*);
|
||||||
void syscall(void);
|
void syscall(void);
|
||||||
|
|
||||||
// timer.c
|
// timer.c
|
||||||
|
@ -176,8 +178,8 @@ void kvmalloc(void);
|
||||||
pde_t* setupkvm(void);
|
pde_t* setupkvm(void);
|
||||||
char* uva2ka(pde_t*, char*);
|
char* uva2ka(pde_t*, char*);
|
||||||
int allocuvm(pde_t*, uint, uint);
|
int allocuvm(pde_t*, uint, uint);
|
||||||
int deallocuvm(pde_t*, uint, uint);
|
int deallocuvm(pde_t*, uint64, uint64);
|
||||||
void freevm(pde_t*);
|
void freevm(pde_t*, uint64);
|
||||||
void inituvm(pde_t*, char*, uint);
|
void inituvm(pde_t*, char*, uint);
|
||||||
int loaduvm(pde_t*, char*, struct inode*, uint, uint);
|
int loaduvm(pde_t*, char*, struct inode*, uint, uint);
|
||||||
pde_t* copyuvm(pde_t*, uint);
|
pde_t* copyuvm(pde_t*, uint);
|
||||||
|
|
22
elf.h
22
elf.h
|
@ -9,9 +9,9 @@ struct elfhdr {
|
||||||
ushort type;
|
ushort type;
|
||||||
ushort machine;
|
ushort machine;
|
||||||
uint version;
|
uint version;
|
||||||
uint entry;
|
uint64 entry;
|
||||||
uint phoff;
|
uint64 phoff;
|
||||||
uint shoff;
|
uint64 shoff;
|
||||||
uint flags;
|
uint flags;
|
||||||
ushort ehsize;
|
ushort ehsize;
|
||||||
ushort phentsize;
|
ushort phentsize;
|
||||||
|
@ -23,14 +23,14 @@ struct elfhdr {
|
||||||
|
|
||||||
// Program section header
|
// Program section header
|
||||||
struct proghdr {
|
struct proghdr {
|
||||||
uint type;
|
uint32 type;
|
||||||
uint off;
|
uint32 flags;
|
||||||
uint vaddr;
|
uint64 off;
|
||||||
uint paddr;
|
uint64 vaddr;
|
||||||
uint filesz;
|
uint64 paddr;
|
||||||
uint memsz;
|
uint64 filesz;
|
||||||
uint flags;
|
uint64 memsz;
|
||||||
uint align;
|
uint64 align;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Values for Proghdr type
|
// Values for Proghdr type
|
||||||
|
|
261
entry.S
261
entry.S
|
@ -1,68 +1,223 @@
|
||||||
# The xv6 kernel starts executing in this file. This file is linked with
|
# x86-64 bootstrap, assuming load by MultiBoot-compliant loader.
|
||||||
# the kernel C code, so it can refer to kernel symbols such as main().
|
# The MultiBoot specification is at:
|
||||||
# The boot block (bootasm.S and bootmain.c) jumps to entry below.
|
|
||||||
|
|
||||||
# Multiboot header, for multiboot boot loaders like GNU Grub.
|
|
||||||
# http://www.gnu.org/software/grub/manual/multiboot/multiboot.html
|
# http://www.gnu.org/software/grub/manual/multiboot/multiboot.html
|
||||||
#
|
# GRUB is a MultiBoot loader, as is qemu's -kernel option.
|
||||||
# Using GRUB 2, you can boot xv6 from a file stored in a
|
|
||||||
# Linux file system by copying kernel or kernelmemfs to /boot
|
|
||||||
# and then adding this menu entry:
|
|
||||||
#
|
|
||||||
# menuentry "xv6" {
|
|
||||||
# insmod ext2
|
|
||||||
# set root='(hd0,msdos1)'
|
|
||||||
# set kernel='/boot/kernel'
|
|
||||||
# echo "Loading ${kernel}..."
|
|
||||||
# multiboot ${kernel} ${kernel}
|
|
||||||
# boot
|
|
||||||
# }
|
|
||||||
|
|
||||||
#include "asm.h"
|
|
||||||
#include "memlayout.h"
|
|
||||||
#include "mmu.h"
|
#include "mmu.h"
|
||||||
#include "param.h"
|
#include "memlayout.h"
|
||||||
|
|
||||||
# Multiboot header. Data to direct multiboot loader.
|
# STACK is the size of the bootstrap stack.
|
||||||
.p2align 2
|
#define STACK 8192
|
||||||
|
|
||||||
|
# MultiBoot header.
|
||||||
|
# http://www.gnu.org/software/grub/manual/multiboot/multiboot.html#Header-layout
|
||||||
|
.align 4
|
||||||
.text
|
.text
|
||||||
.globl multiboot_header
|
.globl multiboot_header
|
||||||
multiboot_header:
|
multiboot_header:
|
||||||
#define magic 0x1badb002
|
#define magic 0x1badb002
|
||||||
#define flags 0
|
#define flags (1<<16 | 1<<0)
|
||||||
.long magic
|
.long magic
|
||||||
.long flags
|
.long flags
|
||||||
.long (-magic-flags)
|
.long (- magic - flags) # checksum
|
||||||
|
.long V2P_WO(multiboot_header) # header address
|
||||||
|
.long V2P_WO(multiboot_header) # load address
|
||||||
|
.long V2P_WO(edata) # load end address
|
||||||
|
.long V2P_WO(end) # bss end address
|
||||||
|
.long V2P_WO(start) # entry address
|
||||||
|
|
||||||
# By convention, the _start symbol specifies the ELF entry point.
|
# Entry point jumped to by boot loader. Running in 32-bit mode.
|
||||||
# Since we haven't set up virtual memory yet, our entry point is
|
# http://www.gnu.org/software/grub/manual/multiboot/multiboot.html#Machine-state
|
||||||
# the physical address of 'entry'.
|
#
|
||||||
.globl _start
|
# EAX = 0x2badb002
|
||||||
_start = V2P_WO(entry)
|
# EBX = address of multiboot information structure
|
||||||
|
# CS = 32-bit read/execute code segment with identity map
|
||||||
|
# DS, ES, FS, GS, SS = 32-bit read/write data segment with identity map
|
||||||
|
# A20 gate = enabled
|
||||||
|
# CR0 = PE set, PG clear
|
||||||
|
# EFLAGS = VM clear, IF clear
|
||||||
|
#
|
||||||
|
.code32
|
||||||
|
.globl start
|
||||||
|
start:
|
||||||
|
# Tell BIOS to do "warm reboot" when we shut down.
|
||||||
|
movw $0x1234, 0x472
|
||||||
|
|
||||||
# Entering xv6 on boot processor, with paging off.
|
# Set up multiboot arguments for main.
|
||||||
.globl entry
|
movl %eax, %edi
|
||||||
entry:
|
movl %ebx, %esi
|
||||||
# Turn on page size extension for 4Mbyte pages
|
|
||||||
movl %cr4, %eax
|
|
||||||
orl $(CR4_PSE), %eax
|
|
||||||
movl %eax, %cr4
|
|
||||||
# Set page directory
|
|
||||||
movl $(V2P_WO(entrypgdir)), %eax
|
|
||||||
movl %eax, %cr3
|
|
||||||
# Turn on paging.
|
|
||||||
movl %cr0, %eax
|
|
||||||
orl $(CR0_PG|CR0_WP), %eax
|
|
||||||
movl %eax, %cr0
|
|
||||||
|
|
||||||
# Set up the stack pointer.
|
# Initialize stack.
|
||||||
movl $(stack + KSTACKSIZE), %esp
|
movl $V2P_WO(stack+STACK), %esp
|
||||||
|
|
||||||
|
# Zero bss. QEMU's MultiBoot seems not to.
|
||||||
|
# It's possible that the header above is not right, but it looks right.
|
||||||
|
# %edi is holding multiboot argument, so save in another register.
|
||||||
|
# (The stack is in the bss.)
|
||||||
|
movl %edi, %edx
|
||||||
|
movl $V2P_WO(edata), %edi
|
||||||
|
movl $V2P_WO(end), %ecx
|
||||||
|
subl $V2P_WO(edata), %ecx
|
||||||
|
movl $0, %eax
|
||||||
|
cld
|
||||||
|
rep stosb
|
||||||
|
movl %edx, %edi
|
||||||
|
|
||||||
# Jump to main(), and switch to executing at
|
call loadgdt
|
||||||
# high addresses. The indirect call is needed because
|
|
||||||
# the assembler produces a PC-relative instruction
|
# Enter new 32-bit code segment (already in 32-bit mode).
|
||||||
# for a direct jump.
|
ljmp $KCSEG32, $V2P_WO(start32) // code32 segment selector
|
||||||
mov $main, %eax
|
|
||||||
jmp *%eax
|
start32:
|
||||||
|
# Initialize page table.
|
||||||
|
call initpagetables
|
||||||
|
call init32e
|
||||||
|
|
||||||
|
movl $V2P_WO(start64), %eax
|
||||||
|
# Enter 64-bit mode.
|
||||||
|
ljmp $KCSEG, $V2P_WO(tramp64) // code64 segment selector
|
||||||
|
|
||||||
.comm stack, KSTACKSIZE
|
.code64
|
||||||
|
start64:
|
||||||
|
# Load VA of stack
|
||||||
|
movabsq $(stack+STACK), %rsp
|
||||||
|
# Clear frame pointer for stack walks
|
||||||
|
movl $0, %ebp
|
||||||
|
# Call into C code.
|
||||||
|
call bpmain
|
||||||
|
# should not return from bpmain
|
||||||
|
jmp .
|
||||||
|
|
||||||
|
.code32
|
||||||
|
.global apstart
|
||||||
|
apstart:
|
||||||
|
call loadgdt
|
||||||
|
ljmp $KCSEG32, $V2P_WO(apstart32) // code32 segment selector
|
||||||
|
|
||||||
|
apstart32:
|
||||||
|
call init32e
|
||||||
|
movl $V2P_WO(apstart64), %eax
|
||||||
|
ljmp $KCSEG, $V2P_WO(tramp64) // code64 segment selector
|
||||||
|
|
||||||
|
.code64
|
||||||
|
apstart64:
|
||||||
|
# Remember (from bootothers), that our kernel stack pointer is
|
||||||
|
# at the top of our temporary stack.
|
||||||
|
popq %rax
|
||||||
|
movq %rax, %rsp
|
||||||
|
movq $0, %rbp
|
||||||
|
call apmain
|
||||||
|
1: jmp 1b
|
||||||
|
|
||||||
|
.code64
|
||||||
|
tramp64:
|
||||||
|
# The linker thinks we are running at tramp64, but we're actually
|
||||||
|
# running at PADDR(tramp64), so use an explicit calculation to
|
||||||
|
# load and jump to the correct address. %rax should hold the
|
||||||
|
# physical address of the jmp target.
|
||||||
|
movq $KERNBASE, %r11
|
||||||
|
addq %r11, %rax
|
||||||
|
jmp *%rax
|
||||||
|
|
||||||
|
# Initial stack
|
||||||
|
.comm stack, STACK
|
||||||
|
|
||||||
|
# Page tables. See section 4.5 of 253668.pdf.
|
||||||
|
# We map the first GB of physical memory at 0 and at 1 TB (not GB) before
|
||||||
|
# the end of virtual memory. At boot time we are using the mapping at 0
|
||||||
|
# but during ordinary execution we use the high mapping.
|
||||||
|
# The intent is that after bootstrap the kernel can expand this mapping
|
||||||
|
# to cover all the available physical memory.
|
||||||
|
# This would be easier if we could use the PS bit to create GB-sized entries
|
||||||
|
# and skip the pdt table, but not all chips support it, and QEMU doesn't.
|
||||||
|
.align 4096
|
||||||
|
pml4:
|
||||||
|
.quad V2P_WO(pdpt) + PTE_P + PTE_W // present, read/write
|
||||||
|
.quad 0
|
||||||
|
.space 4096 - 2*16
|
||||||
|
.quad V2P_WO(pdpt) + PTE_P + PTE_W
|
||||||
|
.quad 0
|
||||||
|
|
||||||
|
.align 4096
|
||||||
|
pdpt:
|
||||||
|
.quad V2P_WO(pdt) + PTE_P + PTE_W
|
||||||
|
.space 4096 - 8
|
||||||
|
|
||||||
|
.align 4096
|
||||||
|
pdt:
|
||||||
|
// Filled in below.
|
||||||
|
.space 4096
|
||||||
|
|
||||||
|
.code32
|
||||||
|
initpagetables:
|
||||||
|
pushl %edi
|
||||||
|
pushl %ecx
|
||||||
|
pushl %eax
|
||||||
|
|
||||||
|
// Set up 64-bit entry in %edx:%eax.
|
||||||
|
// Base address 0, present, read/write, large page.
|
||||||
|
movl $(0 | PTE_P | PTE_W | PTE_PS), %eax
|
||||||
|
movl $0, %edx
|
||||||
|
|
||||||
|
// Fill in 512 entries at pdt.
|
||||||
|
movl $V2P_WO(pdt), %edi
|
||||||
|
movl $512, %ecx
|
||||||
|
1:
|
||||||
|
// Write this 64-bit entry.
|
||||||
|
movl %eax, 0(%edi)
|
||||||
|
movl %edx, 4(%edi)
|
||||||
|
addl $8, %edi
|
||||||
|
// 64-bit add to prepare address for next entry.
|
||||||
|
// Because this is a large page entry, it covers 512 4k pages (2 MB).
|
||||||
|
add $(512*4096), %eax
|
||||||
|
adc $0, %edx
|
||||||
|
loop 1b
|
||||||
|
|
||||||
|
popl %eax
|
||||||
|
popl %ecx
|
||||||
|
popl %edi
|
||||||
|
ret
|
||||||
|
|
||||||
|
# Initialize IA-32e mode. See section 9.8.5 of 253668.pdf.
|
||||||
|
init32e:
|
||||||
|
# Set CR4.PAE and CR4.PSE = 1.
|
||||||
|
movl %cr4, %eax
|
||||||
|
orl $0x30, %eax
|
||||||
|
movl %eax, %cr4
|
||||||
|
|
||||||
|
# Load CR3 with physical base address of level 4 page table.
|
||||||
|
movl $V2P_WO(pml4), %eax
|
||||||
|
movl %eax, %cr3
|
||||||
|
|
||||||
|
# Enable IA-32e mode by setting IA32_EFER.LME = 1.
|
||||||
|
# Also turn on IA32_EFER.SCE (syscall enable).
|
||||||
|
movl $0xc0000080, %ecx
|
||||||
|
rdmsr
|
||||||
|
orl $0x101, %eax
|
||||||
|
wrmsr
|
||||||
|
|
||||||
|
# Enable paging by setting CR0.PG = 1.
|
||||||
|
movl %cr0, %eax
|
||||||
|
orl $0x80000000, %eax
|
||||||
|
movl %eax, %cr0
|
||||||
|
nop
|
||||||
|
nop
|
||||||
|
|
||||||
|
ret
|
||||||
|
|
||||||
|
loadgdt:
|
||||||
|
subl $8, %esp
|
||||||
|
movl $V2P_WO(bootgdt), 4(%esp)
|
||||||
|
movw $(8*NSEGS-1), 2(%esp)
|
||||||
|
lgdt 2(%esp)
|
||||||
|
addl $8, %esp
|
||||||
|
|
||||||
|
movl $KDSEG, %eax // data segment selector
|
||||||
|
movw %ax, %ds
|
||||||
|
movw %ax, %es
|
||||||
|
movw %ax, %ss
|
||||||
|
movl $0, %eax // null segment selector
|
||||||
|
movw %ax, %fs
|
||||||
|
movw %ax, %gs
|
||||||
|
|
||||||
|
ret
|
||||||
|
|
57
entryother.S
57
entryother.S
|
@ -13,11 +13,9 @@
|
||||||
#
|
#
|
||||||
# Startothers (in main.c) sends the STARTUPs one at a time.
|
# Startothers (in main.c) sends the STARTUPs one at a time.
|
||||||
# It copies this code (start) at 0x7000. It puts the address of
|
# It copies this code (start) at 0x7000. It puts the address of
|
||||||
# a newly allocated per-core stack in start-4,the address of the
|
# a newly allocated per-core stack in start-12, the address of the
|
||||||
# place to jump to (mpenter) in start-8, and the physical address
|
# place to jump to (apstart32) in start-4, and the physical address
|
||||||
# of entrypgdir in start-12.
|
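# of entrypgdir in start-12.
# (NOTE: stale -- startothers now stores only the entry address in start-4
# and the stack address in start-12; entrypgdir is no longer passed.)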
|
||||||
#
|
|
||||||
# This code combines elements of bootasm.S and entry.S.
|
|
||||||
|
|
||||||
.code16
|
.code16
|
||||||
.globl start
|
.globl start
|
||||||
|
@ -41,53 +39,22 @@ start:
|
||||||
# Complete the transition to 32-bit protected mode by using a long jmp
|
# Complete the transition to 32-bit protected mode by using a long jmp
|
||||||
# to reload %cs and %eip. The segment descriptors are set up with no
|
# to reload %cs and %eip. The segment descriptors are set up with no
|
||||||
# translation, so that the mapping is still the identity mapping.
|
# translation, so that the mapping is still the identity mapping.
|
||||||
ljmpl $(SEG_KCODE<<3), $(start32)
|
ljmpl $(KCSEG32), $start32
|
||||||
|
|
||||||
//PAGEBREAK!
|
.code32
|
||||||
.code32 # Tell assembler to generate 32-bit code now.
|
|
||||||
start32:
|
start32:
|
||||||
# Set up the protected-mode data segment registers
|
movl $start-12, %esp
|
||||||
movw $(SEG_KDATA<<3), %ax # Our data segment selector
|
movl start-4, %ecx
|
||||||
movw %ax, %ds # -> DS: Data Segment
|
jmp *%ecx
|
||||||
movw %ax, %es # -> ES: Extra Segment
|
|
||||||
movw %ax, %ss # -> SS: Stack Segment
|
|
||||||
movw $0, %ax # Zero segments not ready for use
|
|
||||||
movw %ax, %fs # -> FS
|
|
||||||
movw %ax, %gs # -> GS
|
|
||||||
|
|
||||||
# Turn on page size extension for 4Mbyte pages
|
.align 4
|
||||||
movl %cr4, %eax
|
|
||||||
orl $(CR4_PSE), %eax
|
|
||||||
movl %eax, %cr4
|
|
||||||
# Use entrypgdir as our initial page table
|
|
||||||
movl (start-12), %eax
|
|
||||||
movl %eax, %cr3
|
|
||||||
# Turn on paging.
|
|
||||||
movl %cr0, %eax
|
|
||||||
orl $(CR0_PE|CR0_PG|CR0_WP), %eax
|
|
||||||
movl %eax, %cr0
|
|
||||||
|
|
||||||
# Switch to the stack allocated by startothers()
|
|
||||||
movl (start-4), %esp
|
|
||||||
# Call mpenter()
|
|
||||||
call *(start-8)
|
|
||||||
|
|
||||||
movw $0x8a00, %ax
|
|
||||||
movw %ax, %dx
|
|
||||||
outw %ax, %dx
|
|
||||||
movw $0x8ae0, %ax
|
|
||||||
outw %ax, %dx
|
|
||||||
spin:
|
|
||||||
jmp spin
|
|
||||||
|
|
||||||
.p2align 2
|
|
||||||
gdt:
|
gdt:
|
||||||
SEG_NULLASM
|
SEG_NULLASM
|
||||||
SEG_ASM(STA_X|STA_R, 0, 0xffffffff)
|
SEG_ASM(0xa, 0, 0xffffffff)
|
||||||
SEG_ASM(STA_W, 0, 0xffffffff)
|
SEG_ASM(0x2, 0, 0xffffffff)
|
||||||
|
|
||||||
|
|
||||||
|
.align 16
|
||||||
gdtdesc:
|
gdtdesc:
|
||||||
.word (gdtdesc - gdt - 1)
|
.word 0x17 # sizeof(gdt)-1
|
||||||
.long gdt
|
.long gdt
|
||||||
|
|
||||||
|
|
30
exec.c
30
exec.c
|
@ -4,6 +4,8 @@
|
||||||
#include "mmu.h"
|
#include "mmu.h"
|
||||||
#include "proc.h"
|
#include "proc.h"
|
||||||
#include "defs.h"
|
#include "defs.h"
|
||||||
|
#include "traps.h"
|
||||||
|
#include "msr.h"
|
||||||
#include "x86.h"
|
#include "x86.h"
|
||||||
#include "elf.h"
|
#include "elf.h"
|
||||||
|
|
||||||
|
@ -12,18 +14,18 @@ exec(char *path, char **argv)
|
||||||
{
|
{
|
||||||
char *s, *last;
|
char *s, *last;
|
||||||
int i, off;
|
int i, off;
|
||||||
uint argc, sz, sp, ustack[3+MAXARG+1];
|
uint64 argc, sz, sp, ustack[3+MAXARG+1];
|
||||||
struct elfhdr elf;
|
struct elfhdr elf;
|
||||||
struct inode *ip;
|
struct inode *ip;
|
||||||
struct proghdr ph;
|
struct proghdr ph;
|
||||||
pde_t *pgdir, *oldpgdir;
|
pde_t *pgdir, *oldpgdir;
|
||||||
struct proc *curproc = myproc();
|
struct proc *curproc = myproc();
|
||||||
|
uint64 oldsz = curproc->sz;
|
||||||
|
|
||||||
begin_op();
|
begin_op();
|
||||||
|
|
||||||
if((ip = namei(path)) == 0){
|
if((ip = namei(path)) == 0){
|
||||||
end_op();
|
end_op();
|
||||||
cprintf("exec: fail\n");
|
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
ilock(ip);
|
ilock(ip);
|
||||||
|
@ -72,7 +74,7 @@ exec(char *path, char **argv)
|
||||||
for(argc = 0; argv[argc]; argc++) {
|
for(argc = 0; argv[argc]; argc++) {
|
||||||
if(argc >= MAXARG)
|
if(argc >= MAXARG)
|
||||||
goto bad;
|
goto bad;
|
||||||
sp = (sp - (strlen(argv[argc]) + 1)) & ~3;
|
sp = (sp - (strlen(argv[argc]) + 1)) & ~(sizeof(uint64)-1);
|
||||||
if(copyout(pgdir, sp, argv[argc], strlen(argv[argc]) + 1) < 0)
|
if(copyout(pgdir, sp, argv[argc], strlen(argv[argc]) + 1) < 0)
|
||||||
goto bad;
|
goto bad;
|
||||||
ustack[3+argc] = sp;
|
ustack[3+argc] = sp;
|
||||||
|
@ -81,10 +83,13 @@ exec(char *path, char **argv)
|
||||||
|
|
||||||
ustack[0] = 0xffffffff; // fake return PC
|
ustack[0] = 0xffffffff; // fake return PC
|
||||||
ustack[1] = argc;
|
ustack[1] = argc;
|
||||||
ustack[2] = sp - (argc+1)*4; // argv pointer
|
ustack[2] = sp - (argc+1)*sizeof(uint64); // argv pointer
|
||||||
|
|
||||||
sp -= (3+argc+1) * 4;
|
curproc->tf->rdi = argc;
|
||||||
if(copyout(pgdir, sp, ustack, (3+argc+1)*4) < 0)
|
curproc->tf->rsi = sp - (argc+1)*sizeof(uint64);
|
||||||
|
|
||||||
|
sp -= (3+argc+1) * sizeof(uint64);
|
||||||
|
if(copyout(pgdir, sp, ustack, (3+argc+1)*sizeof(uint64)) < 0)
|
||||||
goto bad;
|
goto bad;
|
||||||
|
|
||||||
// Save program name for debugging.
|
// Save program name for debugging.
|
||||||
|
@ -92,20 +97,21 @@ exec(char *path, char **argv)
|
||||||
if(*s == '/')
|
if(*s == '/')
|
||||||
last = s+1;
|
last = s+1;
|
||||||
safestrcpy(curproc->name, last, sizeof(curproc->name));
|
safestrcpy(curproc->name, last, sizeof(curproc->name));
|
||||||
|
|
||||||
// Commit to the user image.
|
// Commit to the user image.
|
||||||
oldpgdir = curproc->pgdir;
|
oldpgdir = curproc->pgdir;
|
||||||
curproc->pgdir = pgdir;
|
curproc->pgdir = pgdir;
|
||||||
curproc->sz = sz;
|
curproc->sz = sz;
|
||||||
curproc->tf->eip = elf.entry; // main
|
curproc->tf->rip = elf.entry; // main
|
||||||
curproc->tf->esp = sp;
|
curproc->tf->rcx = elf.entry;
|
||||||
|
curproc->tf->rsp = sp;
|
||||||
switchuvm(curproc);
|
switchuvm(curproc);
|
||||||
freevm(oldpgdir);
|
freevm(oldpgdir, oldsz);
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
bad:
|
bad:
|
||||||
if(pgdir)
|
if(pgdir)
|
||||||
freevm(pgdir);
|
freevm(pgdir, sz);
|
||||||
if(ip){
|
if(ip){
|
||||||
iunlockput(ip);
|
iunlockput(ip);
|
||||||
end_op();
|
end_op();
|
||||||
|
|
13
initcode.S
13
initcode.S
|
@ -8,16 +8,15 @@
|
||||||
# exec(init, argv)
|
# exec(init, argv)
|
||||||
.globl start
|
.globl start
|
||||||
start:
|
start:
|
||||||
pushl $argv
|
mov $init, %rdi
|
||||||
pushl $init
|
mov $argv, %rsi
|
||||||
pushl $0 // where caller pc would be
|
mov $SYS_exec, %rax
|
||||||
movl $SYS_exec, %eax
|
syscall
|
||||||
int $T_SYSCALL
|
|
||||||
|
|
||||||
# for(;;) exit();
|
# for(;;) exit();
|
||||||
exit:
|
exit:
|
||||||
movl $SYS_exit, %eax
|
mov $SYS_exit, %rax
|
||||||
int $T_SYSCALL
|
syscall
|
||||||
jmp exit
|
jmp exit
|
||||||
|
|
||||||
# char init[] = "/init\0";
|
# char init[] = "/init\0";
|
||||||
|
|
3
ioapic.c
3
ioapic.c
|
@ -4,6 +4,7 @@
|
||||||
|
|
||||||
#include "types.h"
|
#include "types.h"
|
||||||
#include "defs.h"
|
#include "defs.h"
|
||||||
|
#include "memlayout.h"
|
||||||
#include "traps.h"
|
#include "traps.h"
|
||||||
|
|
||||||
#define IOAPIC 0xFEC00000 // Default physical address of IO APIC
|
#define IOAPIC 0xFEC00000 // Default physical address of IO APIC
|
||||||
|
@ -50,7 +51,7 @@ ioapicinit(void)
|
||||||
{
|
{
|
||||||
int i, id, maxintr;
|
int i, id, maxintr;
|
||||||
|
|
||||||
ioapic = (volatile struct ioapic*)IOAPIC;
|
ioapic = P2V((volatile struct ioapic*)IOAPIC);
|
||||||
maxintr = (ioapicread(REG_VER) >> 16) & 0xFF;
|
maxintr = (ioapicread(REG_VER) >> 16) & 0xFF;
|
||||||
id = ioapicread(REG_ID) >> 24;
|
id = ioapicread(REG_ID) >> 24;
|
||||||
if(id != ioapicid)
|
if(id != ioapicid)
|
||||||
|
|
6
kalloc.c
6
kalloc.c
|
@ -47,7 +47,7 @@ void
|
||||||
freerange(void *vstart, void *vend)
|
freerange(void *vstart, void *vend)
|
||||||
{
|
{
|
||||||
char *p;
|
char *p;
|
||||||
p = (char*)PGROUNDUP((uint)vstart);
|
p = (char*)PGROUNDUP((uint64)vstart);
|
||||||
for(; p + PGSIZE <= (char*)vend; p += PGSIZE)
|
for(; p + PGSIZE <= (char*)vend; p += PGSIZE)
|
||||||
kfree(p);
|
kfree(p);
|
||||||
}
|
}
|
||||||
|
@ -61,7 +61,7 @@ kfree(char *v)
|
||||||
{
|
{
|
||||||
struct run *r;
|
struct run *r;
|
||||||
|
|
||||||
if((uint)v % PGSIZE || v < end || V2P(v) >= PHYSTOP)
|
if((uint64)v % PGSIZE || v < end || V2P(v) >= PHYSTOP)
|
||||||
panic("kfree");
|
panic("kfree");
|
||||||
|
|
||||||
// Fill with junk to catch dangling refs.
|
// Fill with junk to catch dangling refs.
|
||||||
|
@ -91,6 +91,8 @@ kalloc(void)
|
||||||
kmem.freelist = r->next;
|
kmem.freelist = r->next;
|
||||||
if(kmem.use_lock)
|
if(kmem.use_lock)
|
||||||
release(&kmem.lock);
|
release(&kmem.lock);
|
||||||
|
if(r != 0 && (uint64) r < KERNBASE)
|
||||||
|
panic("kalloc");
|
||||||
return (char*)r;
|
return (char*)r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
41
kernel.ld
41
kernel.ld
|
@ -1,22 +1,13 @@
|
||||||
/* Simple linker script for the JOS kernel.
|
OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64")
|
||||||
See the GNU ld 'info' manual ("info ld") to learn the syntax. */
|
OUTPUT_ARCH(i386:x86-64)
|
||||||
|
|
||||||
OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
|
|
||||||
OUTPUT_ARCH(i386)
|
|
||||||
ENTRY(_start)
|
|
||||||
|
|
||||||
SECTIONS
|
SECTIONS
|
||||||
{
|
{
|
||||||
/* Link the kernel at this address: "." means the current address */
|
. = 0xFFFFFF0000100000;
|
||||||
/* Must be equal to KERNLINK */
|
PROVIDE(text = .);
|
||||||
. = 0x80100000;
|
|
||||||
|
|
||||||
.text : AT(0x100000) {
|
.text : AT(0x100000) {
|
||||||
*(.text .stub .text.* .gnu.linkonce.t.*)
|
*(.text .stub .text.* .gnu.linkonce.t.*)
|
||||||
}
|
}
|
||||||
|
|
||||||
PROVIDE(etext = .); /* Define the 'etext' symbol to this value */
|
|
||||||
|
|
||||||
.rodata : {
|
.rodata : {
|
||||||
*(.rodata .rodata.* .gnu.linkonce.r.*)
|
*(.rodata .rodata.* .gnu.linkonce.r.*)
|
||||||
}
|
}
|
||||||
|
@ -38,31 +29,21 @@ SECTIONS
|
||||||
for this section */
|
for this section */
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Adjust the address for the data segment to the next page */
|
|
||||||
. = ALIGN(0x1000);
|
. = ALIGN(0x1000);
|
||||||
|
|
||||||
/* Conventionally, Unix linkers provide pseudo-symbols
|
/* Conventionally, Unix linkers provide pseudo-symbols
|
||||||
* etext, edata, and end, at the end of the text, data, and bss.
|
* etext, edata, and end, at the end of the text, data, and bss.
|
||||||
* For the kernel mapping, we need the address at the beginning
|
* For the kernel mapping, we need the address at the beginning
|
||||||
* of the data section, but that's not one of the conventional
|
* of the data section, but that's not one of the conventional
|
||||||
* symbols, because the convention started before there was a
|
* symbols, because the convention started before there was a
|
||||||
* read-only rodata section between text and data. */
|
* read-only rodata section between text and data. */
|
||||||
PROVIDE(data = .);
|
PROVIDE(data = .);
|
||||||
|
|
||||||
/* The data segment */
|
|
||||||
.data : {
|
.data : {
|
||||||
*(.data)
|
*(.data)
|
||||||
}
|
}
|
||||||
|
|
||||||
PROVIDE(edata = .);
|
PROVIDE(edata = .);
|
||||||
|
|
||||||
.bss : {
|
.bss : {
|
||||||
*(.bss)
|
*(.bss)
|
||||||
}
|
}
|
||||||
|
|
||||||
PROVIDE(end = .);
|
PROVIDE(end = .);
|
||||||
|
|
||||||
/DISCARD/ : {
|
|
||||||
*(.eh_frame .note.GNU-stack)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
69
main.c
69
main.c
|
@ -6,17 +6,22 @@
|
||||||
#include "proc.h"
|
#include "proc.h"
|
||||||
#include "x86.h"
|
#include "x86.h"
|
||||||
|
|
||||||
static void startothers(void);
|
|
||||||
static void mpmain(void) __attribute__((noreturn));
|
|
||||||
extern pde_t *kpgdir;
|
extern pde_t *kpgdir;
|
||||||
extern char end[]; // first address after kernel loaded from ELF file
|
extern char end[]; // first address after kernel loaded from ELF file
|
||||||
|
|
||||||
|
static void main(void) __attribute__((noreturn));
|
||||||
|
static void startothers(void);
|
||||||
|
|
||||||
|
|
||||||
// Bootstrap processor starts running C code here.
|
// Bootstrap processor starts running C code here.
|
||||||
// Allocate a real stack and switch to it, first
|
// Allocate a real stack and switch to it, first
|
||||||
// doing some setup required for memory allocator to work.
|
// doing some setup required for memory allocator to work.
|
||||||
int
|
int
|
||||||
main(void)
|
bpmain(uint64 mbmagic, uint64 mbaddr)
|
||||||
{
|
{
|
||||||
|
if(mbmagic != 0x2badb002)
|
||||||
|
panic("multiboot header not found");
|
||||||
|
|
||||||
kinit1(end, P2V(4*1024*1024)); // phys page allocator
|
kinit1(end, P2V(4*1024*1024)); // phys page allocator
|
||||||
kvmalloc(); // kernel page table
|
kvmalloc(); // kernel page table
|
||||||
mpinit(); // detect other processors
|
mpinit(); // detect other processors
|
||||||
|
@ -30,26 +35,19 @@ main(void)
|
||||||
tvinit(); // trap vectors
|
tvinit(); // trap vectors
|
||||||
binit(); // buffer cache
|
binit(); // buffer cache
|
||||||
fileinit(); // file table
|
fileinit(); // file table
|
||||||
ideinit(); // disk
|
ideinit(); // disk
|
||||||
|
|
||||||
startothers(); // start other processors
|
startothers(); // start other processors
|
||||||
|
|
||||||
kinit2(P2V(4*1024*1024), P2V(PHYSTOP)); // must come after startothers()
|
kinit2(P2V(4*1024*1024), P2V(PHYSTOP)); // must come after startothers()
|
||||||
userinit(); // first user process
|
userinit(); // first user process
|
||||||
mpmain(); // finish this processor's setup
|
main();
|
||||||
}
|
return 0;
|
||||||
|
|
||||||
// Other CPUs jump here from entryother.S.
|
|
||||||
static void
|
|
||||||
mpenter(void)
|
|
||||||
{
|
|
||||||
switchkvm();
|
|
||||||
seginit();
|
|
||||||
lapicinit();
|
|
||||||
mpmain();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Common CPU setup code.
|
// Common CPU setup code.
|
||||||
static void
|
static void
|
||||||
mpmain(void)
|
main(void)
|
||||||
{
|
{
|
||||||
cprintf("cpu%d: starting %d\n", cpuid(), cpuid());
|
cprintf("cpu%d: starting %d\n", cpuid(), cpuid());
|
||||||
idtinit(); // load idt register
|
idtinit(); // load idt register
|
||||||
|
@ -57,7 +55,17 @@ mpmain(void)
|
||||||
scheduler(); // start running processes
|
scheduler(); // start running processes
|
||||||
}
|
}
|
||||||
|
|
||||||
pde_t entrypgdir[]; // For entry.S
|
// Other CPUs jump here from entryother.S.
|
||||||
|
// Per-AP bring-up: three init calls, then the common setup in main().
void
|
||||||
|
apmain(void)
|
||||||
|
{
|
||||||
|
// NOTE(review): presumably switches to the kernel page table -- confirm in vm.c.
switchkvm();
|
||||||
|
// NOTE(review): presumably sets up this CPU's segment state -- confirm.
seginit();
|
||||||
|
// NOTE(review): presumably initializes this CPU's local APIC -- confirm.
lapicinit();
|
||||||
|
// Common CPU setup; main() is declared noreturn, so control never comes back.
main();
|
||||||
|
}
|
||||||
|
|
||||||
|
void apstart(void);
|
||||||
|
|
||||||
// Start the non-boot (AP) processors.
|
// Start the non-boot (AP) processors.
|
||||||
static void
|
static void
|
||||||
|
@ -72,7 +80,7 @@ startothers(void)
|
||||||
// The linker has placed the image of entryother.S in
|
// The linker has placed the image of entryother.S in
|
||||||
// _binary_entryother_start.
|
// _binary_entryother_start.
|
||||||
code = P2V(0x7000);
|
code = P2V(0x7000);
|
||||||
memmove(code, _binary_entryother_start, (uint)_binary_entryother_size);
|
memmove(code, _binary_entryother_start, (uint64)_binary_entryother_size);
|
||||||
|
|
||||||
for(c = cpus; c < cpus+ncpu; c++){
|
for(c = cpus; c < cpus+ncpu; c++){
|
||||||
if(c == mycpu()) // We've started already.
|
if(c == mycpu()) // We've started already.
|
||||||
|
@ -82,9 +90,8 @@ startothers(void)
|
||||||
// pgdir to use. We cannot use kpgdir yet, because the AP processor
|
// pgdir to use. We cannot use kpgdir yet, because the AP processor
|
||||||
// is running in low memory, so we use entrypgdir for the APs too.
|
// is running in low memory, so we use entrypgdir for the APs too.
|
||||||
stack = kalloc();
|
stack = kalloc();
|
||||||
*(void**)(code-4) = stack + KSTACKSIZE;
|
*(uint32*)(code-4) = V2P(apstart);
|
||||||
*(void(**)(void))(code-8) = mpenter;
|
*(uint64*)(code-12) = (uint64) (stack+KSTACKSIZE);
|
||||||
*(int**)(code-12) = (void *) V2P(entrypgdir);
|
|
||||||
|
|
||||||
lapicstartap(c->apicid, V2P(code));
|
lapicstartap(c->apicid, V2P(code));
|
||||||
|
|
||||||
|
@ -94,23 +101,3 @@ startothers(void)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// The boot page table used in entry.S and entryother.S.
|
|
||||||
// Page directories (and page tables) must start on page boundaries,
|
|
||||||
// hence the __aligned__ attribute.
|
|
||||||
// PTE_PS in a page directory entry enables 4Mbyte pages.
|
|
||||||
|
|
||||||
__attribute__((__aligned__(PGSIZE)))
|
|
||||||
pde_t entrypgdir[NPDENTRIES] = {
|
|
||||||
// Map VA's [0, 4MB) to PA's [0, 4MB)
|
|
||||||
[0] = (0) | PTE_P | PTE_W | PTE_PS,
|
|
||||||
// Map VA's [KERNBASE, KERNBASE+4MB) to PA's [0, 4MB)
|
|
||||||
[KERNBASE>>PDXSHIFT] = (0) | PTE_P | PTE_W | PTE_PS,
|
|
||||||
};
|
|
||||||
|
|
||||||
//PAGEBREAK!
|
|
||||||
// Blank page.
|
|
||||||
//PAGEBREAK!
|
|
||||||
// Blank page.
|
|
||||||
//PAGEBREAK!
|
|
||||||
// Blank page.
|
|
||||||
|
|
||||||
|
|
|
@ -2,13 +2,14 @@
|
||||||
|
|
||||||
#define EXTMEM 0x100000 // Start of extended memory
|
#define EXTMEM 0x100000 // Start of extended memory
|
||||||
#define PHYSTOP 0xE000000 // Top physical memory
|
#define PHYSTOP 0xE000000 // Top physical memory
|
||||||
#define DEVSPACE 0xFE000000 // Other devices are at high addresses
|
#define DEVSPACE 0xFE000000 // Other devices are top of 32-bit address space
|
||||||
|
#define DEVSPACETOP 0x100000000
|
||||||
|
|
||||||
// Key addresses for address space layout (see kmap in vm.c for layout)
|
// Key addresses for address space layout (see kmap in vm.c for layout)
|
||||||
#define KERNBASE 0x80000000 // First kernel virtual address
|
#define KERNBASE 0xFFFFFF0000000000 // First kernel virtual address
|
||||||
#define KERNLINK (KERNBASE+EXTMEM) // Address where kernel is linked
|
#define KERNLINK (KERNBASE+EXTMEM) // Address where kernel is linked
|
||||||
|
|
||||||
#define V2P(a) (((uint) (a)) - KERNBASE)
|
#define V2P(a) (((uint64) (a)) - KERNBASE)
|
||||||
#define P2V(a) ((void *)(((char *) (a)) + KERNBASE))
|
#define P2V(a) ((void *)(((char *) (a)) + KERNBASE))
|
||||||
|
|
||||||
#define V2P_WO(x) ((x) - KERNBASE) // same as V2P, but without casts
|
#define V2P_WO(x) ((x) - KERNBASE) // same as V2P, but without casts
|
||||||
|
|
232
mmu.h
232
mmu.h
|
@ -2,8 +2,10 @@
|
||||||
// x86 memory management unit (MMU).
|
// x86 memory management unit (MMU).
|
||||||
|
|
||||||
// Eflags register
|
// Eflags register
|
||||||
|
#define FL_TF 0x00000100 // Trap Flag
|
||||||
#define FL_IF 0x00000200 // Interrupt Enable
|
#define FL_IF 0x00000200 // Interrupt Enable
|
||||||
|
|
||||||
|
|
||||||
// Control Register flags
|
// Control Register flags
|
||||||
#define CR0_PE 0x00000001 // Protection Enable
|
#define CR0_PE 0x00000001 // Protection Enable
|
||||||
#define CR0_WP 0x00010000 // Write Protect
|
#define CR0_WP 0x00010000 // Write Protect
|
||||||
|
@ -11,81 +13,104 @@
|
||||||
|
|
||||||
#define CR4_PSE 0x00000010 // Page size extension
|
#define CR4_PSE 0x00000010 // Page size extension
|
||||||
|
|
||||||
// various segment selectors.
|
// Segment selectors (indexes) in our GDTs.
|
||||||
#define SEG_KCODE 1 // kernel code
|
// Defined by our convention, not the architecture.
|
||||||
#define SEG_KDATA 2 // kernel data+stack
|
#define KCSEG32 (1<<3) /* kernel 32-bit code segment */
|
||||||
#define SEG_UCODE 3 // user code
|
#define KCSEG (2<<3) /* kernel code segment */
|
||||||
#define SEG_UDATA 4 // user data+stack
|
#define KDSEG (3<<3) /* kernel data segment */
|
||||||
#define SEG_TSS 5 // this process's task state
|
#define TSSSEG (4<<3) /* tss segment - takes two slots */
|
||||||
|
#define UDSEG (6<<3) /* user data segment */
|
||||||
|
#define UCSEG (7<<3) /* user code segment */
|
||||||
|
|
||||||
// cpu->gdt[NSEGS] holds the above segments.
|
#define NSEGS 8
|
||||||
#define NSEGS 6
|
|
||||||
|
|
||||||
#ifndef __ASSEMBLER__
|
#ifndef __ASSEMBLER__
|
||||||
// Segment Descriptor
|
|
||||||
struct segdesc {
|
struct segdesc {
|
||||||
uint lim_15_0 : 16; // Low bits of segment limit
|
uint16 limit0;
|
||||||
uint base_15_0 : 16; // Low bits of segment base address
|
uint16 base0;
|
||||||
uint base_23_16 : 8; // Middle bits of segment base address
|
uint8 base1;
|
||||||
uint type : 4; // Segment type (see STS_ constants)
|
uint8 bits;
|
||||||
uint s : 1; // 0 = system, 1 = application
|
uint8 bitslimit1;
|
||||||
uint dpl : 2; // Descriptor Privilege Level
|
uint8 base2;
|
||||||
uint p : 1; // Present
|
|
||||||
uint lim_19_16 : 4; // High bits of segment limit
|
|
||||||
uint avl : 1; // Unused (available for software use)
|
|
||||||
uint rsv1 : 1; // Reserved
|
|
||||||
uint db : 1; // 0 = 16-bit segment, 1 = 32-bit segment
|
|
||||||
uint g : 1; // Granularity: limit scaled by 4K when set
|
|
||||||
uint base_31_24 : 8; // High bits of segment base address
|
|
||||||
};
|
};
|
||||||
|
|
||||||
// Normal segment
|
// SEGDESC constructs a segment descriptor literal
|
||||||
#define SEG(type, base, lim, dpl) (struct segdesc) \
|
// with the given base, limit, and type bits.
|
||||||
{ ((lim) >> 12) & 0xffff, (uint)(base) & 0xffff, \
|
#define SEGDESC(base, limit, bits) (struct segdesc){ \
|
||||||
((uint)(base) >> 16) & 0xff, type, 1, dpl, 1, \
|
(limit)&0xffff, (base)&0xffff, \
|
||||||
(uint)(lim) >> 28, 0, 0, 1, 1, (uint)(base) >> 24 }
|
((base)>>16)&0xff, \
|
||||||
#define SEG16(type, base, lim, dpl) (struct segdesc) \
|
(bits)&0xff, \
|
||||||
{ (lim) & 0xffff, (uint)(base) & 0xffff, \
|
(((bits)>>4)&0xf0) | ((limit>>16)&0xf), \
|
||||||
((uint)(base) >> 16) & 0xff, type, 1, dpl, 1, \
|
((base)>>24)&0xff, \
|
||||||
(uint)(lim) >> 16, 0, 0, 1, 0, (uint)(base) >> 24 }
|
}
|
||||||
|
|
||||||
|
// SEGDESCHI constructs an extension segment descriptor
|
||||||
|
// literal that records the high bits of base.
|
||||||
|
#define SEGDESCHI(base) (struct segdesc) { \
|
||||||
|
(((base)>>32)&0xffff), (((base)>>48)&0xffff), \
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define DPL_USER 0x3 // User DPL
|
#define DPL_USER 0x3 // User DPL
|
||||||
|
|
||||||
|
#define SEG_A (1<<0) /* segment accessed bit */
|
||||||
|
#define SEG_R (1<<1) /* readable (code) */
|
||||||
|
#define SEG_W (1<<1) /* writable (data) */
|
||||||
|
#define SEG_C (1<<2) /* conforming segment (code) */
|
||||||
|
#define SEG_E (1<<2) /* expand-down bit (data) */
|
||||||
|
#define SEG_CODE (1<<3) /* code segment (instead of data) */
|
||||||
|
|
||||||
|
// User and system segment bits.
|
||||||
|
#define SEG_S (1<<4) /* if 0, system descriptor */
|
||||||
|
#define SEG_DPL(x) ((x)<<5) /* descriptor privilege level (2 bits) */
|
||||||
|
#define SEG_P (1<<7) /* segment present */
|
||||||
|
#define SEG_AVL (1<<8) /* available for operating system use */
|
||||||
|
#define SEG_L (1<<9) /* long mode */
|
||||||
|
#define SEG_D (1<<10) /* default operation size 32-bit */
|
||||||
|
#define SEG_G (1<<11) /* granularity */
|
||||||
|
|
||||||
// Application segment type bits
|
// Application segment type bits
|
||||||
#define STA_X 0x8 // Executable segment
|
#define STA_X 0x8 // Executable segment
|
||||||
#define STA_W 0x2 // Writeable (non-executable segments)
|
#define STA_W 0x2 // Writeable (non-executable segments)
|
||||||
#define STA_R 0x2 // Readable (executable segments)
|
#define STA_R 0x2 // Readable (executable segments)
|
||||||
|
|
||||||
// System segment type bits
|
// System segment type bits
|
||||||
#define STS_T32A 0x9 // Available 32-bit TSS
|
#define SEG_LDT (2<<0) /* local descriptor table */
|
||||||
#define STS_IG32 0xE // 32-bit Interrupt Gate
|
#define SEG_TSS64A (9<<0) /* available 64-bit TSS */
|
||||||
#define STS_TG32 0xF // 32-bit Trap Gate
|
#define SEG_TSS64B (11<<0) /* busy 64-bit TSS */
|
||||||
|
#define SEG_CALL64 (12<<0) /* 64-bit call gate */
|
||||||
|
#define SEG_INTR64 (14<<0) /* 64-bit interrupt gate */
|
||||||
|
#define SEG_TRAP64 (15<<0) /* 64-bit trap gate */
|
||||||
|
|
||||||
// A virtual address 'la' has a three-part structure as follows:
|
// A virtual address 'la' has a six-part structure as follows:
|
||||||
//
|
//
|
||||||
// +--------10------+-------10-------+---------12----------+
|
// +--16--+---9---+------9-------+-----9----+----9-------+----12-------+
|
||||||
// | Page Directory | Page Table | Offset within Page |
|
// | Sign | PML4 |Page Directory| Page Dir |Page Table | Offset Page |
|
||||||
// | Index | Index | |
|
// |Extend| Index | Pointer Index| Index | Index | in Page |
|
||||||
// +----------------+----------------+---------------------+
|
// +------+-------+--------------+----------+------------+-------------+
|
||||||
// \--- PDX(va) --/ \--- PTX(va) --/
|
// \-PMX(va)-/\-PDPX(va)--/ \-PDX(va)-/ \-PTX(va)-/
|
||||||
|
|
||||||
|
#define PMX(va) (((uint64)(va) >> PML4XSHIFT) & PXMASK)
|
||||||
|
#define PDPX(va) (((uint64)(va) >> PDPXSHIFT) & PXMASK)
|
||||||
// page directory index
|
// page directory index
|
||||||
#define PDX(va) (((uint)(va) >> PDXSHIFT) & 0x3FF)
|
#define PDX(va) (((uint64)(va) >> PDXSHIFT) & PXMASK)
|
||||||
|
|
||||||
// page table index
|
// page table index
|
||||||
#define PTX(va) (((uint)(va) >> PTXSHIFT) & 0x3FF)
|
#define PTX(va) (((uint64)(va) >> PTXSHIFT) & PXMASK)
|
||||||
|
|
||||||
// construct virtual address from indexes and offset
|
// construct virtual address from indexes and offset
|
||||||
#define PGADDR(d, t, o) ((uint)((d) << PDXSHIFT | (t) << PTXSHIFT | (o)))
|
// Each field is widened to uint64 before it is shifted, so the shift is
// carried out in 64 bits instead of in the (possibly 32-bit) type of the
// argument, which could overflow or wrap before the result was widened.
#define PGADDR(d, t, o) ((uint64)(d) << PDXSHIFT | (uint64)(t) << PTXSHIFT | (uint64)(o))
|
||||||
|
|
||||||
// Page directory and page table constants.
|
// Page directory and page table constants.
|
||||||
#define NPDENTRIES 1024 // # directory entries per page directory
|
#define NPDENTRIES 512 // # directory entries per page directory
|
||||||
#define NPTENTRIES 1024 // # PTEs per page table
|
#define NPTENTRIES 512 // # PTEs per page table
|
||||||
#define PGSIZE 4096 // bytes mapped by a page
|
#define PGSIZE 4096 // bytes mapped by a page
|
||||||
|
|
||||||
#define PTXSHIFT 12 // offset of PTX in a linear address
|
#define PTXSHIFT 12 // offset of PTX in a linear address
|
||||||
#define PDXSHIFT 22 // offset of PDX in a linear address
|
#define PDXSHIFT 21 // offset of PDX in a linear address
|
||||||
|
#define PDPXSHIFT 30 // offset of PDPX in a linear address
|
||||||
|
#define PML4XSHIFT 39 // offset of PML4X in a linear address
|
||||||
|
#define PXMASK 0X1FF
|
||||||
|
|
||||||
#define PGROUNDUP(sz) (((sz)+PGSIZE-1) & ~(PGSIZE-1))
|
#define PGROUNDUP(sz) (((sz)+PGSIZE-1) & ~(PGSIZE-1))
|
||||||
#define PGROUNDDOWN(a) (((a)) & ~(PGSIZE-1))
|
#define PGROUNDDOWN(a) (((a)) & ~(PGSIZE-1))
|
||||||
|
@ -95,87 +120,54 @@ struct segdesc {
|
||||||
#define PTE_W 0x002 // Writeable
|
#define PTE_W 0x002 // Writeable
|
||||||
#define PTE_U 0x004 // User
|
#define PTE_U 0x004 // User
|
||||||
#define PTE_PS 0x080 // Page Size
|
#define PTE_PS 0x080 // Page Size
|
||||||
|
#define PTE_PWT 0x008 // Write-Through
|
||||||
|
#define PTE_PCD 0x010 // Cache-Disable
|
||||||
|
|
||||||
// Address in page table or page directory entry
|
// Address in page table or page directory entry
|
||||||
#define PTE_ADDR(pte) ((uint)(pte) & ~0xFFF)
|
#define PTE_ADDR(pte) ((uint64)(pte) & ~0xFFF)
|
||||||
#define PTE_FLAGS(pte) ((uint)(pte) & 0xFFF)
|
#define PTE_FLAGS(pte) ((uint64)(pte) & 0xFFF)
|
||||||
|
|
||||||
#ifndef __ASSEMBLER__
|
#ifndef __ASSEMBLER__
|
||||||
typedef uint pte_t;
|
|
||||||
|
|
||||||
// Task state segment format
|
typedef uint64 pml4e_t;
|
||||||
|
typedef uint64 pdpe_t;
|
||||||
|
typedef uint64 pte_t;
|
||||||
|
|
||||||
struct taskstate {
|
struct taskstate {
|
||||||
uint link; // Old ts selector
|
uint8 reserved0[4];
|
||||||
uint esp0; // Stack pointers and segment selectors
|
uint64 rsp[3];
|
||||||
ushort ss0; // after an increase in privilege level
|
uint64 ist[8];
|
||||||
ushort padding1;
|
uint8 reserved1[10];
|
||||||
uint *esp1;
|
uint16 iomba;
|
||||||
ushort ss1;
|
uint8 iopb[0];
|
||||||
ushort padding2;
|
} __attribute__ ((packed));
|
||||||
uint *esp2;
|
|
||||||
ushort ss2;
|
#define INT_P (1<<7) /* interrupt descriptor present */
|
||||||
ushort padding3;
|
|
||||||
void *cr3; // Page directory base
|
struct intgate
|
||||||
uint *eip; // Saved state from last task switch
|
{
|
||||||
uint eflags;
|
uint16 rip0;
|
||||||
uint eax; // More saved state (registers)
|
uint16 cs;
|
||||||
uint ecx;
|
uint8 reserved0;
|
||||||
uint edx;
|
uint8 bits;
|
||||||
uint ebx;
|
uint16 rip1;
|
||||||
uint *esp;
|
uint32 rip2;
|
||||||
uint *ebp;
|
uint32 reserved1;
|
||||||
uint esi;
|
|
||||||
uint edi;
|
|
||||||
ushort es; // Even more saved state (segment selectors)
|
|
||||||
ushort padding4;
|
|
||||||
ushort cs;
|
|
||||||
ushort padding5;
|
|
||||||
ushort ss;
|
|
||||||
ushort padding6;
|
|
||||||
ushort ds;
|
|
||||||
ushort padding7;
|
|
||||||
ushort fs;
|
|
||||||
ushort padding8;
|
|
||||||
ushort gs;
|
|
||||||
ushort padding9;
|
|
||||||
ushort ldt;
|
|
||||||
ushort padding10;
|
|
||||||
ushort t; // Trap on task switch
|
|
||||||
ushort iomb; // I/O map base address
|
|
||||||
};
|
};
|
||||||
|
|
||||||
// Gate descriptors for interrupts and traps
|
// INTDESC constructs an interrupt descriptor literal
|
||||||
struct gatedesc {
|
// that records the given code segment, instruction pointer,
|
||||||
uint off_15_0 : 16; // low 16 bits of offset in segment
|
// and type bits.
|
||||||
uint cs : 16; // code segment selector
|
#define INTDESC(cs, rip, bits) (struct intgate){ \
|
||||||
uint args : 5; // # args, 0 for interrupt/trap gates
|
(rip)&0xffff, (cs), 0, bits, ((rip)>>16)&0xffff, \
|
||||||
uint rsv1 : 3; // reserved(should be zero I guess)
|
(uint64)(rip)>>32, 0, \
|
||||||
uint type : 4; // type(STS_{IG32,TG32})
|
|
||||||
uint s : 1; // must be 0 (system)
|
|
||||||
uint dpl : 2; // descriptor(meaning new) privilege level
|
|
||||||
uint p : 1; // Present
|
|
||||||
uint off_31_16 : 16; // high bits of offset in segment
|
|
||||||
};
|
|
||||||
|
|
||||||
// Set up a normal interrupt/trap gate descriptor.
|
|
||||||
// - istrap: 1 for a trap (= exception) gate, 0 for an interrupt gate.
|
|
||||||
// interrupt gate clears FL_IF, trap gate leaves FL_IF alone
|
|
||||||
// - sel: Code segment selector for interrupt/trap handler
|
|
||||||
// - off: Offset in code segment for interrupt/trap handler
|
|
||||||
// - dpl: Descriptor Privilege Level -
|
|
||||||
// the privilege level required for software to invoke
|
|
||||||
// this interrupt/trap gate explicitly using an int instruction.
|
|
||||||
#define SETGATE(gate, istrap, sel, off, d) \
|
|
||||||
{ \
|
|
||||||
(gate).off_15_0 = (uint)(off) & 0xffff; \
|
|
||||||
(gate).cs = (sel); \
|
|
||||||
(gate).args = 0; \
|
|
||||||
(gate).rsv1 = 0; \
|
|
||||||
(gate).type = (istrap) ? STS_TG32 : STS_IG32; \
|
|
||||||
(gate).s = 0; \
|
|
||||||
(gate).dpl = (d); \
|
|
||||||
(gate).p = 1; \
|
|
||||||
(gate).off_31_16 = (uint)(off) >> 16; \
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// See section 4.6 of amd64 vol2
|
||||||
|
// Limit/base pair for a descriptor table.
// NOTE(review): presumably the memory operand handed to lgdt/lidt -- confirm at the use sites.
struct desctr
|
||||||
|
{
|
||||||
|
// Table limit (16 bits).
uint16 limit;
|
||||||
|
// Table base address (64 bits); "packed" places it directly after limit with no padding.
uint64 base;
|
||||||
|
// packed: 10-byte layout with no hole after limit; aligned(16): each object starts on a 16-byte boundary.
} __attribute__((packed, aligned(16))); // important!
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
6
mp.c
6
mp.c
|
@ -28,7 +28,7 @@ sum(uchar *addr, int len)
|
||||||
|
|
||||||
// Look for an MP structure in the len bytes at addr.
|
// Look for an MP structure in the len bytes at addr.
|
||||||
static struct mp*
|
static struct mp*
|
||||||
mpsearch1(uint a, int len)
|
mpsearch1(uint64 a, int len)
|
||||||
{
|
{
|
||||||
uchar *e, *p, *addr;
|
uchar *e, *p, *addr;
|
||||||
|
|
||||||
|
@ -77,7 +77,7 @@ mpconfig(struct mp **pmp)
|
||||||
|
|
||||||
if((mp = mpsearch()) == 0 || mp->physaddr == 0)
|
if((mp = mpsearch()) == 0 || mp->physaddr == 0)
|
||||||
return 0;
|
return 0;
|
||||||
conf = (struct mpconf*) P2V((uint) mp->physaddr);
|
conf = (struct mpconf*) P2V((uint64) mp->physaddr);
|
||||||
if(memcmp(conf, "PCMP", 4) != 0)
|
if(memcmp(conf, "PCMP", 4) != 0)
|
||||||
return 0;
|
return 0;
|
||||||
if(conf->version != 1 && conf->version != 4)
|
if(conf->version != 1 && conf->version != 4)
|
||||||
|
@ -101,7 +101,7 @@ mpinit(void)
|
||||||
if((conf = mpconfig(&mp)) == 0)
|
if((conf = mpconfig(&mp)) == 0)
|
||||||
panic("Expect to run on an SMP");
|
panic("Expect to run on an SMP");
|
||||||
ismp = 1;
|
ismp = 1;
|
||||||
lapic = (uint*)conf->lapicaddr;
|
lapic = P2V((uint64)conf->lapicaddr_p);
|
||||||
for(p=(uchar*)(conf+1), e=(uchar*)conf+conf->length; p<e; ){
|
for(p=(uchar*)(conf+1), e=(uchar*)conf+conf->length; p<e; ){
|
||||||
switch(*p){
|
switch(*p){
|
||||||
case MPPROC:
|
case MPPROC:
|
||||||
|
|
8
mp.h
8
mp.h
|
@ -2,7 +2,7 @@
|
||||||
|
|
||||||
struct mp { // floating pointer
|
struct mp { // floating pointer
|
||||||
uchar signature[4]; // "_MP_"
|
uchar signature[4]; // "_MP_"
|
||||||
void *physaddr; // phys addr of MP config table
|
uint32 physaddr; // phys addr of MP config table
|
||||||
uchar length; // 1
|
uchar length; // 1
|
||||||
uchar specrev; // [14]
|
uchar specrev; // [14]
|
||||||
uchar checksum; // all bytes must add up to 0
|
uchar checksum; // all bytes must add up to 0
|
||||||
|
@ -17,10 +17,10 @@ struct mpconf { // configuration table header
|
||||||
uchar version; // [14]
|
uchar version; // [14]
|
||||||
uchar checksum; // all bytes must add up to 0
|
uchar checksum; // all bytes must add up to 0
|
||||||
uchar product[20]; // product id
|
uchar product[20]; // product id
|
||||||
uint *oemtable; // OEM table pointer
|
uint32 oemtable; // OEM table pointer
|
||||||
ushort oemlength; // OEM table length
|
ushort oemlength; // OEM table length
|
||||||
ushort entry; // entry count
|
ushort entry; // entry count
|
||||||
uint *lapicaddr; // address of local APIC
|
uint32 lapicaddr_p; // address of local APIC
|
||||||
ushort xlength; // extended table length
|
ushort xlength; // extended table length
|
||||||
uchar xchecksum; // extended table checksum
|
uchar xchecksum; // extended table checksum
|
||||||
uchar reserved;
|
uchar reserved;
|
||||||
|
@ -42,7 +42,7 @@ struct mpioapic { // I/O APIC table entry
|
||||||
uchar apicno; // I/O APIC id
|
uchar apicno; // I/O APIC id
|
||||||
uchar version; // I/O APIC version
|
uchar version; // I/O APIC version
|
||||||
uchar flags; // I/O APIC flags
|
uchar flags; // I/O APIC flags
|
||||||
uint *addr; // I/O APIC address
|
uint32 addr_p; // I/O APIC address
|
||||||
};
|
};
|
||||||
|
|
||||||
// Table entry types
|
// Table entry types
|
||||||
|
|
25
msr.h
Normal file
25
msr.h
Normal file
|
@ -0,0 +1,25 @@
|
||||||
|
// SYSCALL and SYSRET registers
|
||||||
|
#define MSR_STAR 0xc0000081
|
||||||
|
#define MSR_LSTAR 0xc0000082
|
||||||
|
#define MSR_CSTAR 0xc0000083
|
||||||
|
#define MSR_SFMASK 0xc0000084
|
||||||
|
|
||||||
|
// GS
|
||||||
|
#define MSR_GS_BASE 0xc0000101
|
||||||
|
#define MSR_GS_KERNBASE 0xc0000102
|
||||||
|
|
||||||
|
static inline uint64
|
||||||
|
readmsr(uint32 msr)
|
||||||
|
{
|
||||||
|
uint32 hi, lo;
|
||||||
|
__asm volatile("rdmsr" : "=d" (hi), "=a" (lo) : "c" (msr));
|
||||||
|
return ((uint64) lo) | (((uint64) hi) << 32);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void
|
||||||
|
writemsr(uint64 msr, uint64 val)
|
||||||
|
{
|
||||||
|
uint32 lo = val & 0xffffffff;
|
||||||
|
uint32 hi = val >> 32;
|
||||||
|
__asm volatile("wrmsr" : : "c" (msr), "a" (lo), "d" (hi) : "memory");
|
||||||
|
}
|
34
printf.c
34
printf.c
|
@ -2,6 +2,10 @@
|
||||||
#include "stat.h"
|
#include "stat.h"
|
||||||
#include "user.h"
|
#include "user.h"
|
||||||
|
|
||||||
|
#include <stdarg.h>
|
||||||
|
|
||||||
|
static char digits[] = "0123456789ABCDEF";
|
||||||
|
|
||||||
static void
|
static void
|
||||||
putc(int fd, char c)
|
putc(int fd, char c)
|
||||||
{
|
{
|
||||||
|
@ -11,7 +15,6 @@ putc(int fd, char c)
|
||||||
static void
|
static void
|
||||||
printint(int fd, int xx, int base, int sgn)
|
printint(int fd, int xx, int base, int sgn)
|
||||||
{
|
{
|
||||||
static char digits[] = "0123456789ABCDEF";
|
|
||||||
char buf[16];
|
char buf[16];
|
||||||
int i, neg;
|
int i, neg;
|
||||||
uint x;
|
uint x;
|
||||||
|
@ -35,16 +38,25 @@ printint(int fd, int xx, int base, int sgn)
|
||||||
putc(fd, buf[i]);
|
putc(fd, buf[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
printptr(int fd, uint64 x) {
|
||||||
|
int i;
|
||||||
|
putc(fd, '0');
|
||||||
|
putc(fd, 'x');
|
||||||
|
for (i = 0; i < (sizeof(uint64) * 2); i++, x <<= 4)
|
||||||
|
putc(fd, digits[x >> (sizeof(uint64) * 8 - 4)]);
|
||||||
|
}
|
||||||
|
|
||||||
// Print to the given fd. Only understands %d, %x, %p, %s.
|
// Print to the given fd. Only understands %d, %x, %p, %s.
|
||||||
void
|
void
|
||||||
printf(int fd, const char *fmt, ...)
|
printf(int fd, const char *fmt, ...)
|
||||||
{
|
{
|
||||||
|
va_list ap;
|
||||||
char *s;
|
char *s;
|
||||||
int c, i, state;
|
int c, i, state;
|
||||||
uint *ap;
|
|
||||||
|
|
||||||
|
va_start(ap, fmt);
|
||||||
state = 0;
|
state = 0;
|
||||||
ap = (uint*)(void*)&fmt + 1;
|
|
||||||
for(i = 0; fmt[i]; i++){
|
for(i = 0; fmt[i]; i++){
|
||||||
c = fmt[i] & 0xff;
|
c = fmt[i] & 0xff;
|
||||||
if(state == 0){
|
if(state == 0){
|
||||||
|
@ -55,14 +67,13 @@ printf(int fd, const char *fmt, ...)
|
||||||
}
|
}
|
||||||
} else if(state == '%'){
|
} else if(state == '%'){
|
||||||
if(c == 'd'){
|
if(c == 'd'){
|
||||||
printint(fd, *ap, 10, 1);
|
printint(fd, va_arg(ap, int), 10, 1);
|
||||||
ap++;
|
} else if(c == 'x') {
|
||||||
} else if(c == 'x' || c == 'p'){
|
printint(fd, va_arg(ap, int), 16, 0);
|
||||||
printint(fd, *ap, 16, 0);
|
} else if(c == 'p') {
|
||||||
ap++;
|
printptr(fd, va_arg(ap, uint64));
|
||||||
} else if(c == 's'){
|
} else if(c == 's'){
|
||||||
s = (char*)*ap;
|
s = va_arg(ap, char*);
|
||||||
ap++;
|
|
||||||
if(s == 0)
|
if(s == 0)
|
||||||
s = "(null)";
|
s = "(null)";
|
||||||
while(*s != 0){
|
while(*s != 0){
|
||||||
|
@ -70,8 +81,7 @@ printf(int fd, const char *fmt, ...)
|
||||||
s++;
|
s++;
|
||||||
}
|
}
|
||||||
} else if(c == 'c'){
|
} else if(c == 'c'){
|
||||||
putc(fd, *ap);
|
putc(fd, va_arg(ap, uint));
|
||||||
ap++;
|
|
||||||
} else if(c == '%'){
|
} else if(c == '%'){
|
||||||
putc(fd, c);
|
putc(fd, c);
|
||||||
} else {
|
} else {
|
||||||
|
|
34
proc.c
34
proc.c
|
@ -6,6 +6,7 @@
|
||||||
#include "x86.h"
|
#include "x86.h"
|
||||||
#include "proc.h"
|
#include "proc.h"
|
||||||
#include "spinlock.h"
|
#include "spinlock.h"
|
||||||
|
#include "msr.h"
|
||||||
|
|
||||||
struct {
|
struct {
|
||||||
struct spinlock lock;
|
struct spinlock lock;
|
||||||
|
@ -16,7 +17,7 @@ static struct proc *initproc;
|
||||||
|
|
||||||
int nextpid = 1;
|
int nextpid = 1;
|
||||||
extern void forkret(void);
|
extern void forkret(void);
|
||||||
extern void trapret(void);
|
extern void sysexit(void);
|
||||||
|
|
||||||
static void wakeup1(void *chan);
|
static void wakeup1(void *chan);
|
||||||
|
|
||||||
|
@ -104,13 +105,13 @@ found:
|
||||||
|
|
||||||
// Set up new context to start executing at forkret,
|
// Set up new context to start executing at forkret,
|
||||||
// which returns to trapret.
|
// which returns to sysexit.
|
||||||
sp -= 4;
|
sp -= sizeof(uint64);
|
||||||
*(uint*)sp = (uint)trapret;
|
*(uint64*)sp = (uint64)sysexit;
|
||||||
|
|
||||||
sp -= sizeof *p->context;
|
sp -= sizeof *p->context;
|
||||||
p->context = (struct context*)sp;
|
p->context = (struct context*)sp;
|
||||||
memset(p->context, 0, sizeof *p->context);
|
memset(p->context, 0, sizeof *p->context);
|
||||||
p->context->eip = (uint)forkret;
|
p->context->eip = (uint64)forkret;
|
||||||
|
|
||||||
return p;
|
return p;
|
||||||
}
|
}
|
||||||
|
@ -128,16 +129,12 @@ userinit(void)
|
||||||
initproc = p;
|
initproc = p;
|
||||||
if((p->pgdir = setupkvm()) == 0)
|
if((p->pgdir = setupkvm()) == 0)
|
||||||
panic("userinit: out of memory?");
|
panic("userinit: out of memory?");
|
||||||
inituvm(p->pgdir, _binary_initcode_start, (int)_binary_initcode_size);
|
inituvm(p->pgdir, _binary_initcode_start, (uint64)_binary_initcode_size);
|
||||||
p->sz = PGSIZE;
|
p->sz = PGSIZE;
|
||||||
memset(p->tf, 0, sizeof(*p->tf));
|
memset(p->tf, 0, sizeof(*p->tf));
|
||||||
p->tf->cs = (SEG_UCODE << 3) | DPL_USER;
|
p->tf->r11 = FL_IF;
|
||||||
p->tf->ds = (SEG_UDATA << 3) | DPL_USER;
|
p->tf->rsp = PGSIZE;
|
||||||
p->tf->es = p->tf->ds;
|
p->tf->rcx = 0; // beginning of initcode.S
|
||||||
p->tf->ss = p->tf->ds;
|
|
||||||
p->tf->eflags = FL_IF;
|
|
||||||
p->tf->esp = PGSIZE;
|
|
||||||
p->tf->eip = 0; // beginning of initcode.S
|
|
||||||
|
|
||||||
safestrcpy(p->name, "initcode", sizeof(p->name));
|
safestrcpy(p->name, "initcode", sizeof(p->name));
|
||||||
p->cwd = namei("/");
|
p->cwd = namei("/");
|
||||||
|
@ -201,7 +198,7 @@ fork(void)
|
||||||
*np->tf = *curproc->tf;
|
*np->tf = *curproc->tf;
|
||||||
|
|
||||||
// Clear %eax so that fork returns 0 in the child.
|
// Clear %rax so that fork returns 0 in the child.
|
||||||
np->tf->eax = 0;
|
np->tf->rax = 0;
|
||||||
|
|
||||||
for(i = 0; i < NOFILE; i++)
|
for(i = 0; i < NOFILE; i++)
|
||||||
if(curproc->ofile[i])
|
if(curproc->ofile[i])
|
||||||
|
@ -289,8 +286,8 @@ wait(void)
|
||||||
pid = p->pid;
|
pid = p->pid;
|
||||||
kfree(p->kstack);
|
kfree(p->kstack);
|
||||||
p->kstack = 0;
|
p->kstack = 0;
|
||||||
freevm(p->pgdir);
|
freevm(p->pgdir, p->sz);
|
||||||
p->pid = 0;
|
p->pid = 0;
|
||||||
p->parent = 0;
|
p->parent = 0;
|
||||||
p->name[0] = 0;
|
p->name[0] = 0;
|
||||||
p->killed = 0;
|
p->killed = 0;
|
||||||
|
@ -339,6 +336,7 @@ scheduler(void)
|
||||||
// Switch to chosen process. It is the process's job
|
// Switch to chosen process. It is the process's job
|
||||||
// to release ptable.lock and then reacquire it
|
// to release ptable.lock and then reacquire it
|
||||||
// before jumping back to us.
|
// before jumping back to us.
|
||||||
|
|
||||||
c->proc = p;
|
c->proc = p;
|
||||||
switchuvm(p);
|
switchuvm(p);
|
||||||
p->state = RUNNING;
|
p->state = RUNNING;
|
||||||
|
@ -408,7 +406,7 @@ forkret(void)
|
||||||
iinit(ROOTDEV);
|
iinit(ROOTDEV);
|
||||||
initlog(ROOTDEV);
|
initlog(ROOTDEV);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Return to "caller", actually trapret (see allocproc).
|
// Return to "caller", actually sysexit (see allocproc).
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -514,7 +512,7 @@ procdump(void)
|
||||||
int i;
|
int i;
|
||||||
struct proc *p;
|
struct proc *p;
|
||||||
char *state;
|
char *state;
|
||||||
uint pc[10];
|
uint64 pc[10];
|
||||||
|
|
||||||
for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){
|
for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){
|
||||||
if(p->state == UNUSED)
|
if(p->state == UNUSED)
|
||||||
|
@ -525,7 +523,7 @@ procdump(void)
|
||||||
state = "???";
|
state = "???";
|
||||||
cprintf("%d %s %s", p->pid, state, p->name);
|
cprintf("%d %s %s", p->pid, state, p->name);
|
||||||
if(p->state == SLEEPING){
|
if(p->state == SLEEPING){
|
||||||
getcallerpcs((uint*)p->context->ebp+2, pc);
|
getcallerpcs((uint64*)p->context->ebp+2, pc);
|
||||||
for(i=0; i<10 && pc[i] != 0; i++)
|
for(i=0; i<10 && pc[i] != 0; i++)
|
||||||
cprintf(" %p", pc[i]);
|
cprintf(" %p", pc[i]);
|
||||||
}
|
}
|
||||||
|
|
21
proc.h
21
proc.h
|
@ -1,5 +1,8 @@
|
||||||
// Per-CPU state
|
// Per-CPU state
|
||||||
struct cpu {
|
struct cpu {
|
||||||
|
uint64 syscallno; // Temporary used by sysentry
|
||||||
|
uint64 usp; // Temporary used by sysentry
|
||||||
|
struct proc *proc; // The process running on this cpu or null
|
||||||
uchar apicid; // Local APIC ID
|
uchar apicid; // Local APIC ID
|
||||||
struct context *scheduler; // swtch() here to enter scheduler
|
struct context *scheduler; // swtch() here to enter scheduler
|
||||||
struct taskstate ts; // Used by x86 to find stack for interrupt
|
struct taskstate ts; // Used by x86 to find stack for interrupt
|
||||||
|
@ -7,7 +10,6 @@ struct cpu {
|
||||||
volatile uint started; // Has the CPU started?
|
volatile uint started; // Has the CPU started?
|
||||||
int ncli; // Depth of pushcli nesting.
|
int ncli; // Depth of pushcli nesting.
|
||||||
int intena; // Were interrupts enabled before pushcli?
|
int intena; // Were interrupts enabled before pushcli?
|
||||||
struct proc *proc; // The process running on this cpu or null
|
|
||||||
};
|
};
|
||||||
|
|
||||||
extern struct cpu cpus[NCPU];
|
extern struct cpu cpus[NCPU];
|
||||||
|
@ -25,20 +27,23 @@ extern int ncpu;
|
||||||
// at the "Switch stacks" comment. Switch doesn't save eip explicitly,
|
// at the "Switch stacks" comment. Switch doesn't save eip explicitly,
|
||||||
// but it is on the stack and allocproc() manipulates it.
|
// but it is on the stack and allocproc() manipulates it.
|
||||||
struct context {
|
struct context {
|
||||||
uint edi;
|
uint64 r15;
|
||||||
uint esi;
|
uint64 r14;
|
||||||
uint ebx;
|
uint64 r13;
|
||||||
uint ebp;
|
uint64 r12;
|
||||||
uint eip;
|
uint64 r11;
|
||||||
|
uint64 rbx;
|
||||||
|
uint64 ebp; //rbp
|
||||||
|
uint64 eip; //rip;
|
||||||
};
|
};
|
||||||
|
|
||||||
enum procstate { UNUSED, EMBRYO, SLEEPING, RUNNABLE, RUNNING, ZOMBIE };
|
enum procstate { UNUSED, EMBRYO, SLEEPING, RUNNABLE, RUNNING, ZOMBIE };
|
||||||
|
|
||||||
// Per-process state
|
// Per-process state
|
||||||
struct proc {
|
struct proc {
|
||||||
uint sz; // Size of process memory (bytes)
|
char *kstack; // Bottom of kernel stack for this process, must be first entry
|
||||||
|
uint64 sz; // Size of process memory (bytes)
|
||||||
pde_t* pgdir; // Page table
|
pde_t* pgdir; // Page table
|
||||||
char *kstack; // Bottom of kernel stack for this process
|
|
||||||
enum procstate state; // Process state
|
enum procstate state; // Process state
|
||||||
int pid; // Process ID
|
int pid; // Process ID
|
||||||
struct proc *parent; // Parent process
|
struct proc *parent; // Parent process
|
||||||
|
|
10
spinlock.c
10
spinlock.c
|
@ -69,17 +69,17 @@ release(struct spinlock *lk)
|
||||||
|
|
||||||
// Record the current call stack in pcs[] by following the %ebp chain.
|
// Record the current call stack in pcs[] by following the %rbp chain.
|
||||||
void
|
void
|
||||||
getcallerpcs(void *v, uint pcs[])
|
getcallerpcs(void *v, uint64 pcs[])
|
||||||
{
|
{
|
||||||
uint *ebp;
|
uint64 *ebp;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
ebp = (uint*)v - 2;
|
asm volatile("mov %%rbp, %0" : "=r" (ebp));
|
||||||
for(i = 0; i < 10; i++){
|
for(i = 0; i < 10; i++){
|
||||||
if(ebp == 0 || ebp < (uint*)KERNBASE || ebp == (uint*)0xffffffff)
|
if(ebp == 0 || ebp < (uint64*)KERNBASE || ebp == (uint64*)0xffffffff)
|
||||||
break;
|
break;
|
||||||
pcs[i] = ebp[1]; // saved %eip
|
pcs[i] = ebp[1]; // saved %eip
|
||||||
ebp = (uint*)ebp[0]; // saved %ebp
|
ebp = (uint64*)ebp[0]; // saved %ebp
|
||||||
}
|
}
|
||||||
for(; i < 10; i++)
|
for(; i < 10; i++)
|
||||||
pcs[i] = 0;
|
pcs[i] = 0;
|
||||||
|
|
|
@ -5,7 +5,7 @@ struct spinlock {
|
||||||
// For debugging:
|
// For debugging:
|
||||||
char *name; // Name of lock.
|
char *name; // Name of lock.
|
||||||
struct cpu *cpu; // The cpu holding the lock.
|
struct cpu *cpu; // The cpu holding the lock.
|
||||||
uint pcs[10]; // The call stack (an array of program counters)
|
uint64 pcs[10]; // The call stack (an array of program counters)
|
||||||
// that locked the lock.
|
// that locked the lock.
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
2
string.c
2
string.c
|
@ -4,7 +4,7 @@
|
||||||
void*
|
void*
|
||||||
memset(void *dst, int c, uint n)
|
memset(void *dst, int c, uint n)
|
||||||
{
|
{
|
||||||
if ((int)dst%4 == 0 && n%4 == 0){
|
if ((uint64)dst%4 == 0 && n%4 == 0){
|
||||||
c &= 0xFF;
|
c &= 0xFF;
|
||||||
stosl(dst, (c<<24)|(c<<16)|(c<<8)|c, n/4);
|
stosl(dst, (c<<24)|(c<<16)|(c<<8)|c, n/4);
|
||||||
} else
|
} else
|
||||||
|
|
36
swtch.S
36
swtch.S
|
@ -8,22 +8,28 @@
|
||||||
|
|
||||||
.globl swtch
|
.globl swtch
|
||||||
swtch:
|
swtch:
|
||||||
movl 4(%esp), %eax
|
# Save old callee-save registers
|
||||||
movl 8(%esp), %edx
|
push %rbp
|
||||||
|
push %rbx
|
||||||
# Save old callee-saved registers
|
push %r11
|
||||||
pushl %ebp
|
push %r12
|
||||||
pushl %ebx
|
push %r13
|
||||||
pushl %esi
|
push %r14
|
||||||
pushl %edi
|
push %r15
|
||||||
|
|
||||||
# Switch stacks
|
# Switch stacks
|
||||||
movl %esp, (%eax)
|
mov %rsp, (%rdi) # first arg is in rdi
|
||||||
movl %edx, %esp
|
mov %rsi, %rsp # second arg is in rsi
|
||||||
|
|
||||||
|
# Load new callee-save registers
|
||||||
|
pop %r15
|
||||||
|
pop %r14
|
||||||
|
pop %r13
|
||||||
|
pop %r12
|
||||||
|
pop %r11
|
||||||
|
pop %rbx
|
||||||
|
pop %rbp
|
||||||
|
|
||||||
# Load new callee-saved registers
|
|
||||||
popl %edi
|
|
||||||
popl %esi
|
|
||||||
popl %ebx
|
|
||||||
popl %ebp
|
|
||||||
ret
|
ret
|
||||||
|
|
||||||
|
|
||||||
|
|
58
syscall.c
58
syscall.c
|
@ -15,13 +15,13 @@
|
||||||
|
|
||||||
// Fetch the int at addr from the current process.
|
// Fetch the int at addr from the current process.
|
||||||
int
|
int
|
||||||
fetchint(uint addr, int *ip)
|
fetchint(uint64 addr, int *ip)
|
||||||
{
|
{
|
||||||
struct proc *curproc = myproc();
|
struct proc *curproc = myproc();
|
||||||
|
|
||||||
if(addr >= curproc->sz || addr+4 > curproc->sz)
|
if(addr >= curproc->sz || addr+4 > curproc->sz)
|
||||||
return -1;
|
return -1;
|
||||||
*ip = *(int*)(addr);
|
*ip = *(uint64*)(addr);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -29,7 +29,7 @@ fetchint(uint addr, int *ip)
|
||||||
// Doesn't actually copy the string - just sets *pp to point at it.
|
// Doesn't actually copy the string - just sets *pp to point at it.
|
||||||
// Returns length of string, not including nul.
|
// Returns length of string, not including nul.
|
||||||
int
|
int
|
||||||
fetchstr(uint addr, char **pp)
|
fetchstr(uint64 addr, char **pp)
|
||||||
{
|
{
|
||||||
char *s, *ep;
|
char *s, *ep;
|
||||||
struct proc *curproc = myproc();
|
struct proc *curproc = myproc();
|
||||||
|
@ -45,11 +45,51 @@ fetchstr(uint addr, char **pp)
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static uint64
|
||||||
|
fetcharg(int n)
|
||||||
|
{
|
||||||
|
struct proc *curproc = myproc();
|
||||||
|
switch (n) {
|
||||||
|
case 0:
|
||||||
|
return curproc->tf->rdi;
|
||||||
|
case 1:
|
||||||
|
return curproc->tf->rsi;
|
||||||
|
case 2:
|
||||||
|
return curproc->tf->rdx;
|
||||||
|
case 3:
|
||||||
|
return curproc->tf->r10;
|
||||||
|
case 4:
|
||||||
|
return curproc->tf->r8;
|
||||||
|
case 5:
|
||||||
|
return curproc->tf->r9;
|
||||||
|
}
|
||||||
|
panic("fetcharg");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
fetchaddr(uint64 addr, uint64 *ip)
|
||||||
|
{
|
||||||
|
struct proc *curproc = myproc();
|
||||||
|
if(addr >= curproc->sz || addr+sizeof(uint64) > curproc->sz)
|
||||||
|
return -1;
|
||||||
|
*ip = *(uint64*)(addr);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
// Fetch the nth 32-bit system call argument.
|
// Fetch the nth 32-bit system call argument.
|
||||||
int
|
int
|
||||||
argint(int n, int *ip)
|
argint(int n, int *ip)
|
||||||
{
|
{
|
||||||
return fetchint((myproc()->tf->esp) + 4 + 4*n, ip);
|
*ip = fetcharg(n);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
argaddr(int n, uint64 *ip)
|
||||||
|
{
|
||||||
|
*ip = fetcharg(n);
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fetch the nth word-sized system call argument as a pointer
|
// Fetch the nth word-sized system call argument as a pointer
|
||||||
|
@ -58,10 +98,10 @@ argint(int n, int *ip)
|
||||||
int
|
int
|
||||||
argptr(int n, char **pp, int size)
|
argptr(int n, char **pp, int size)
|
||||||
{
|
{
|
||||||
int i;
|
uint64 i;
|
||||||
struct proc *curproc = myproc();
|
struct proc *curproc = myproc();
|
||||||
|
|
||||||
if(argint(n, &i) < 0)
|
if(argaddr(n, &i) < 0)
|
||||||
return -1;
|
return -1;
|
||||||
if(size < 0 || (uint)i >= curproc->sz || (uint)i+size > curproc->sz)
|
if(size < 0 || (uint)i >= curproc->sz || (uint)i+size > curproc->sz)
|
||||||
return -1;
|
return -1;
|
||||||
|
@ -134,12 +174,12 @@ syscall(void)
|
||||||
int num;
|
int num;
|
||||||
struct proc *curproc = myproc();
|
struct proc *curproc = myproc();
|
||||||
|
|
||||||
num = curproc->tf->eax;
|
num = curproc->tf->rax;
|
||||||
if(num > 0 && num < NELEM(syscalls) && syscalls[num]) {
|
if(num > 0 && num < NELEM(syscalls) && syscalls[num]) {
|
||||||
curproc->tf->eax = syscalls[num]();
|
curproc->tf->rax = syscalls[num]();
|
||||||
} else {
|
} else {
|
||||||
cprintf("%d %s: unknown sys call %d\n",
|
cprintf("%d %s: unknown sys call %d\n",
|
||||||
curproc->pid, curproc->name, num);
|
curproc->pid, curproc->name, num);
|
||||||
curproc->tf->eax = -1;
|
curproc->tf->rax = -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -399,16 +399,16 @@ sys_exec(void)
|
||||||
{
|
{
|
||||||
char *path, *argv[MAXARG];
|
char *path, *argv[MAXARG];
|
||||||
int i;
|
int i;
|
||||||
uint uargv, uarg;
|
uint64 uargv, uarg;
|
||||||
|
|
||||||
if(argstr(0, &path) < 0 || argint(1, (int*)&uargv) < 0){
|
if(argstr(0, &path) < 0 || argaddr(1, &uargv) < 0){
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
memset(argv, 0, sizeof(argv));
|
memset(argv, 0, sizeof(argv));
|
||||||
for(i=0;; i++){
|
for(i=0;; i++){
|
||||||
if(i >= NELEM(argv))
|
if(i >= NELEM(argv))
|
||||||
return -1;
|
return -1;
|
||||||
if(fetchint(uargv+4*i, (int*)&uarg) < 0)
|
if(fetchaddr(uargv+sizeof(uint64)*i, (uint64*)&uarg) < 0)
|
||||||
return -1;
|
return -1;
|
||||||
if(uarg == 0){
|
if(uarg == 0){
|
||||||
argv[i] = 0;
|
argv[i] = 0;
|
||||||
|
|
29
trap.c
29
trap.c
|
@ -9,8 +9,8 @@
|
||||||
#include "spinlock.h"
|
#include "spinlock.h"
|
||||||
|
|
||||||
// Interrupt descriptor table (shared by all CPUs).
|
// Interrupt descriptor table (shared by all CPUs).
|
||||||
struct gatedesc idt[256];
|
struct intgate idt[256];
|
||||||
extern uint vectors[]; // in vectors.S: array of 256 entry pointers
|
extern uint64 vectors[]; // in vectors.S: array of 256 entry pointers
|
||||||
struct spinlock tickslock;
|
struct spinlock tickslock;
|
||||||
uint ticks;
|
uint ticks;
|
||||||
|
|
||||||
|
@ -19,17 +19,22 @@ tvinit(void)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
for(i = 0; i < 256; i++)
|
for(i=0; i<256; i++) {
|
||||||
SETGATE(idt[i], 0, SEG_KCODE<<3, vectors[i], 0);
|
idt[i] = INTDESC(KCSEG, vectors[i], INT_P | SEG_INTR64);
|
||||||
SETGATE(idt[T_SYSCALL], 1, SEG_KCODE<<3, vectors[T_SYSCALL], DPL_USER);
|
}
|
||||||
|
idtinit();
|
||||||
|
|
||||||
initlock(&tickslock, "time");
|
initlock(&tickslock, "time");
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
idtinit(void)
|
idtinit(void)
|
||||||
{
|
{
|
||||||
lidt(idt, sizeof(idt));
|
struct desctr dtr;
|
||||||
|
|
||||||
|
dtr.limit = sizeof(idt) - 1;
|
||||||
|
dtr.base = (uint64)idt;
|
||||||
|
lidt((void *)&dtr.limit);
|
||||||
}
|
}
|
||||||
|
|
||||||
//PAGEBREAK: 41
|
//PAGEBREAK: 41
|
||||||
|
@ -74,7 +79,7 @@ trap(struct trapframe *tf)
|
||||||
case T_IRQ0 + 7:
|
case T_IRQ0 + 7:
|
||||||
case T_IRQ0 + IRQ_SPURIOUS:
|
case T_IRQ0 + IRQ_SPURIOUS:
|
||||||
cprintf("cpu%d: spurious interrupt at %x:%x\n",
|
cprintf("cpu%d: spurious interrupt at %x:%x\n",
|
||||||
cpuid(), tf->cs, tf->eip);
|
cpuid(), tf->cs, tf->rip);
|
||||||
lapiceoi();
|
lapiceoi();
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -83,14 +88,14 @@ trap(struct trapframe *tf)
|
||||||
if(myproc() == 0 || (tf->cs&3) == 0){
|
if(myproc() == 0 || (tf->cs&3) == 0){
|
||||||
// In kernel, it must be our mistake.
|
// In kernel, it must be our mistake.
|
||||||
cprintf("unexpected trap %d from cpu %d eip %x (cr2=0x%x)\n",
|
cprintf("unexpected trap %d from cpu %d eip %x (cr2=0x%x)\n",
|
||||||
tf->trapno, cpuid(), tf->eip, rcr2());
|
tf->trapno, cpuid(), tf->rip, rcr2());
|
||||||
panic("trap");
|
panic("trap");
|
||||||
}
|
}
|
||||||
// In user space, assume process misbehaved.
|
// In user space, assume process misbehaved.
|
||||||
cprintf("pid %d %s: trap %d err %d on cpu %d "
|
cprintf("pid %d %s: trap %d err %d on cpu %d "
|
||||||
"eip 0x%x addr 0x%x--kill proc\n",
|
"eip 0x%x addr 0x%x--kill proc\n",
|
||||||
myproc()->pid, myproc()->name, tf->trapno,
|
myproc()->pid, myproc()->name, tf->trapno,
|
||||||
tf->err, cpuid(), tf->eip, rcr2());
|
tf->err, cpuid(), tf->rip, rcr2());
|
||||||
myproc()->killed = 1;
|
myproc()->killed = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -105,8 +110,10 @@ trap(struct trapframe *tf)
|
||||||
if(myproc() && myproc()->state == RUNNING &&
|
if(myproc() && myproc()->state == RUNNING &&
|
||||||
tf->trapno == T_IRQ0+IRQ_TIMER)
|
tf->trapno == T_IRQ0+IRQ_TIMER)
|
||||||
yield();
|
yield();
|
||||||
|
|
||||||
// Check if the process has been killed since we yielded
|
// Check if the process has been killed since we yielded
|
||||||
if(myproc() && myproc()->killed && (tf->cs&3) == DPL_USER)
|
if(myproc() && myproc()->killed && (tf->cs&3) == DPL_USER)
|
||||||
exit();
|
exit();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
150
trapasm.S
150
trapasm.S
|
@ -1,32 +1,136 @@
|
||||||
|
#include "param.h"
|
||||||
|
#include "x86.h"
|
||||||
#include "mmu.h"
|
#include "mmu.h"
|
||||||
|
|
||||||
# vectors.S sends all traps here.
|
# vectors.S sends all traps here.
|
||||||
.globl alltraps
|
.globl alltraps
|
||||||
alltraps:
|
alltraps:
|
||||||
# Build trap frame.
|
# Build trap frame.
|
||||||
pushl %ds
|
push %r15
|
||||||
pushl %es
|
push %r14
|
||||||
pushl %fs
|
push %r13
|
||||||
pushl %gs
|
push %r12
|
||||||
pushal
|
push %r11
|
||||||
|
push %r10
|
||||||
|
push %r9
|
||||||
|
push %r8
|
||||||
|
push %rdi
|
||||||
|
push %rsi
|
||||||
|
push %rbp
|
||||||
|
push %rdx
|
||||||
|
push %rcx
|
||||||
|
push %rbx
|
||||||
|
push %rax
|
||||||
|
|
||||||
|
cmpw $KCSEG, 32(%rsp) # compare to saved cs
|
||||||
|
jz 1f
|
||||||
|
swapgs
|
||||||
|
|
||||||
# Set up data segments.
|
1:mov %rsp, %rdi # frame in arg1
|
||||||
movw $(SEG_KDATA<<3), %ax
|
|
||||||
movw %ax, %ds
|
|
||||||
movw %ax, %es
|
|
||||||
|
|
||||||
# Call trap(tf), where tf=%esp
|
|
||||||
pushl %esp
|
|
||||||
call trap
|
call trap
|
||||||
addl $4, %esp
|
|
||||||
|
|
||||||
# Return falls through to trapret...
|
# Return falls through to trapret...
|
||||||
.globl trapret
|
.globl trapret
|
||||||
trapret:
|
trapret:
|
||||||
popal
|
cli
|
||||||
popl %gs
|
cmpw $KCSEG, 32(%rsp) # compare to saved cs
|
||||||
popl %fs
|
jz 1f
|
||||||
popl %es
|
swapgs
|
||||||
popl %ds
|
|
||||||
addl $0x8, %esp # trapno and errcode
|
1:pop %rax
|
||||||
iret
|
pop %rbx
|
||||||
|
pop %rcx
|
||||||
|
pop %rdx
|
||||||
|
pop %rbp
|
||||||
|
pop %rsi
|
||||||
|
pop %rdi
|
||||||
|
pop %r8
|
||||||
|
pop %r9
|
||||||
|
pop %r10
|
||||||
|
pop %r11
|
||||||
|
pop %r12
|
||||||
|
pop %r13
|
||||||
|
pop %r14
|
||||||
|
pop %r15
|
||||||
|
|
||||||
|
add $16, %rsp # discard trapnum and errorcode
|
||||||
|
iretq
|
||||||
|
#PAGEBREAK!
|
||||||
|
|
||||||
|
# syscall_entry jumps here after syscall instruction
|
||||||
|
.globl sysentry
|
||||||
|
sysentry: # Build trap frame.
|
||||||
|
// load kernel stack address
|
||||||
|
swapgs
|
||||||
|
movq %rax, %gs:0 // save %rax in syscallno of cpu entry
|
||||||
|
movq %rsp, %gs:8 // user sp
|
||||||
|
movq %gs:16, %rax // proc entry
|
||||||
|
|
||||||
|
movq %ss:0(%rax), %rax // load kstack from proc
|
||||||
|
addq $(KSTACKSIZE), %rax
|
||||||
|
|
||||||
|
movq %rax, %rsp
|
||||||
|
movq %gs:0, %rax // restore rax
|
||||||
|
|
||||||
|
// push usp
|
||||||
|
push $0
|
||||||
|
push %gs:8
|
||||||
|
// save rflags (in %r11) and rip (in %rcx), as left by the syscall instruction
|
||||||
|
push %r11
|
||||||
|
push $UCSEG
|
||||||
|
push %rcx
|
||||||
|
// push error code and trapno to make the stack look like a trap frame
|
||||||
|
push $0
|
||||||
|
push $64
|
||||||
|
|
||||||
|
// push values on kernel stack
|
||||||
|
push %r15
|
||||||
|
push %r14
|
||||||
|
push %r13
|
||||||
|
push %r12
|
||||||
|
push %r11
|
||||||
|
push %r10
|
||||||
|
push %r9
|
||||||
|
push %r8
|
||||||
|
push %rdi
|
||||||
|
push %rsi
|
||||||
|
push %rbp
|
||||||
|
push %rdx
|
||||||
|
push %rcx
|
||||||
|
push %rbx
|
||||||
|
push %rax
|
||||||
|
|
||||||
|
mov %rsp, %rdi # frame in arg1
|
||||||
|
|
||||||
|
call trap
|
||||||
|
#PAGEBREAK!
|
||||||
|
|
||||||
|
# Return falls through to sysexit...
|
||||||
|
.globl sysexit
|
||||||
|
sysexit:
|
||||||
|
# to make sure we don't get any interrupts on the user stack while in
|
||||||
|
# supervisor mode. insufficient? (see vulnerability reports for sysret)
|
||||||
|
cli
|
||||||
|
|
||||||
|
pop %rax
|
||||||
|
pop %rbx
|
||||||
|
pop %rcx
|
||||||
|
pop %rdx
|
||||||
|
pop %rbp
|
||||||
|
pop %rsi
|
||||||
|
pop %rdi
|
||||||
|
pop %r8
|
||||||
|
pop %r9
|
||||||
|
pop %r10
|
||||||
|
pop %r11
|
||||||
|
pop %r12
|
||||||
|
pop %r13
|
||||||
|
pop %r14
|
||||||
|
pop %r15
|
||||||
|
|
||||||
|
add $(5*8), %rsp # discard trapnum, errorcode, rip, cs and rflags
|
||||||
|
mov (%rsp),%rsp # switch to the user stack
|
||||||
|
swapgs
|
||||||
|
|
||||||
|
sysretq
|
||||||
|
|
||||||
|
|
1
traps.h
1
traps.h
|
@ -36,3 +36,4 @@
|
||||||
#define IRQ_ERROR 19
|
#define IRQ_ERROR 19
|
||||||
#define IRQ_SPURIOUS 31
|
#define IRQ_SPURIOUS 31
|
||||||
|
|
||||||
|
|
||||||
|
|
8
types.h
8
types.h
|
@ -1,4 +1,10 @@
|
||||||
typedef unsigned int uint;
|
typedef unsigned int uint;
|
||||||
typedef unsigned short ushort;
|
typedef unsigned short ushort;
|
||||||
typedef unsigned char uchar;
|
typedef unsigned char uchar;
|
||||||
typedef uint pde_t;
|
|
||||||
|
typedef unsigned char uint8;
|
||||||
|
typedef unsigned short uint16;
|
||||||
|
typedef unsigned int uint32;
|
||||||
|
typedef unsigned long uint64;
|
||||||
|
|
||||||
|
typedef uint64 pde_t;
|
||||||
|
|
38
usertests.c
38
usertests.c
|
@ -363,17 +363,29 @@ preempt(void)
|
||||||
|
|
||||||
printf(1, "preempt: ");
|
printf(1, "preempt: ");
|
||||||
pid1 = fork();
|
pid1 = fork();
|
||||||
|
if(pid1 < 0) {
|
||||||
|
printf(1, "fork failed");
|
||||||
|
exit();
|
||||||
|
}
|
||||||
if(pid1 == 0)
|
if(pid1 == 0)
|
||||||
for(;;)
|
for(;;)
|
||||||
;
|
;
|
||||||
|
|
||||||
pid2 = fork();
|
pid2 = fork();
|
||||||
|
if(pid2 < 0) {
|
||||||
|
printf(1, "fork failed\n");
|
||||||
|
exit();
|
||||||
|
}
|
||||||
if(pid2 == 0)
|
if(pid2 == 0)
|
||||||
for(;;)
|
for(;;)
|
||||||
;
|
;
|
||||||
|
|
||||||
pipe(pfds);
|
pipe(pfds);
|
||||||
pid3 = fork();
|
pid3 = fork();
|
||||||
|
if(pid3 < 0) {
|
||||||
|
printf(1, "fork failed\n");
|
||||||
|
exit();
|
||||||
|
}
|
||||||
if(pid3 == 0){
|
if(pid3 == 0){
|
||||||
close(pfds[0]);
|
close(pfds[0]);
|
||||||
if(write(pfds[1], "x", 1) != 1)
|
if(write(pfds[1], "x", 1) != 1)
|
||||||
|
@ -1391,6 +1403,11 @@ forktest(void)
|
||||||
exit();
|
exit();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (n == 0) {
|
||||||
|
printf(1, "no fork at all!\n");
|
||||||
|
exit();
|
||||||
|
}
|
||||||
|
|
||||||
if(n == 1000){
|
if(n == 1000){
|
||||||
printf(1, "fork claimed to work 1000 times!\n");
|
printf(1, "fork claimed to work 1000 times!\n");
|
||||||
exit();
|
exit();
|
||||||
|
@ -1414,16 +1431,16 @@ forktest(void)
|
||||||
void
|
void
|
||||||
sbrktest(void)
|
sbrktest(void)
|
||||||
{
|
{
|
||||||
int fds[2], pid, pids[10], ppid;
|
int i, fds[2], pids[10], pid, ppid;
|
||||||
char *a, *b, *c, *lastaddr, *oldbrk, *p, scratch;
|
char *c, *oldbrk, scratch, *a, *b, *lastaddr, *p;
|
||||||
uint amt;
|
uint64 amt;
|
||||||
|
#define BIG (100*1024*1024)
|
||||||
|
|
||||||
printf(stdout, "sbrk test\n");
|
printf(stdout, "sbrk test\n");
|
||||||
oldbrk = sbrk(0);
|
oldbrk = sbrk(0);
|
||||||
|
|
||||||
// can one sbrk() less than a page?
|
// can one sbrk() less than a page?
|
||||||
a = sbrk(0);
|
a = sbrk(0);
|
||||||
int i;
|
|
||||||
for(i = 0; i < 5000; i++){
|
for(i = 0; i < 5000; i++){
|
||||||
b = sbrk(1);
|
b = sbrk(1);
|
||||||
if(b != a){
|
if(b != a){
|
||||||
|
@ -1449,9 +1466,8 @@ sbrktest(void)
|
||||||
wait();
|
wait();
|
||||||
|
|
||||||
// can one grow address space to something big?
|
// can one grow address space to something big?
|
||||||
#define BIG (100*1024*1024)
|
|
||||||
a = sbrk(0);
|
a = sbrk(0);
|
||||||
amt = (BIG) - (uint)a;
|
amt = (BIG) - (uint64)a;
|
||||||
p = sbrk(amt);
|
p = sbrk(amt);
|
||||||
if (p != a) {
|
if (p != a) {
|
||||||
printf(stdout, "sbrk test failed to grow big address space; enough phys mem?\n");
|
printf(stdout, "sbrk test failed to grow big address space; enough phys mem?\n");
|
||||||
|
@ -1508,7 +1524,7 @@ sbrktest(void)
|
||||||
}
|
}
|
||||||
wait();
|
wait();
|
||||||
}
|
}
|
||||||
|
|
||||||
// if we run the system out of memory, does it clean up the last
|
// if we run the system out of memory, does it clean up the last
|
||||||
// failed allocation?
|
// failed allocation?
|
||||||
if(pipe(fds) != 0){
|
if(pipe(fds) != 0){
|
||||||
|
@ -1518,7 +1534,7 @@ sbrktest(void)
|
||||||
for(i = 0; i < sizeof(pids)/sizeof(pids[0]); i++){
|
for(i = 0; i < sizeof(pids)/sizeof(pids[0]); i++){
|
||||||
if((pids[i] = fork()) == 0){
|
if((pids[i] = fork()) == 0){
|
||||||
// allocate a lot of memory
|
// allocate a lot of memory
|
||||||
sbrk(BIG - (uint)sbrk(0));
|
sbrk(BIG - (uint64)sbrk(0));
|
||||||
write(fds[1], "x", 1);
|
write(fds[1], "x", 1);
|
||||||
// sit around until killed
|
// sit around until killed
|
||||||
for(;;) sleep(1000);
|
for(;;) sleep(1000);
|
||||||
|
@ -1526,6 +1542,7 @@ sbrktest(void)
|
||||||
if(pids[i] != -1)
|
if(pids[i] != -1)
|
||||||
read(fds[0], &scratch, 1);
|
read(fds[0], &scratch, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
// if those failed allocations freed up the pages they did allocate,
|
// if those failed allocations freed up the pages they did allocate,
|
||||||
// we'll be able to allocate here
|
// we'll be able to allocate here
|
||||||
c = sbrk(4096);
|
c = sbrk(4096);
|
||||||
|
@ -1549,7 +1566,7 @@ sbrktest(void)
|
||||||
void
|
void
|
||||||
validateint(int *p)
|
validateint(int *p)
|
||||||
{
|
{
|
||||||
int res;
|
/* XXX int res;
|
||||||
asm("mov %%esp, %%ebx\n\t"
|
asm("mov %%esp, %%ebx\n\t"
|
||||||
"mov %3, %%esp\n\t"
|
"mov %3, %%esp\n\t"
|
||||||
"int %2\n\t"
|
"int %2\n\t"
|
||||||
|
@ -1557,13 +1574,14 @@ validateint(int *p)
|
||||||
"=a" (res) :
|
"=a" (res) :
|
||||||
"a" (SYS_sleep), "n" (T_SYSCALL), "c" (p) :
|
"a" (SYS_sleep), "n" (T_SYSCALL), "c" (p) :
|
||||||
"ebx");
|
"ebx");
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
validatetest(void)
|
validatetest(void)
|
||||||
{
|
{
|
||||||
int hi, pid;
|
int hi, pid;
|
||||||
uint p;
|
uint64 p;
|
||||||
|
|
||||||
printf(stdout, "validate test\n");
|
printf(stdout, "validate test\n");
|
||||||
hi = 1100*1024;
|
hi = 1100*1024;
|
||||||
|
|
2
usys.S
2
usys.S
|
@ -5,7 +5,7 @@
|
||||||
.globl name; \
|
.globl name; \
|
||||||
name: \
|
name: \
|
||||||
movl $SYS_ ## name, %eax; \
|
movl $SYS_ ## name, %eax; \
|
||||||
int $T_SYSCALL; \
|
syscall; \
|
||||||
ret
|
ret
|
||||||
|
|
||||||
SYSCALL(fork)
|
SYSCALL(fork)
|
||||||
|
|
16
vectors.pl
16
vectors.pl
|
@ -12,9 +12,9 @@ for(my $i = 0; $i < 256; $i++){
|
||||||
print ".globl vector$i\n";
|
print ".globl vector$i\n";
|
||||||
print "vector$i:\n";
|
print "vector$i:\n";
|
||||||
if(!($i == 8 || ($i >= 10 && $i <= 14) || $i == 17)){
|
if(!($i == 8 || ($i >= 10 && $i <= 14) || $i == 17)){
|
||||||
print " pushl \$0\n";
|
print " push \$0\n";
|
||||||
}
|
}
|
||||||
print " pushl \$$i\n";
|
print " push \$$i\n";
|
||||||
print " jmp alltraps\n";
|
print " jmp alltraps\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -23,7 +23,7 @@ print ".data\n";
|
||||||
print ".globl vectors\n";
|
print ".globl vectors\n";
|
||||||
print "vectors:\n";
|
print "vectors:\n";
|
||||||
for(my $i = 0; $i < 256; $i++){
|
for(my $i = 0; $i < 256; $i++){
|
||||||
print " .long vector$i\n";
|
print " .quad vector$i\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
# sample output:
|
# sample output:
|
||||||
|
@ -31,8 +31,8 @@ for(my $i = 0; $i < 256; $i++){
|
||||||
# .globl alltraps
|
# .globl alltraps
|
||||||
# .globl vector0
|
# .globl vector0
|
||||||
# vector0:
|
# vector0:
|
||||||
# pushl $0
|
# push $0
|
||||||
# pushl $0
|
# push $0
|
||||||
# jmp alltraps
|
# jmp alltraps
|
||||||
# ...
|
# ...
|
||||||
#
|
#
|
||||||
|
@ -40,8 +40,8 @@ for(my $i = 0; $i < 256; $i++){
|
||||||
# .data
|
# .data
|
||||||
# .globl vectors
|
# .globl vectors
|
||||||
# vectors:
|
# vectors:
|
||||||
# .long vector0
|
# .quad vector0
|
||||||
# .long vector1
|
# .quad vector1
|
||||||
# .long vector2
|
# .quad vector2
|
||||||
# ...
|
# ...
|
||||||
|
|
||||||
|
|
211
vm.c
211
vm.c
|
@ -2,13 +2,34 @@
|
||||||
#include "types.h"
|
#include "types.h"
|
||||||
#include "defs.h"
|
#include "defs.h"
|
||||||
#include "x86.h"
|
#include "x86.h"
|
||||||
|
#include "msr.h"
|
||||||
#include "memlayout.h"
|
#include "memlayout.h"
|
||||||
#include "mmu.h"
|
#include "mmu.h"
|
||||||
#include "proc.h"
|
#include "proc.h"
|
||||||
#include "elf.h"
|
#include "elf.h"
|
||||||
|
#include "traps.h"
|
||||||
|
|
||||||
extern char data[]; // defined by kernel.ld
|
extern char data[]; // defined by kernel.ld
|
||||||
pde_t *kpgdir; // for use in scheduler()
|
void sysentry(void);
|
||||||
|
|
||||||
|
static pde_t *kpml4; // kernel address space, used by scheduler and bootup
|
||||||
|
|
||||||
|
// Bootstrap GDT. Used by boot.S but defined in C
|
||||||
|
// Map "logical" addresses to virtual addresses using identity map.
|
||||||
|
// Cannot share a CODE descriptor for both kernel and user
|
||||||
|
// because it would have to have DPL_USR, but the CPU forbids
|
||||||
|
// an interrupt from CPL=0 to DPL=3.
|
||||||
|
struct segdesc bootgdt[NSEGS] = {
|
||||||
|
[0] = SEGDESC(0, 0, 0), // null
|
||||||
|
[1] = SEGDESC(0, 0xfffff, SEG_R|SEG_CODE|SEG_S|SEG_DPL(0)|SEG_P|SEG_D|SEG_G), // 32-bit kernel code
|
||||||
|
[2] = SEGDESC(0, 0, SEG_R|SEG_CODE|SEG_S|SEG_DPL(0)|SEG_P|SEG_L|SEG_G), // 64-bit kernel code
|
||||||
|
[3] = SEGDESC(0, 0xfffff, SEG_W|SEG_S|SEG_DPL(0)|SEG_P|SEG_D|SEG_G), // kernel data
|
||||||
|
// The order of the user data and user code segments is
|
||||||
|
// important for syscall instructions. See initseg.
|
||||||
|
[6] = SEGDESC(0, 0xfffff, SEG_W|SEG_S|SEG_DPL(3)|SEG_P|SEG_D|SEG_G), // 64-bit user data
|
||||||
|
[7] = SEGDESC(0, 0, SEG_R|SEG_CODE|SEG_S|SEG_DPL(3)|SEG_P|SEG_L|SEG_G), // 64-bit user code
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
// Set up CPU's kernel segment descriptors.
|
// Set up CPU's kernel segment descriptors.
|
||||||
// Run once on entry on each CPU.
|
// Run once on entry on each CPU.
|
||||||
|
@ -16,41 +37,82 @@ void
|
||||||
seginit(void)
|
seginit(void)
|
||||||
{
|
{
|
||||||
struct cpu *c;
|
struct cpu *c;
|
||||||
|
struct desctr dtr;
|
||||||
|
|
||||||
// Map "logical" addresses to virtual addresses using identity map.
|
c = mycpu();
|
||||||
// Cannot share a CODE descriptor for both kernel and user
|
memmove(c->gdt, bootgdt, sizeof bootgdt);
|
||||||
// because it would have to have DPL_USR, but the CPU forbids
|
dtr.limit = sizeof(c->gdt)-1;
|
||||||
// an interrupt from CPL=0 to DPL=3.
|
dtr.base = (uint64) c->gdt;
|
||||||
c = &cpus[cpuid()];
|
lgdt((void *)&dtr.limit);
|
||||||
c->gdt[SEG_KCODE] = SEG(STA_X|STA_R, 0, 0xffffffff, 0);
|
|
||||||
c->gdt[SEG_KDATA] = SEG(STA_W, 0, 0xffffffff, 0);
|
// When executing a syscall instruction the CPU sets the SS selector
|
||||||
c->gdt[SEG_UCODE] = SEG(STA_X|STA_R, 0, 0xffffffff, DPL_USER);
|
// to (star >> 32) + 8 and the CS selector to (star >> 32).
|
||||||
c->gdt[SEG_UDATA] = SEG(STA_W, 0, 0xffffffff, DPL_USER);
|
// When executing a sysret instruction the CPU sets the SS selector
|
||||||
lgdt(c->gdt, sizeof(c->gdt));
|
// to (star >> 48) + 8 and the CS selector to (star >> 48) + 16.
|
||||||
|
uint64 star = ((((uint64)UCSEG|0x3)- 16)<<48)|((uint64)(KCSEG)<<32);
|
||||||
|
writemsr(MSR_STAR, star);
|
||||||
|
writemsr(MSR_LSTAR, (uint64)&sysentry);
|
||||||
|
writemsr(MSR_SFMASK, FL_TF | FL_IF);
|
||||||
|
|
||||||
|
// Initialize cpu-local storage.
|
||||||
|
writegs(KDSEG);
|
||||||
|
writemsr(MSR_GS_BASE, (uint64)c);
|
||||||
|
writemsr(MSR_GS_KERNBASE, (uint64)c);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Return the address of the PTE in page table pgdir
|
// Return the address of the PTE in page table pgdir
|
||||||
// that corresponds to virtual address va. If alloc!=0,
|
// that corresponds to virtual address va. If alloc!=0,
|
||||||
// create any required page table pages.
|
// create any required page table pages.
|
||||||
static pte_t *
|
static pte_t *
|
||||||
walkpgdir(pde_t *pgdir, const void *va, int alloc)
|
walkpgdir(pde_t *pml4, const void *va, int alloc)
|
||||||
{
|
{
|
||||||
|
pml4e_t *pml4e;
|
||||||
|
pdpe_t *pdp;
|
||||||
|
pdpe_t *pdpe;
|
||||||
pde_t *pde;
|
pde_t *pde;
|
||||||
|
pde_t *pd;
|
||||||
pte_t *pgtab;
|
pte_t *pgtab;
|
||||||
|
|
||||||
pde = &pgdir[PDX(va)];
|
// level 4
|
||||||
if(*pde & PTE_P){
|
pml4e = &pml4[PMX(va)];
|
||||||
pgtab = (pte_t*)P2V(PTE_ADDR(*pde));
|
if(*pml4e & PTE_P)
|
||||||
} else {
|
pdp = (pdpe_t*)P2V(PTE_ADDR(*pml4e));
|
||||||
if(!alloc || (pgtab = (pte_t*)kalloc()) == 0)
|
else {
|
||||||
|
if(!alloc || (pdp = (pdpe_t*)kalloc()) == 0)
|
||||||
return 0;
|
return 0;
|
||||||
// Make sure all those PTE_P bits are zero.
|
// Make sure all those PTE_P bits are zero.
|
||||||
memset(pgtab, 0, PGSIZE);
|
memset(pdp, 0, PGSIZE);
|
||||||
// The permissions here are overly generous, but they can
|
// The permissions here are overly generous, but they can
|
||||||
// be further restricted by the permissions in the page table
|
// be further restricted by the permissions in the page table
|
||||||
// entries, if necessary.
|
// entries, if necessary.
|
||||||
|
*pml4e = V2P(pdp) | PTE_P | PTE_W | PTE_U;
|
||||||
|
}
|
||||||
|
|
||||||
|
// XXX avoid repetition
|
||||||
|
|
||||||
|
// level 3
|
||||||
|
pdpe = &pdp[PDPX(va)];
|
||||||
|
if(*pdpe & PTE_P)
|
||||||
|
pd = (pde_t*)P2V(PTE_ADDR(*pdpe));
|
||||||
|
else {
|
||||||
|
if(!alloc || (pd = (pde_t*)kalloc()) == 0)
|
||||||
|
return 0;
|
||||||
|
memset(pd, 0, PGSIZE);
|
||||||
|
*pdpe = V2P(pd) | PTE_P | PTE_W | PTE_U;
|
||||||
|
}
|
||||||
|
|
||||||
|
// level 2
|
||||||
|
pde = &pd[PDX(va)];
|
||||||
|
if(*pde & PTE_P)
|
||||||
|
pgtab = (pte_t*)P2V(PTE_ADDR(*pde));
|
||||||
|
else {
|
||||||
|
if(!alloc || (pgtab = (pte_t*)kalloc()) == 0)
|
||||||
|
return 0;
|
||||||
|
memset(pgtab, 0, PGSIZE);
|
||||||
*pde = V2P(pgtab) | PTE_P | PTE_W | PTE_U;
|
*pde = V2P(pgtab) | PTE_P | PTE_W | PTE_U;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// level 1
|
||||||
return &pgtab[PTX(va)];
|
return &pgtab[PTX(va)];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -58,13 +120,13 @@ walkpgdir(pde_t *pgdir, const void *va, int alloc)
|
||||||
// physical addresses starting at pa. va and size might not
|
// physical addresses starting at pa. va and size might not
|
||||||
// be page-aligned.
|
// be page-aligned.
|
||||||
static int
|
static int
|
||||||
mappages(pde_t *pgdir, void *va, uint size, uint pa, int perm)
|
mappages(pde_t *pgdir, void *va, uint64 size, uint64 pa, int perm)
|
||||||
{
|
{
|
||||||
char *a, *last;
|
char *a, *last;
|
||||||
pte_t *pte;
|
pte_t *pte;
|
||||||
|
|
||||||
a = (char*)PGROUNDDOWN((uint)va);
|
a = (char*)PGROUNDDOWN((uint64)va);
|
||||||
last = (char*)PGROUNDDOWN(((uint)va) + size - 1);
|
last = (char*)PGROUNDDOWN(((uint64)va) + size - 1);
|
||||||
for(;;){
|
for(;;){
|
||||||
if((pte = walkpgdir(pgdir, a, 1)) == 0)
|
if((pte = walkpgdir(pgdir, a, 1)) == 0)
|
||||||
return -1;
|
return -1;
|
||||||
|
@ -80,7 +142,7 @@ mappages(pde_t *pgdir, void *va, uint size, uint pa, int perm)
|
||||||
}
|
}
|
||||||
|
|
||||||
// There is one page table per process, plus one that's used when
|
// There is one page table per process, plus one that's used when
|
||||||
// a CPU is not running any process (kpgdir). The kernel uses the
|
// a CPU is not running any process (kpml4). The kernel uses the
|
||||||
// current process's page table during system calls and interrupts;
|
// current process's page table during system calls and interrupts;
|
||||||
// page protection bits prevent user code from using the kernel's
|
// page protection bits prevent user code from using the kernel's
|
||||||
// mappings.
|
// mappings.
|
||||||
|
@ -104,35 +166,36 @@ mappages(pde_t *pgdir, void *va, uint size, uint pa, int perm)
|
||||||
// every process's page table.
|
// every process's page table.
|
||||||
static struct kmap {
|
static struct kmap {
|
||||||
void *virt;
|
void *virt;
|
||||||
uint phys_start;
|
uint64 phys_start;
|
||||||
uint phys_end;
|
uint64 phys_end;
|
||||||
int perm;
|
int perm;
|
||||||
} kmap[] = {
|
} kmap[] = {
|
||||||
{ (void*)KERNBASE, 0, EXTMEM, PTE_W}, // I/O space
|
{ (void*)KERNBASE, 0, EXTMEM, PTE_W}, // I/O space
|
||||||
{ (void*)KERNLINK, V2P(KERNLINK), V2P(data), 0}, // kern text+rodata
|
{ (void*)KERNLINK, V2P(KERNLINK), V2P(data), 0}, // kern text+rodata
|
||||||
{ (void*)data, V2P(data), PHYSTOP, PTE_W}, // kern data+memory
|
{ (void*)data, V2P(data), PHYSTOP, PTE_W}, // kern data+memory
|
||||||
{ (void*)DEVSPACE, DEVSPACE, 0, PTE_W}, // more devices
|
{ (void*)P2V(DEVSPACE), DEVSPACE, DEVSPACETOP, PTE_W}, // more devices
|
||||||
};
|
};
|
||||||
|
|
||||||
// Set up kernel part of a page table.
|
// Set up kernel part of a page table.
|
||||||
pde_t*
|
pde_t*
|
||||||
setupkvm(void)
|
setupkvm(void)
|
||||||
{
|
{
|
||||||
pde_t *pgdir;
|
pde_t *pml4;
|
||||||
struct kmap *k;
|
struct kmap *k;
|
||||||
|
|
||||||
if((pgdir = (pde_t*)kalloc()) == 0)
|
if((pml4 = (pde_t*)kalloc()) == 0)
|
||||||
return 0;
|
return 0;
|
||||||
memset(pgdir, 0, PGSIZE);
|
memset(pml4, 0, PGSIZE);
|
||||||
if (P2V(PHYSTOP) > (void*)DEVSPACE)
|
if (PHYSTOP > DEVSPACE)
|
||||||
panic("PHYSTOP too high");
|
panic("PHYSTOP too high");
|
||||||
for(k = kmap; k < &kmap[NELEM(kmap)]; k++)
|
for(k = kmap; k < &kmap[NELEM(kmap)]; k++) {
|
||||||
if(mappages(pgdir, k->virt, k->phys_end - k->phys_start,
|
if(mappages(pml4, k->virt, k->phys_end - k->phys_start,
|
||||||
(uint)k->phys_start, k->perm) < 0) {
|
(uint)k->phys_start, k->perm) < 0) {
|
||||||
freevm(pgdir);
|
freevm(pml4, 0);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
return pgdir;
|
}
|
||||||
|
return pml4;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Allocate one page table for the machine for the kernel address
|
// Allocate one page table for the machine for the kernel address
|
||||||
|
@ -140,7 +203,7 @@ setupkvm(void)
|
||||||
void
|
void
|
||||||
kvmalloc(void)
|
kvmalloc(void)
|
||||||
{
|
{
|
||||||
kpgdir = setupkvm();
|
kpml4 = setupkvm();
|
||||||
switchkvm();
|
switchkvm();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -149,13 +212,17 @@ kvmalloc(void)
|
||||||
void
|
void
|
||||||
switchkvm(void)
|
switchkvm(void)
|
||||||
{
|
{
|
||||||
lcr3(V2P(kpgdir)); // switch to the kernel page table
|
lcr3(V2P(kpml4)); // switch to the kernel page table
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Switch TSS and h/w page table to correspond to process p.
|
// Switch TSS and h/w page table to correspond to process p.
|
||||||
void
|
void
|
||||||
switchuvm(struct proc *p)
|
switchuvm(struct proc *p)
|
||||||
{
|
{
|
||||||
|
struct desctr dtr;
|
||||||
|
struct cpu *c;
|
||||||
|
|
||||||
if(p == 0)
|
if(p == 0)
|
||||||
panic("switchuvm: no process");
|
panic("switchuvm: no process");
|
||||||
if(p->kstack == 0)
|
if(p->kstack == 0)
|
||||||
|
@ -164,16 +231,22 @@ switchuvm(struct proc *p)
|
||||||
panic("switchuvm: no pgdir");
|
panic("switchuvm: no pgdir");
|
||||||
|
|
||||||
pushcli();
|
pushcli();
|
||||||
mycpu()->gdt[SEG_TSS] = SEG16(STS_T32A, &mycpu()->ts,
|
|
||||||
sizeof(mycpu()->ts)-1, 0);
|
c = mycpu();
|
||||||
mycpu()->gdt[SEG_TSS].s = 0;
|
uint64 base = (uint64) &(c->ts);
|
||||||
mycpu()->ts.ss0 = SEG_KDATA << 3;
|
c->gdt[TSSSEG>>3] = SEGDESC(base, (sizeof(c->ts)-1), SEG_P|SEG_TSS64A);
|
||||||
mycpu()->ts.esp0 = (uint)p->kstack + KSTACKSIZE;
|
c->gdt[(TSSSEG>>3)+1] = SEGDESCHI(base);
|
||||||
// setting IOPL=0 in eflags *and* iomb beyond the tss segment limit
|
c->ts.rsp[0] = (uint64) p->kstack + KSTACKSIZE;
|
||||||
// forbids I/O instructions (e.g., inb and outb) from user space
|
c->ts.iomba = (ushort) 0xFFFF;
|
||||||
mycpu()->ts.iomb = (ushort) 0xFFFF;
|
|
||||||
ltr(SEG_TSS << 3);
|
dtr.limit = sizeof(c->gdt) - 1;
|
||||||
|
dtr.base = (uint64)c->gdt;
|
||||||
|
lgdt((void *)&dtr.limit);
|
||||||
|
|
||||||
|
ltr(TSSSEG);
|
||||||
|
|
||||||
lcr3(V2P(p->pgdir)); // switch to process's address space
|
lcr3(V2P(p->pgdir)); // switch to process's address space
|
||||||
|
|
||||||
popcli();
|
popcli();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -197,10 +270,11 @@ inituvm(pde_t *pgdir, char *init, uint sz)
|
||||||
int
|
int
|
||||||
loaduvm(pde_t *pgdir, char *addr, struct inode *ip, uint offset, uint sz)
|
loaduvm(pde_t *pgdir, char *addr, struct inode *ip, uint offset, uint sz)
|
||||||
{
|
{
|
||||||
uint i, pa, n;
|
uint i, n;
|
||||||
|
uint64 pa;
|
||||||
pte_t *pte;
|
pte_t *pte;
|
||||||
|
|
||||||
if((uint) addr % PGSIZE != 0)
|
if((uint64) addr % PGSIZE != 0)
|
||||||
panic("loaduvm: addr must be page aligned");
|
panic("loaduvm: addr must be page aligned");
|
||||||
for(i = 0; i < sz; i += PGSIZE){
|
for(i = 0; i < sz; i += PGSIZE){
|
||||||
if((pte = walkpgdir(pgdir, addr+i, 0)) == 0)
|
if((pte = walkpgdir(pgdir, addr+i, 0)) == 0)
|
||||||
|
@ -222,7 +296,7 @@ int
|
||||||
allocuvm(pde_t *pgdir, uint oldsz, uint newsz)
|
allocuvm(pde_t *pgdir, uint oldsz, uint newsz)
|
||||||
{
|
{
|
||||||
char *mem;
|
char *mem;
|
||||||
uint a;
|
uint64 a;
|
||||||
|
|
||||||
if(newsz >= KERNBASE)
|
if(newsz >= KERNBASE)
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -233,13 +307,11 @@ allocuvm(pde_t *pgdir, uint oldsz, uint newsz)
|
||||||
for(; a < newsz; a += PGSIZE){
|
for(; a < newsz; a += PGSIZE){
|
||||||
mem = kalloc();
|
mem = kalloc();
|
||||||
if(mem == 0){
|
if(mem == 0){
|
||||||
cprintf("allocuvm out of memory\n");
|
|
||||||
deallocuvm(pgdir, newsz, oldsz);
|
deallocuvm(pgdir, newsz, oldsz);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
memset(mem, 0, PGSIZE);
|
memset(mem, 0, PGSIZE);
|
||||||
if(mappages(pgdir, (char*)a, PGSIZE, V2P(mem), PTE_W|PTE_U) < 0){
|
if(mappages(pgdir, (char*)a, PGSIZE, V2P(mem), PTE_W|PTE_U) < 0){
|
||||||
cprintf("allocuvm out of memory (2)\n");
|
|
||||||
deallocuvm(pgdir, newsz, oldsz);
|
deallocuvm(pgdir, newsz, oldsz);
|
||||||
kfree(mem);
|
kfree(mem);
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -253,10 +325,10 @@ allocuvm(pde_t *pgdir, uint oldsz, uint newsz)
|
||||||
// need to be less than oldsz. oldsz can be larger than the actual
|
// need to be less than oldsz. oldsz can be larger than the actual
|
||||||
// process size. Returns the new process size.
|
// process size. Returns the new process size.
|
||||||
int
|
int
|
||||||
deallocuvm(pde_t *pgdir, uint oldsz, uint newsz)
|
deallocuvm(pde_t *pgdir, uint64 oldsz, uint64 newsz)
|
||||||
{
|
{
|
||||||
pte_t *pte;
|
pte_t *pte;
|
||||||
uint a, pa;
|
uint64 a, pa;
|
||||||
|
|
||||||
if(newsz >= oldsz)
|
if(newsz >= oldsz)
|
||||||
return oldsz;
|
return oldsz;
|
||||||
|
@ -281,20 +353,34 @@ deallocuvm(pde_t *pgdir, uint oldsz, uint newsz)
|
||||||
// Free a page table and all the physical memory pages
|
// Free a page table and all the physical memory pages
|
||||||
// in the user part.
|
// in the user part.
|
||||||
void
|
void
|
||||||
freevm(pde_t *pgdir)
|
freevm(pde_t *pml4, uint64 sz)
|
||||||
{
|
{
|
||||||
uint i;
|
uint i, j, k;
|
||||||
|
pde_t *pdp, *pd, *pt;
|
||||||
|
|
||||||
if(pgdir == 0)
|
if(pml4 == 0)
|
||||||
panic("freevm: no pgdir");
|
panic("freevm: no pgdir");
|
||||||
deallocuvm(pgdir, KERNBASE, 0);
|
|
||||||
|
deallocuvm(pml4, sz, 0);
|
||||||
for(i = 0; i < NPDENTRIES; i++){
|
for(i = 0; i < NPDENTRIES; i++){
|
||||||
if(pgdir[i] & PTE_P){
|
if(pml4[i] & PTE_P){
|
||||||
char * v = P2V(PTE_ADDR(pgdir[i]));
|
pdp = (pdpe_t*)P2V(PTE_ADDR(pml4[i]));
|
||||||
kfree(v);
|
for(j = 0; j < NPDENTRIES; j++){
|
||||||
|
if(pdp[j] & PTE_P){
|
||||||
|
pd = (pde_t*)P2V(PTE_ADDR(pdp[j]));
|
||||||
|
for(k = 0; k < NPDENTRIES; k++){
|
||||||
|
if(pd[k] & PTE_P) {
|
||||||
|
pt = (pde_t*)P2V(PTE_ADDR(pd[k]));
|
||||||
|
kfree((char*)pt);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
kfree((char*)pd);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
kfree((char*)pdp);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
kfree((char*)pgdir);
|
kfree((char*)pml4);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Clear PTE_U on a page. Used to create an inaccessible
|
// Clear PTE_U on a page. Used to create an inaccessible
|
||||||
|
@ -317,7 +403,8 @@ copyuvm(pde_t *pgdir, uint sz)
|
||||||
{
|
{
|
||||||
pde_t *d;
|
pde_t *d;
|
||||||
pte_t *pte;
|
pte_t *pte;
|
||||||
uint pa, i, flags;
|
uint64 pa, i;
|
||||||
|
uint flags;
|
||||||
char *mem;
|
char *mem;
|
||||||
|
|
||||||
if((d = setupkvm()) == 0)
|
if((d = setupkvm()) == 0)
|
||||||
|
@ -340,7 +427,7 @@ copyuvm(pde_t *pgdir, uint sz)
|
||||||
return d;
|
return d;
|
||||||
|
|
||||||
bad:
|
bad:
|
||||||
freevm(d);
|
freevm(d, sz);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -366,7 +453,7 @@ int
|
||||||
copyout(pde_t *pgdir, uint va, void *p, uint len)
|
copyout(pde_t *pgdir, uint va, void *p, uint len)
|
||||||
{
|
{
|
||||||
char *buf, *pa0;
|
char *buf, *pa0;
|
||||||
uint n, va0;
|
uint64 n, va0;
|
||||||
|
|
||||||
buf = (char*)p;
|
buf = (char*)p;
|
||||||
while(len > 0){
|
while(len > 0){
|
||||||
|
|
104
x86.h
104
x86.h
|
@ -1,5 +1,7 @@
|
||||||
// Routines to let C code use special x86 instructions.
|
// Routines to let C code use special x86 instructions.
|
||||||
|
|
||||||
|
#ifndef __ASSEMBLER__
|
||||||
|
|
||||||
static inline uchar
|
static inline uchar
|
||||||
inb(ushort port)
|
inb(ushort port)
|
||||||
{
|
{
|
||||||
|
@ -57,32 +59,16 @@ stosl(void *addr, int data, int cnt)
|
||||||
"memory", "cc");
|
"memory", "cc");
|
||||||
}
|
}
|
||||||
|
|
||||||
struct segdesc;
|
|
||||||
|
|
||||||
static inline void
|
static inline void
|
||||||
lgdt(struct segdesc *p, int size)
|
lgdt(void *p)
|
||||||
{
|
{
|
||||||
volatile ushort pd[3];
|
asm volatile("lgdt (%0)" : : "r" (p) : "memory");
|
||||||
|
|
||||||
pd[0] = size-1;
|
|
||||||
pd[1] = (uint)p;
|
|
||||||
pd[2] = (uint)p >> 16;
|
|
||||||
|
|
||||||
asm volatile("lgdt (%0)" : : "r" (pd));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
struct gatedesc;
|
|
||||||
|
|
||||||
static inline void
|
static inline void
|
||||||
lidt(struct gatedesc *p, int size)
|
lidt(void *p)
|
||||||
{
|
{
|
||||||
volatile ushort pd[3];
|
asm volatile("lidt (%0)" : : "r" (p) : "memory");
|
||||||
|
|
||||||
pd[0] = size-1;
|
|
||||||
pd[1] = (uint)p;
|
|
||||||
pd[2] = (uint)p >> 16;
|
|
||||||
|
|
||||||
asm volatile("lidt (%0)" : : "r" (pd));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void
|
static inline void
|
||||||
|
@ -91,11 +77,11 @@ ltr(ushort sel)
|
||||||
asm volatile("ltr %0" : : "r" (sel));
|
asm volatile("ltr %0" : : "r" (sel));
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline uint
|
static inline uint64
|
||||||
readeflags(void)
|
readeflags(void)
|
||||||
{
|
{
|
||||||
uint eflags;
|
uint64 eflags;
|
||||||
asm volatile("pushfl; popl %0" : "=r" (eflags));
|
asm volatile("pushf; pop %0" : "=r" (eflags));
|
||||||
return eflags;
|
return eflags;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -133,51 +119,53 @@ xchg(volatile uint *addr, uint newval)
|
||||||
static inline uint
|
static inline uint
|
||||||
rcr2(void)
|
rcr2(void)
|
||||||
{
|
{
|
||||||
uint val;
|
uint64 val;
|
||||||
asm volatile("movl %%cr2,%0" : "=r" (val));
|
asm volatile("mov %%cr2,%0" : "=r" (val));
|
||||||
return val;
|
return val;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void
|
static inline void
|
||||||
lcr3(uint val)
|
lcr3(uint64 val)
|
||||||
{
|
{
|
||||||
asm volatile("movl %0,%%cr3" : : "r" (val));
|
asm volatile("mov %0,%%cr3" : : "r" (val));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline void
|
||||||
|
writegs(uint16 v)
|
||||||
|
{
|
||||||
|
__asm volatile("movw %0, %%gs" : : "r" (v));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
//PAGEBREAK: 36
|
//PAGEBREAK: 36
|
||||||
// Layout of the trap frame built on the stack by the
|
// Layout of the trap frame built on the stack by the
|
||||||
// hardware and by trapasm.S, and passed to trap().
|
// hardware and by trapasm.S, and passed to trap().
|
||||||
struct trapframe {
|
struct trapframe {
|
||||||
// registers as pushed by pusha
|
uint64 rax;
|
||||||
uint edi;
|
uint64 rbx;
|
||||||
uint esi;
|
uint64 rcx;
|
||||||
uint ebp;
|
uint64 rdx;
|
||||||
uint oesp; // useless & ignored
|
uint64 rbp;
|
||||||
uint ebx;
|
uint64 rsi;
|
||||||
uint edx;
|
uint64 rdi;
|
||||||
uint ecx;
|
uint64 r8;
|
||||||
uint eax;
|
uint64 r9;
|
||||||
|
uint64 r10;
|
||||||
|
uint64 r11;
|
||||||
|
uint64 r12;
|
||||||
|
uint64 r13;
|
||||||
|
uint64 r14;
|
||||||
|
uint64 r15;
|
||||||
|
uint64 trapno;
|
||||||
|
uint64 err;
|
||||||
|
uint64 rip;
|
||||||
|
uint16 cs;
|
||||||
|
uint16 padding[3];
|
||||||
|
uint64 rflags;
|
||||||
|
uint64 rsp;
|
||||||
|
uint64 ss;
|
||||||
|
}__attribute__((packed));
|
||||||
|
|
||||||
// rest of trap frame
|
#endif
|
||||||
ushort gs;
|
|
||||||
ushort padding1;
|
|
||||||
ushort fs;
|
|
||||||
ushort padding2;
|
|
||||||
ushort es;
|
|
||||||
ushort padding3;
|
|
||||||
ushort ds;
|
|
||||||
ushort padding4;
|
|
||||||
uint trapno;
|
|
||||||
|
|
||||||
// below here defined by x86 hardware
|
#define TF_CS 144 // offset in trapframe for saved cs
|
||||||
uint err;
|
|
||||||
uint eip;
|
|
||||||
ushort cs;
|
|
||||||
ushort padding5;
|
|
||||||
uint eflags;
|
|
||||||
|
|
||||||
// below here only when crossing rings, such as from user to kernel
|
|
||||||
uint esp;
|
|
||||||
ushort ss;
|
|
||||||
ushort padding6;
|
|
||||||
};
|
|
||||||
|
|
Loading…
Reference in a new issue