diff --git a/Makefile b/Makefile index a0c901d..ef9b66d 100644 --- a/Makefile +++ b/Makefile @@ -38,7 +38,7 @@ AS = $(TOOLPREFIX)gas LD = $(TOOLPREFIX)ld OBJCOPY = $(TOOLPREFIX)objcopy OBJDUMP = $(TOOLPREFIX)objdump -CFLAGS = -fno-pic -static -fno-builtin -fno-strict-aliasing -O2 -Wall -MD -ggdb -m32 +CFLAGS = -fno-pic -static -fno-builtin -fno-strict-aliasing -O2 -Wall -MD -ggdb -m32 -Werror CFLAGS += $(shell $(CC) -fno-stack-protector -E -x c /dev/null >/dev/null 2>&1 && echo -fno-stack-protector) ASFLAGS = -m32 -gdwarf-2 # FreeBSD ld wants ``elf_i386_fbsd'' diff --git a/asm.h b/asm.h index 0c052db..68210d7 100644 --- a/asm.h +++ b/asm.h @@ -6,6 +6,8 @@ .word 0, 0; \ .byte 0, 0, 0, 0 +// The 0xC0 means the limit is in 4096-byte units +// and (for executable segments) 32-bit mode. #define SEG_ASM(type,base,lim) \ .word (((lim) >> 12) & 0xffff), ((base) & 0xffff); \ .byte (((base) >> 16) & 0xff), (0x90 | (type)), \ diff --git a/bootasm.S b/bootasm.S index f6af255..56175ce 100644 --- a/bootasm.S +++ b/bootasm.S @@ -51,8 +51,10 @@ seta20.2: orl $CR0_PE, %eax movl %eax, %cr0 - # Jump to next instruction, but in 32-bit code segment. - # Switches processor into 32-bit mode. + # This ljmp is how you load the CS (Code Segment) register. + # SEG_ASM produces segment descriptors with the 32-bit mode + # flag set (the D flag), so addresses and word operands will + # default to 32 bits after this jump. ljmp $(SEG_KCODE<<3), $start32 .code32 # Assemble for 32-bit mode diff --git a/bootother.S b/bootother.S index 11d32f1..899669a 100644 --- a/bootother.S +++ b/bootother.S @@ -45,8 +45,10 @@ start: orl $CR0_PE, %eax movl %eax, %cr0 - # Jump to next instruction, but in 32-bit code segment. - # Switches processor into 32-bit mode. + # This ljmp is how you load the CS (Code Segment) register. + # SEG_ASM produces segment descriptors with the 32-bit mode + # flag set (the D flag), so addresses and word operands will + # default to 32 bits after this jump. ljmp $(SEG_KCODE<<3), $start32 .code32 # Assemble for 32-bit mode diff --git a/defs.h b/defs.h index 86268b2..0197e70 100644 --- a/defs.h +++ b/defs.h @@ -110,7 +110,6 @@ void yield(void); // swtch.S void swtch(struct context**, struct context*); -void jstack(uint); // spinlock.c void acquire(struct spinlock*); @@ -143,7 +142,7 @@ void timerinit(void); // trap.c void idtinit(void); -extern int ticks; +extern uint ticks; void tvinit(void); extern struct spinlock tickslock; @@ -153,23 +152,20 @@ void uartintr(void); void uartputc(int); // vm.c -#define PGROUNDUP(sz) ((sz+PGSIZE-1) & ~(PGSIZE-1)) -extern pde_t *kpgdir; void pminit(void); void ksegment(void); void kvmalloc(void); void vminit(void); -void jkstack(); -void printstack(void); -void printpgdir(pde_t *); pde_t* setupkvm(void); char* uva2ka(pde_t*, char*); int allocuvm(pde_t*, char*, uint); +int deallocuvm(pde_t *pgdir, char *addr, uint sz); void freevm(pde_t*); void inituvm(pde_t*, char*, char*, uint); int loaduvm(pde_t*, char*, struct inode *ip, uint, uint); pde_t* copyuvm(pde_t*,uint); -void loadvm(struct proc*); +void switchuvm(struct proc*); +void switchkvm(); // number of elements in fixed-size array #define NELEM(x) (sizeof(x)/sizeof((x)[0])) diff --git a/exec.c b/exec.c index 8a92e99..4f11695 100644 --- a/exec.c +++ b/exec.c @@ -43,13 +43,16 @@ exec(char *path, char **argv) goto bad; if (!allocuvm(pgdir, (char *)ph.va, ph.memsz)) goto bad; - sz += PGROUNDUP(ph.memsz); + if(ph.va + ph.memsz > sz) + sz = ph.va + ph.memsz; if (!loaduvm(pgdir, (char *)ph.va, ip, ph.offset, ph.filesz)) goto bad; } iunlockput(ip); // Allocate and initialize stack at sz + sz = PGROUNDUP(sz); + sz += PGSIZE; // leave an invalid page if (!allocuvm(pgdir, (char *)sz, PGSIZE)) goto bad; mem = uva2ka(pgdir, (char *)sz); @@ -95,7 +98,7 @@ exec(char *path, char **argv) proc->tf->eip = elf.entry; // main proc->tf->esp = sp; - loadvm(proc); + switchuvm(proc); freevm(oldpgdir); diff --git a/kalloc.c b/kalloc.c index 5661105..ca87018 100644 --- a/kalloc.c +++ b/kalloc.c @@ -1,9 +1,8 @@ // Physical memory allocator, intended to allocate -// memory for user processes. Allocates in 4096-byte "pages". +// memory for user processes. Allocates in 4096-byte pages. // Free list is kept sorted and combines adjacent pages into // long runs, to make it easier to allocate big segments. -// One reason the page size is 4k is that the x86 segment size -// granularity is 4k. +// This combining is not useful now that xv6 uses paging. #include "types.h" #include "defs.h" @@ -24,14 +23,10 @@ struct { int nfreemem; // Initialize free list of physical pages. -// This code cheats by just considering one megabyte of -// pages after end. Real systems would determine the -// amount of memory available in the system and use it all. void kinit(char *p, uint len) { initlock(&kmem.lock, "kmem"); - cprintf("end 0x%x free = %d(0x%x)\n", p, len); nfreemem = 0; kfree(p, len); } diff --git a/main.c b/main.c index 78cd334..a6088cb 100644 --- a/main.c +++ b/main.c @@ -7,7 +7,8 @@ static void bootothers(void); static void mpmain(void); -void jkstack(void) __attribute__((noreturn)); +void jkstack(void) __attribute__((noreturn)); +void mainc(void); // Bootstrap processor starts running C code here. int @@ -15,21 +16,32 @@ main(void) { mpinit(); // collect info about this machine lapicinit(mpbcpu()); - ksegment(); + ksegment(); // set up segments picinit(); // interrupt controller ioapicinit(); // another interrupt controller consoleinit(); // I/O devices & their interrupts uartinit(); // serial port - pminit(); // physical memory for kernel - jkstack(); // Jump to mainc on a proper-allocated kernel stack + pminit(); // discover how much memory there is + jkstack(); // call mainc() on a properly-allocated stack +} + +void +jkstack(void) +{ + char *kstack = kalloc(PGSIZE); + if (!kstack) + panic("jkstack\n"); + char *top = kstack + PGSIZE; + asm volatile("movl %0,%%esp" : : "r" (top)); + asm volatile("call mainc"); + panic("jkstack"); } void mainc(void) { - cprintf("cpus %p cpu %p\n", cpus, cpu); cprintf("\ncpu%d: starting xv6\n\n", cpu->id); - kvmalloc(); // allocate the kernel page table + kvmalloc(); // initialze the kernel page table pinit(); // process table tvinit(); // trap vectors binit(); // buffer cache @@ -45,22 +57,21 @@ mainc(void) mpmain(); } -// Bootstrap processor gets here after setting up the hardware. -// Additional processors start here. +// Common CPU setup code. +// Bootstrap CPU comes here from mainc(). +// Other CPUs jump here from bootother.S. static void mpmain(void) { if(cpunum() != mpbcpu()) { ksegment(); - cprintf("other cpu\n"); lapicinit(cpunum()); } - vminit(); // Run with paging on each processor - cprintf("cpu%d: mpmain\n", cpu->id); - idtinit(); + vminit(); // turn on paging + cprintf("cpu%d: starting\n", cpu->id); + idtinit(); // load idt register xchg(&cpu->booted, 1); - cprintf("cpu%d: scheduling\n", cpu->id); - scheduler(); + scheduler(); // start running processes } static void @@ -75,6 +86,7 @@ bootothers(void) // placed the start of bootother.S there. code = (uchar *) 0x7000; memmove(code, _binary_bootother_start, (uint)_binary_bootother_size); + for(c = cpus; c < cpus+ncpu; c++){ if(c == cpus+cpunum()) // We've started already. continue; @@ -85,7 +97,7 @@ bootothers(void) *(void**)(code-8) = mpmain; lapicstartap(c->id, (uint)code); - // Wait for cpu to get through bootstrap. + // Wait for cpu to finish mpmain() while(c->booted == 0) ; } diff --git a/mmu.h b/mmu.h index 378ae22..f4fc732 100644 --- a/mmu.h +++ b/mmu.h @@ -85,32 +85,20 @@ struct segdesc { // | Page Directory | Page Table | Offset within Page | // | Index | Index | | // +----------------+----------------+---------------------+ -// \--- PDX(la) --/ \--- PTX(la) --/ \---- PGOFF(la) ----/ -// \----------- PPN(la) -----------/ -// -// The PDX, PTX, PGOFF, and PPN macros decompose linear addresses as shown. -// To construct a linear address la from PDX(la), PTX(la), and PGOFF(la), -// use PGADDR(PDX(la), PTX(la), PGOFF(la)). - -// page number field of address -#define PPN(la) (((uint) (la)) >> PTXSHIFT) -#define VPN(la) PPN(la) // used to index into vpt[] +// \--- PDX(la) --/ \--- PTX(la) --/ // page directory index #define PDX(la) ((((uint) (la)) >> PDXSHIFT) & 0x3FF) -#define VPD(la) PDX(la) // used to index into vpd[] // page table index #define PTX(la) ((((uint) (la)) >> PTXSHIFT) & 0x3FF) -// offset in page -#define PGOFF(la) (((uint) (la)) & 0xFFF) - // construct linear address from indexes and offset #define PGADDR(d, t, o) ((uint) ((d) << PDXSHIFT | (t) << PTXSHIFT | (o))) -// mapping from physical addresses to virtual addresses is the identity one -// (really linear addresses, but we map linear to physical also directly) +// turn a kernel linear address into a physical address. +// all of the kernel data structures have linear and +// physical addresses that are equal. #define PADDR(a) ((uint) a) // Page directory and page table constants. @@ -120,12 +108,12 @@ struct segdesc { #define PGSIZE 4096 // bytes mapped by a page #define PGSHIFT 12 // log2(PGSIZE) -#define PTSIZE (PGSIZE*NPTENTRIES) // bytes mapped by a page directory entry -#define PTSHIFT 22 // log2(PTSIZE) - #define PTXSHIFT 12 // offset of PTX in a linear address #define PDXSHIFT 22 // offset of PDX in a linear address +#define PGROUNDUP(sz) (((sz)+PGSIZE-1) & ~(PGSIZE-1)) +#define PGROUNDDOWN(a) ((char*)((((unsigned int)a) & ~(PGSIZE-1)))) + // Page table/directory entry flags. #define PTE_P 0x001 // Present #define PTE_W 0x002 // Writeable @@ -137,13 +125,6 @@ struct segdesc { #define PTE_PS 0x080 // Page Size #define PTE_MBZ 0x180 // Bits must be zero -// The PTE_AVAIL bits aren't used by the kernel or interpreted by the -// hardware, so user processes are allowed to set them arbitrarily. -#define PTE_AVAIL 0xE00 // Available for software use - -// Only flags in PTE_USER may be used in system calls. -#define PTE_USER (PTE_AVAIL | PTE_P | PTE_W | PTE_U) - // Address in page table or page directory entry #define PTE_ADDR(pte) ((uint) (pte) & ~0xFFF) diff --git a/proc.c b/proc.c index c1faec6..e69bacf 100644 --- a/proc.c +++ b/proc.c @@ -142,10 +142,15 @@ userinit(void) int growproc(int n) { - if (!allocuvm(proc->pgdir, (char *)proc->sz, n)) - return -1; + if(n > 0){ + if (!allocuvm(proc->pgdir, (char *)proc->sz, n)) + return -1; + } else if(n < 0){ + if (!deallocuvm(proc->pgdir, (char *)(proc->sz + n), 0 - n)) + return -1; + } proc->sz += n; - loadvm(proc); + switchuvm(proc); return 0; } @@ -214,9 +219,10 @@ scheduler(void) // to release ptable.lock and then reacquire it // before jumping back to us. proc = p; - loadvm(p); + switchuvm(p); p->state = RUNNING; swtch(&cpu->scheduler, proc->context); + switchkvm(); // Process is done running for now. // It should have changed its p->state before coming back. @@ -242,7 +248,6 @@ sched(void) panic("sched running"); if(readeflags()&FL_IF) panic("sched interruptible"); - lcr3(PADDR(kpgdir)); // Switch to the kernel page table intena = cpu->intena; swtch(&proc->context, cpu->scheduler); cpu->intena = intena; @@ -414,9 +419,9 @@ wait(void) // Found one. pid = p->pid; kfree(p->kstack, KSTACKSIZE); - freevm(p->pgdir); + p->kstack = 0; + freevm(p->pgdir); p->state = UNUSED; - p->kstack = 0; p->pid = 0; p->parent = 0; p->name[0] = 0; diff --git a/proc.h b/proc.h index ebc42f1..7d97dfa 100644 --- a/proc.h +++ b/proc.h @@ -3,8 +3,8 @@ #define SEG_KCODE 1 // kernel code #define SEG_KDATA 2 // kernel data+stack #define SEG_KCPU 3 // kernel per-cpu data -#define SEG_UCODE 4 -#define SEG_UDATA 5 +#define SEG_UCODE 4 // user code +#define SEG_UDATA 5 // user data+stack #define SEG_TSS 6 // this process's task state #define NSEGS 7 @@ -16,7 +16,7 @@ // Contexts are stored at the bottom of the stack they // describe; the stack pointer is the address of the context. // The layout of the context matches the layout of the stack in swtch.S -// at "Switch stacks" comment. Switch itself doesn't save eip explicitly, +// at the "Switch stacks" comment. Switch doesn't save eip explicitly, // but it is on the stack and allocproc() manipulates it. struct context { uint edi; @@ -31,7 +31,7 @@ enum procstate { UNUSED, EMBRYO, SLEEPING, RUNNABLE, RUNNING, ZOMBIE }; // Per-process state struct proc { uint sz; // Size of process memory (bytes) - pde_t* pgdir; // linear address of proc's pgdir + pde_t* pgdir; // Linear address of proc's pgdir char *kstack; // Bottom of kernel stack for this process enum procstate state; // Process state volatile int pid; // Process ID @@ -48,6 +48,7 @@ struct proc { // Process memory is laid out contiguously, low addresses first: // text // original data and bss +// invalid page // fixed-size stack // expandable heap diff --git a/runoff.list b/runoff.list index c469aae..f39ce18 100644 --- a/runoff.list +++ b/runoff.list @@ -23,6 +23,7 @@ proc.c swtch.S vm.c kalloc.c +vm.c # system calls traps.h diff --git a/sh.c b/sh.c index e8d65f0..16e325b 100644 --- a/sh.c +++ b/sh.c @@ -420,7 +420,6 @@ parseexec(char **ps, char *es) int tok, argc; struct execcmd *cmd; struct cmd *ret; - int *x = (int *) peek; if(peek(ps, es, "(")) return parseblock(ps, es); diff --git a/swtch.S b/swtch.S index 49efdf9..8751317 100644 --- a/swtch.S +++ b/swtch.S @@ -26,11 +26,3 @@ swtch: popl %ebx popl %ebp ret - -# Jump on a new stack, fake C calling conventions -.globl jstack -jstack: - movl 4(%esp), %esp - subl $16, %esp # space for arguments - movl $0, %ebp # terminate functions that follow ebp's - call mainc # continue at mainc diff --git a/syscall.c b/syscall.c index ce79dbd..9296cff 100644 --- a/syscall.c +++ b/syscall.c @@ -100,6 +100,7 @@ extern int sys_sleep(void); extern int sys_unlink(void); extern int sys_wait(void); extern int sys_write(void); +extern int sys_uptime(void); static int (*syscalls[])(void) = { [SYS_chdir] sys_chdir, @@ -122,6 +123,7 @@ static int (*syscalls[])(void) = { [SYS_unlink] sys_unlink, [SYS_wait] sys_wait, [SYS_write] sys_write, +[SYS_uptime] sys_uptime, }; void diff --git a/syscall.h b/syscall.h index f4b7807..3a0fbca 100644 --- a/syscall.h +++ b/syscall.h @@ -19,3 +19,4 @@ #define SYS_getpid 18 #define SYS_sbrk 19 #define SYS_sleep 20 +#define SYS_uptime 21 diff --git a/sysproc.c b/sysproc.c index 11770ff..efaa372 100644 --- a/sysproc.c +++ b/sysproc.c @@ -57,7 +57,8 @@ sys_sbrk(void) int sys_sleep(void) { - int n, ticks0; + int n; + uint ticks0; if(argint(0, &n) < 0) return -1; @@ -73,3 +74,16 @@ sys_sleep(void) release(&tickslock); return 0; } + +// return how many clock tick interrupts have occurred +// since boot. +int +sys_uptime(void) +{ + uint xticks; + + acquire(&tickslock); + xticks = ticks; + release(&tickslock); + return xticks; +} diff --git a/trap.c b/trap.c index 1f35708..daee22f 100644 --- a/trap.c +++ b/trap.c @@ -11,7 +11,7 @@ struct gatedesc idt[256]; extern uint vectors[]; // in vectors.S: array of 256 entry pointers struct spinlock tickslock; -int ticks; +uint ticks; void tvinit(void) diff --git a/usertests.c b/usertests.c index cc2601c..670a4a8 100644 --- a/usertests.c +++ b/usertests.c @@ -322,8 +322,9 @@ void mem(void) { void *m1, *m2; - int pid; + int pid, ppid; + ppid = getpid(); if((pid = fork()) == 0){ m1 = 0; while((m2 = malloc(10001)) != 0) { @@ -338,6 +339,7 @@ mem(void) m1 = malloc(1024*20); if(m1 == 0) { printf(1, "couldn't allocate mem?!!\n"); + kill(ppid); exit(); } free(m1); @@ -1229,6 +1231,136 @@ forktest(void) printf(1, "fork test OK\n"); } +void +sbrktest(void) +{ + int pid; + char *oldbrk = sbrk(0); + + printf(stdout, "sbrk test\n"); + + // can one sbrk() less than a page? + char *a = sbrk(0); + int i; + for(i = 0; i < 5000; i++){ + char *b = sbrk(1); + if(b != a){ + printf(stdout, "sbrk test failed %d %x %x\n", i, a, b); + exit(); + } + *b = 1; + a = b + 1; + } + pid = fork(); + if(pid < 0){ + printf(stdout, "sbrk test fork failed\n"); + exit(); + } + char *c = sbrk(1); + c = sbrk(1); + if(c != a + 1){ + printf(stdout, "sbrk test failed post-fork\n"); + exit(); + } + if(pid == 0) + exit(); + wait(); + + // can one allocate the full 640K? + a = sbrk(0); + uint amt = (640 * 1024) - (uint) a; + char *p = sbrk(amt); + if(p != a){ + printf(stdout, "sbrk test failed 640K test, p %x a %x\n", p, a); + exit(); + } + char *lastaddr = (char *)(640 * 1024 - 1); + *lastaddr = 99; + + // is one forbidden from allocating more than 640K? + c = sbrk(4096); + if(c != (char *) 0xffffffff){ + printf(stdout, "sbrk allocated more than 640K, c %x\n", c); + exit(); + } + + // can one de-allocate? + a = sbrk(0); + c = sbrk(-4096); + if(c == (char *) 0xffffffff){ + printf(stdout, "sbrk could not deallocate\n"); + exit(); + } + c = sbrk(0); + if(c != a - 4096){ + printf(stdout, "sbrk deallocation produced wrong address, a %x c %x\n", a, c); + exit(); + } + + // can one re-allocate that page? + a = sbrk(0); + c = sbrk(4096); + if(c != a || sbrk(0) != a + 4096){ + printf(stdout, "sbrk re-allocation failed, a %x c %x\n", a, c); + exit(); + } + if(*lastaddr == 99){ + // should be zero + printf(stdout, "sbrk de-allocation didn't really deallocate\n"); + exit(); + } + + c = sbrk(4096); + if(c != (char *) 0xffffffff){ + printf(stdout, "sbrk was able to re-allocate beyond 640K, c %x\n", c); + exit(); + } + + // can we read the kernel's memory? + for(a = (char*)(640*1024); a < (char *)2000000; a += 50000){ + int ppid = getpid(); + int pid = fork(); + if(pid < 0){ + printf(stdout, "fork failed\n"); + exit(); + } + if(pid == 0){ + printf(stdout, "oops could read %x = %x\n", a, *a); + kill(ppid); + exit(); + } + wait(); + } + + if(sbrk(0) > oldbrk) + sbrk(-(sbrk(0) - oldbrk)); + + printf(stdout, "sbrk test OK\n"); +} + +void +stacktest(void) +{ + printf(stdout, "stack test\n"); + char dummy = 1; + char *p = &dummy; + int ppid = getpid(); + int pid = fork(); + if(pid < 0){ + printf(stdout, "fork failed\n"); + exit(); + } + if(pid == 0){ + // should cause a trap: + p[-4096] = 'z'; + kill(ppid); + printf(stdout, "stack test failed: page before stack was writeable\n"); + exit(); + } + wait(); + printf(stdout, "stack test OK\n"); +} + int main(int argc, char *argv[]) { @@ -1240,6 +1372,9 @@ main(int argc, char *argv[]) } close(open("usertests.ran", O_CREATE)); + stacktest(); + sbrktest(); + opentest(); writetest(); writetest1(); diff --git a/usys.S b/usys.S index 2291b02..8bfd8a1 100644 --- a/usys.S +++ b/usys.S @@ -28,3 +28,4 @@ SYSCALL(dup) SYSCALL(getpid) SYSCALL(sbrk) SYSCALL(sleep) +SYSCALL(uptime) diff --git a/vm.c b/vm.c index 231e133..98ac108 100644 --- a/vm.c +++ b/vm.c @@ -8,13 +8,20 @@ // The mappings from logical to linear are one to one (i.e., // segmentation doesn't do anything). -// The mapping from linear to physical are one to one for the kernel. -// The mappings for the kernel include all of physical memory (until -// PHYSTOP), including the I/O hole, and the top of physical address -// space, where additional devices are located. -// The kernel itself is linked to be at 1MB, and its physical memory -// is also at 1MB. -// Physical memory for user programs is allocated from physical memory +// There is one page table per process, plus one that's used +// when a CPU is not running any process (kpgdir). +// A user process uses the same page table as the kernel; the +// page protection bits prevent it from using anything other +// than its memory. +// +// setupkvm() and exec() set up every page table like this: +// 0..640K : user memory (text, data, stack, heap) +// 640K..1M : mapped direct (for IO space) +// 1M..kernend : mapped direct (for the kernel's text and data) +// kernend..PHYSTOP : mapped direct (kernel heap and user pages) +// 0xfe000000..0 : mapped direct (devices such as ioapic) +// +// The kernel allocates memory for its heap and for user memory // between kernend and the end of physical memory (PHYSTOP). // The virtual address space of each user program includes the kernel // (which is inaccessible in user mode). The user program addresses @@ -22,7 +29,7 @@ // (both in physical memory and in the kernel's virtual address // space). -#define PHYSTOP 0x300000 +#define PHYSTOP 0x1000000 #define USERTOP 0xA0000 static uint kerntext; // Linker starts kernel at 1MB @@ -31,29 +38,11 @@ static uint kerndata; static uint kerndsz; static uint kernend; static uint freesz; -pde_t *kpgdir; // One kernel page table for scheduler procs - -void -printpgdir(pde_t *pgdir) -{ - uint i; - uint j; - - cprintf("printpgdir 0x%x\n", pgdir); - for (i = 0; i < NPDENTRIES; i++) { - if (pgdir[i] != 0 && i < 100) { - cprintf("pgdir %d, v=0x%x\n", i, pgdir[i]); - pte_t *pgtab = (pte_t*) PTE_ADDR(pgdir[i]); - for (j = 0; j < NPTENTRIES; j++) { - if (pgtab[j] != 0) - cprintf("pgtab %d, v=0x%x, addr=0x%x\n", j, PGADDR(i, j, 0), - PTE_ADDR(pgtab[j])); - } - } - } - cprintf("printpgdir done\n", pgdir); -} +static pde_t *kpgdir; // for use in scheduler() +// return the address of the PTE in page table pgdir +// that corresponds to linear address va. if create!=0, +// create any required page table pages. static pte_t * walkpgdir(pde_t *pgdir, const void *va, int create) { @@ -80,16 +69,26 @@ walkpgdir(pde_t *pgdir, const void *va, int create) return &pgtab[PTX(va)]; } +// create PTEs for linear addresses starting at la that refer to +// physical addresses starting at pa. la and size might not +// be page-aligned. static int mappages(pde_t *pgdir, void *la, uint size, uint pa, int perm) { - uint i; - pte_t *pte; - - for (i = 0; i < size; i += PGSIZE) { - if (!(pte = walkpgdir(pgdir, (void*)(la + i), 1))) + char *first = PGROUNDDOWN(la); + char *last = PGROUNDDOWN(la + size - 1); + char *a = first; + while(1){ + pte_t *pte = walkpgdir(pgdir, a, 1); + if(pte == 0) return 0; - *pte = (pa + i) | perm | PTE_P; + if(*pte & PTE_P) + panic("remap"); + *pte = pa | perm | PTE_P; + if(a == last) + break; + a += PGSIZE; + pa += PGSIZE; } return 1; } @@ -101,12 +100,15 @@ ksegment(void) { struct cpu *c; - // Map once virtual addresses to linear addresses using identity map + // Map virtual addresses to linear addresses using identity map. + // Cannot share a CODE descriptor for both kernel and user + // because it would have to have DPL_USR, but the CPU forbids + // an interrupt from CPL=0 to DPL=3. c = &cpus[cpunum()]; c->gdt[SEG_KCODE] = SEG(STA_X|STA_R, 0, 0xffffffff, 0); c->gdt[SEG_KDATA] = SEG(STA_W, 0, 0xffffffff, 0); - c->gdt[SEG_UCODE] = SEG(STA_X|STA_R, 0x0, 0xffffffff, DPL_USER); - c->gdt[SEG_UDATA] = SEG(STA_W, 0x0, 0xffffffff, DPL_USER); + c->gdt[SEG_UCODE] = SEG(STA_X|STA_R, 0, 0xffffffff, DPL_USER); + c->gdt[SEG_UDATA] = SEG(STA_W, 0, 0xffffffff, DPL_USER); // map cpu, and curproc c->gdt[SEG_KCPU] = SEG(STA_W, &c->cpu, 8, 0); @@ -119,9 +121,9 @@ ksegment(void) proc = 0; } -// Setup address space and current process task state. +// Switch h/w page table and TSS registers to point to process p. void -loadvm(struct proc *p) +switchuvm(struct proc *p) { pushcli(); @@ -133,14 +135,21 @@ loadvm(struct proc *p) ltr(SEG_TSS << 3); if (p->pgdir == 0) - panic("loadvm: no pgdir\n"); + panic("switchuvm: no pgdir\n"); lcr3(PADDR(p->pgdir)); // switch to new address space popcli(); } -// Setup kernel part of a page table. Linear adresses map one-to-one -// on physical addresses. +// Switch h/w page table register to the kernel-only page table, for when +// no process is running. +void +switchkvm() +{ + lcr3(PADDR(kpgdir)); // Switch to the kernel page table +} + +// Set up kernel part of a page table. pde_t* setupkvm(void) { @@ -153,10 +162,10 @@ setupkvm(void) // Map IO space from 640K to 1Mbyte if (!mappages(pgdir, (void *)USERTOP, 0x60000, USERTOP, PTE_W)) return 0; - // Map kernel text from kern text addr read-only + // Map kernel text read-only if (!mappages(pgdir, (void *) kerntext, kerntsz, kerntext, 0)) return 0; - // Map kernel data form kern data addr R/W + // Map kernel data read/write if (!mappages(pgdir, (void *) kerndata, kerndsz, kerndata, PTE_W)) return 0; // Map dynamically-allocated memory read/write (kernel stacks, user mem) @@ -168,6 +177,10 @@ setupkvm(void) return pgdir; } +// return the physical address that a given user address +// maps to. the result is also a kernel logical address, +// since the kernel maps the physical memory allocated to user +// processes directly. char* uva2ka(pde_t *pgdir, char *uva) { @@ -177,25 +190,60 @@ uva2ka(pde_t *pgdir, char *uva) return (char *)pa; } +// allocate sz bytes more memory for a process starting at the +// given user address; allocates physical memory and page +// table entries. addr and sz need not be page-aligned. +// it is a no-op for any parts of the requested memory +// that are already allocated. int allocuvm(pde_t *pgdir, char *addr, uint sz) { - uint i, n; - char *mem; - - n = PGROUNDUP(sz); - if (addr + n >= USERTOP) + if (addr + sz > (char*)USERTOP) return 0; - for (i = 0; i < n; i += PGSIZE) { - if (!(mem = kalloc(PGSIZE))) { // XXX cleanup what we did? - return 0; + char *first = PGROUNDDOWN(addr); + char *last = PGROUNDDOWN(addr + sz - 1); + char *a; + for(a = first; a <= last; a += PGSIZE){ + pte_t *pte = walkpgdir(pgdir, a, 0); + if(pte == 0 || (*pte & PTE_P) == 0){ + char *mem = kalloc(PGSIZE); + if(mem == 0){ + // XXX clean up? + return 0; + } + memset(mem, 0, PGSIZE); + mappages(pgdir, a, PGSIZE, PADDR(mem), PTE_W|PTE_U); } - memset(mem, 0, PGSIZE); - mappages(pgdir, addr + i, PGSIZE, PADDR(mem), PTE_W|PTE_U); } return 1; } +// deallocate some of the user pages, in response to sbrk() +// with a negative argument. if addr is not page-aligned, +// then only deallocates starting at the next page boundary. +int +deallocuvm(pde_t *pgdir, char *addr, uint sz) +{ + if (addr + sz > (char*)USERTOP) + return 0; + char *first = (char*) PGROUNDUP((uint)addr); + char *last = PGROUNDDOWN(addr + sz - 1); + char *a; + for(a = first; a <= last; a += PGSIZE){ + pte_t *pte = walkpgdir(pgdir, a, 0); + if(pte && (*pte & PTE_P) != 0){ + uint pa = PTE_ADDR(*pte); + if(pa == 0) + panic("deallocuvm"); + kfree((void *) pa, PGSIZE); + *pte = 0; + } + } + return 1; +} + +// free a page table and all the physical memory pages +// in the user part. void freevm(pde_t *pgdir) { @@ -211,9 +259,8 @@ freevm(pde_t *pgdir) if (pgtab[j] != 0) { uint pa = PTE_ADDR(pgtab[j]); uint va = PGADDR(i, j, 0); - if (va >= USERTOP) // done with user part? - break; - kfree((void *) pa, PGSIZE); + if (va < USERTOP) // user memory + kfree((void *) pa, PGSIZE); pgtab[j] = 0; } } @@ -261,6 +308,8 @@ inituvm(pde_t *pgdir, char *addr, char *init, uint sz) } } +// given a parent process's page table, create a copy +// of it for a child. pde_t* copyuvm(pde_t *pgdir, uint sz) { @@ -273,17 +322,22 @@ copyuvm(pde_t *pgdir, uint sz) for (i = 0; i < sz; i += PGSIZE) { if (!(pte = walkpgdir(pgdir, (void *)i, 0))) panic("copyuvm: pte should exist\n"); - pa = PTE_ADDR(*pte); - if (!(mem = kalloc(PGSIZE))) - return 0; - memmove(mem, (char *)pa, PGSIZE); - if (!mappages(d, (void *)i, PGSIZE, PADDR(mem), PTE_W|PTE_U)) - return 0; + if(*pte & PTE_P){ + pa = PTE_ADDR(*pte); + if (!(mem = kalloc(PGSIZE))) + return 0; + memmove(mem, (char *)pa, PGSIZE); + if (!mappages(d, (void *)i, PGSIZE, PADDR(mem), PTE_W|PTE_U)) + return 0; + } } return d; } -// Gather about physical memory layout. Called once during boot. +// Gather information about physical memory layout. +// Called once during boot. +// Really should find out how much physical memory +// there is rather than assuming PHYSTOP. void pminit(void) { @@ -298,27 +352,13 @@ pminit(void) kernend = ((uint)end + PGSIZE) & ~(PGSIZE-1); kerntext = ph[0].va; kerndata = ph[1].va; - kerntsz = kerndata - kerntext; - kerndsz = kernend - kerndata; + kerntsz = ph[0].memsz; + kerndsz = ph[1].memsz; freesz = PHYSTOP - kernend; - cprintf("kerntext@0x%x(sz=0x%x), kerndata@0x%x(sz=0x%x), kernend 0x%x freesz = 0x%x\n", - kerntext, kerntsz, kerndata, kerndsz, kernend, freesz); - kinit((char *)kernend, freesz); } -// Jump to mainc on a properly-allocated kernel stack -void -jkstack(void) -{ - char *kstack = kalloc(PGSIZE); - if (!kstack) - panic("jkstack\n"); - char *top = kstack + PGSIZE; - jstack((uint) top); -} - // Allocate one page table for the machine for the kernel address // space for scheduler processes. void