From 65bd8e139a8368e987455a10ec59dd7b079b3af1 Mon Sep 17 00:00:00 2001
From: rsc
Date: Sun, 16 Jul 2006 01:15:28 +0000
Subject: [PATCH] New scheduler.

Removed cli and sti stack in favor of tracking number of locks held
on each CPU and explicit conditionals in spinlock.c.
---
 console.c   |   2 +-
 defs.h      |   3 -
 dot-bochsrc |   2 +-
 main.c      |  10 +-
 proc.c      | 371 +++++++++++++++++++++++++++-------------------
 proc.h      |   5 +-
 spinlock.c  |  46 +++----
 syscall.c   |   3 +-
 trap.c      |  11 +-
 trapasm.S   |  12 +-
 x86.h       |  14 ++
 11 files changed, 249 insertions(+), 230 deletions(-)

diff --git a/console.c b/console.c
index d228edb..86c080e 100644
--- a/console.c
+++ b/console.c
@@ -113,7 +113,7 @@ void
 cprintf(char *fmt, ...)
 {
   int i, state = 0, c;
-  unsigned int *ap = (unsigned int *) &fmt + 1;
+  unsigned int *ap = (unsigned int *)(void*)&fmt + 1;
 
   if(use_console_lock)
     acquire(&console_lock);
diff --git a/defs.h b/defs.h
index dc8a831..824b986 100644
--- a/defs.h
+++ b/defs.h
@@ -13,7 +13,6 @@ struct proc;
 struct jmpbuf;
 void setupsegs(struct proc *);
 struct proc * newproc(void);
-void swtch(int);
 struct spinlock;
 void sleep(void *, struct spinlock *);
 void wakeup(void *);
@@ -22,8 +21,6 @@ void proc_exit(void);
 int proc_kill(int);
 int proc_wait(void);
 void yield(void);
-void cli(void);
-void sti(void);
 
 // swtch.S
 struct jmpbuf;
diff --git a/dot-bochsrc b/dot-bochsrc
index d8afbd6..5490056 100755
--- a/dot-bochsrc
+++ b/dot-bochsrc
@@ -107,7 +107,7 @@ romimage: file=$BXSHARE/BIOS-bochs-latest, address=0xf0000
 #   650Mhz Athlon K-7 with Linux 2.4.4/egcs-2.91.66  2 to 2.5 Mips
 #   400Mhz Pentium II with Linux 2.0.36/egcs-1.0.3   1 to 1.8 Mips
 #=======================================================================
-cpu: count=2, ips=10000000
+cpu: count=2, ips=10000000, reset_on_triple_fault=0
 
 #=======================================================================
 # MEGS
diff --git a/main.c b/main.c
index 402aab6..70e93cf 100644
--- a/main.c
+++ b/main.c
@@ -18,19 +18,19 @@ extern uint8_t _binary_userfs_start[], _binary_userfs_size[];
 
 extern int use_console_lock;
 
+struct spinlock sillylock;  // hold this to keep interrupts disabled
+
 int
 main()
 {
   struct proc *p;
 
   if (acpu) {
-    cpus[cpu()].clis = 1;
     cprintf("an application processor\n");
     idtinit(); // CPU's idt
     lapic_init(cpu());
     lapic_timerinit();
     lapic_enableintr();
-    sti();
     scheduler();
   }
   acpu = 1;
@@ -40,10 +40,9 @@ main()
 
   mp_init(); // collect info about this machine
 
+  acquire(&sillylock);
   use_console_lock = 1;
 
-  cpus[cpu()].clis = 1; // cpu starts as if we had called cli()
-
   lapic_init(mp_bcpu());
 
   cprintf("\nxV6\n\n");
@@ -56,7 +55,7 @@ main()
   // create fake process zero
   p = &proc[0];
   memset(p, 0, sizeof *p);
-  p->state = WAITING;
+  p->state = SLEEPING;
   p->sz = 4 * PAGE;
   p->mem = kalloc(p->sz);
   memset(p->mem, 0, p->sz);
@@ -88,6 +87,7 @@ main()
   //load_icode(p, _binary_userfs_start, (unsigned) _binary_userfs_size);
   p->state = RUNNABLE;
   cprintf("loaded userfs\n");
+  release(&sillylock);
 
   scheduler();
 
diff --git a/proc.c b/proc.c
index 76ec64e..01d8f2f 100644
--- a/proc.c
+++ b/proc.c
@@ -12,6 +12,7 @@ struct spinlock proc_table_lock;
 struct proc proc[NPROC];
 struct proc *curproc[NCPU];
 int next_pid = 1;
+extern void forkret(void);
 
 /*
  * set up a process's task state and segment descriptors
@@ -96,12 +97,14 @@ newproc()
   *(np->tf) = *(op->tf);
   np->tf->tf_regs.reg_eax = 0; // so fork() returns 0 in child
 
-  // set up new jmpbuf to start executing at trapret with esp pointing at tf
+  // Set up new jmpbuf to start executing forkret (see trapasm.S)
+  // with esp pointing at tf.  Forkret will call forkret1 (below) to release
+  // the proc_table_lock and then jump into the usual trap return code.
   memset(&np->jmpbuf, 0, sizeof np->jmpbuf);
-  np->jmpbuf.jb_eip = (unsigned) trapret;
+  np->jmpbuf.jb_eip = (unsigned) forkret;
   np->jmpbuf.jb_esp = (unsigned) np->tf - 4;  // -4 for the %eip that isn't actually there
 
-  // copy file descriptors
+  // Copy file descriptors
   for(fd = 0; fd < NOFILE; fd++){
     np->fds[fd] = op->fds[fd];
     if(np->fds[fd])
@@ -111,128 +114,153 @@ newproc()
   return np;
 }
 
+void
+forkret1(void)
+{
+  release(&proc_table_lock);
+}
+
+// Per-CPU process scheduler.
+// Each CPU calls scheduler() after setting itself up.
+// Scheduler never returns.  It loops, doing:
+//  - choose a process to run
+//  - longjmp to start running that process
+//  - eventually that process transfers control
+//      via longjmp back to the top of scheduler.
 void
 scheduler(void)
 {
-  struct proc *op, *np;
+  struct proc *p;
   int i;
 
   cprintf("start scheduler on cpu %d jmpbuf %p\n", cpu(), &cpus[cpu()].jmpbuf);
   cpus[cpu()].lastproc = &proc[0];
 
-  setjmp(&cpus[cpu()].jmpbuf);
-
-  op = curproc[cpu()];
-
-  if(op == 0 || op->mtx != &proc_table_lock)
-    acquire1(&proc_table_lock, op);
-
-  if(op){
-    if(op->newstate <= 0 || op->newstate > ZOMBIE)
-      panic("scheduler");
-    op->state = op->newstate;
-    op->newstate = -1;
-    if(op->mtx){
-      struct spinlock *mtx = op->mtx;
-      op->mtx = 0;
-      if(mtx != &proc_table_lock)
-        release1(mtx, op);
-    }
-  }
-
-  // find a runnable process and switch to it
-  curproc[cpu()] = 0;
-  np = cpus[cpu()].lastproc + 1;
-  while(1){
-    for(i = 0; i < NPROC; i++){
-      if(np >= &proc[NPROC])
-        np = &proc[0];
-      if(np->state == RUNNABLE)
-        break;
-      np++;
-    }
-
-    if(i < NPROC){
-      np->state = RUNNING;
-      release1(&proc_table_lock, op);
-      break;
-    }
-
-    release1(&proc_table_lock, op);
-    op = 0;
+  for(;;){
+    // Loop over process table looking for process to run.
     acquire(&proc_table_lock);
-    np = &proc[0];
+    for(i = 0; i < NPROC; i++){
+      p = &proc[i];
+      if(p->state != RUNNABLE)
+        continue;
+
+      // Run this process.
+      // XXX move this into swtch or trapret or something.
+      // It can run on the other stack.
+      // h/w sets busy bit in TSS descriptor sometimes, and faults
+      // if it's set in LTR. so clear tss descriptor busy bit.
+      p->gdt[SEG_TSS].sd_type = STS_T32A;
+
+      // XXX should probably have an lgdt() function in x86.h
+      // to confine all the inline assembly.
+      // XXX probably ought to lgdt on trap return too, in case
+      // a system call has moved a program or changed its size.
+      asm volatile("lgdt %0" : : "g" (p->gdt_pd.pd_lim));
+      ltr(SEG_TSS << 3);
+
+      // Switch to chosen process.  It is the process's job
+      // to release proc_table_lock and then reacquire it
+      // before jumping back to us.
+      if(0) cprintf("cpu%d: run %d\n", cpu(), p-proc);
+      curproc[cpu()] = p;
+      p->state = RUNNING;
+      if(setjmp(&cpus[cpu()].jmpbuf) == 0)
+        longjmp(&p->jmpbuf);
+
+      // Process is done running for now.
+      // It should have changed its p->state before coming back.
+      curproc[cpu()] = 0;
+      if(p->state == RUNNING)
+        panic("swtch to scheduler with state=RUNNING");
+
+      // XXX if not holding proc_table_lock panic.
+    }
+    release(&proc_table_lock);
+
+    if(cpus[cpu()].nlock != 0)
+      panic("holding locks in scheduler");
+
+    // With proc_table_lock released, there are no
+    // locks held on this cpu, so interrupts are enabled.
+    // Hardware interrupts can happen here.
+    // Also, releasing the lock here lets the other CPUs
+    // look for runnable processes too.
   }
-
-  cpus[cpu()].lastproc = np;
-  curproc[cpu()] = np;
-
-  // h/w sets busy bit in TSS descriptor sometimes, and faults
-  // if it's set in LTR. so clear tss descriptor busy bit.
-  np->gdt[SEG_TSS].sd_type = STS_T32A;
-
-  // XXX should probably have an lgdt() function in x86.h
-  // to confine all the inline assembly.
-  // XXX probably ought to lgdt on trap return too, in case
-  // a system call has moved a program or changed its size.
-  asm volatile("lgdt %0" : : "g" (np->gdt_pd.pd_lim));
-  ltr(SEG_TSS << 3);
-
-  if(0) cprintf("cpu%d: run %d esp=%p callerpc=%p\n", cpu(), np-proc);
-  longjmp(&np->jmpbuf);
 }
 
-// give up the cpu by switching to the scheduler,
-// which runs on the per-cpu stack.
+// Enter scheduler.  Must already hold proc_table_lock
+// and have changed curproc[cpu()]->state.
 void
-swtch(int newstate)
+sched(void)
 {
-  struct proc *p = curproc[cpu()];
-
-  if(p == 0)
-    panic("swtch no proc");
-  if(p->mtx == 0 && p->locks != 0)
-    panic("swtch w/ locks");
-  if(p->mtx && p->locks != 1)
-    panic("swtch w/ locks 1");
-  if(p->mtx && p->mtx->locked == 0)
-    panic("switch w/ lock but not held");
-  if(p->locks && (read_eflags() & FL_IF))
-    panic("swtch w/ lock but FL_IF");
-
-  p->newstate = newstate; // basically an argument to scheduler()
-  if(setjmp(&p->jmpbuf) == 0)
+  if(setjmp(&curproc[cpu()]->jmpbuf) == 0)
     longjmp(&cpus[cpu()].jmpbuf);
 }
 
+// Give up the CPU for one scheduling round.
 void
-sleep(void *chan, struct spinlock *mtx)
+yield()
+{
+  struct proc *p;
+
+  if((p=curproc[cpu()]) == 0 || curproc[cpu()]->state != RUNNING)
+    panic("yield");
+  acquire(&proc_table_lock);
+  p->state = RUNNABLE;
+  sched();
+  release(&proc_table_lock);
+}
+
+// Atomically release lock and sleep on chan.
+// Reacquires lock when reawakened.
+void
+sleep(void *chan, struct spinlock *lk)
 {
   struct proc *p = curproc[cpu()];
 
   if(p == 0)
     panic("sleep");
 
-  p->chan = chan;
-  p->mtx = mtx; // scheduler will release it
+  // Must acquire proc_table_lock in order to
+  // change p->state and then call sched.
+  // Once we hold proc_table_lock, we can be
+  // guaranteed that we won't miss any wakeup
+  // (wakeup runs with proc_table_lock locked),
+  // so it's okay to release lk.
+  if(lk != &proc_table_lock){
+    acquire(&proc_table_lock);
+    release(lk);
+  }
 
-  swtch(WAITING);
-
-  if(mtx)
-    acquire(mtx);
+  // Go to sleep.
+  p->chan = chan;
+  p->state = SLEEPING;
+  sched();
+
+  // Tidy up.
   p->chan = 0;
+
+  // Reacquire original lock.
+  if(lk != &proc_table_lock){
+    release(&proc_table_lock);
+    acquire(lk);
+  }
 }
 
+// Wake up all processes sleeping on chan.
+// Proc_table_lock must be held.
 void
 wakeup1(void *chan)
 {
   struct proc *p;
 
   for(p = proc; p < &proc[NPROC]; p++)
-    if(p->state == WAITING && p->chan == chan)
+    if(p->state == SLEEPING && p->chan == chan)
       p->state = RUNNABLE;
 }
 
+// Wake up all processes sleeping on chan.
+// Proc_table_lock is acquired and released.
 void
 wakeup(void *chan)
 {
@@ -241,77 +269,9 @@ wakeup(void *chan)
   release(&proc_table_lock);
 }
 
-// give up the CPU but stay marked as RUNNABLE
-void
-yield()
-{
-  if(curproc[cpu()] == 0 || curproc[cpu()]->state != RUNNING)
-    panic("yield");
-  swtch(RUNNABLE);
-}
-
-void
-proc_exit()
-{
-  struct proc *p;
-  struct proc *cp = curproc[cpu()];
-  int fd;
-
-  for(fd = 0; fd < NOFILE; fd++){
-    if(cp->fds[fd]){
-      fd_close(cp->fds[fd]);
-      cp->fds[fd] = 0;
-    }
-  }
-
-  acquire(&proc_table_lock);
-
-  // wake up parent
-  for(p = proc; p < &proc[NPROC]; p++)
-    if(p->pid == cp->ppid)
-      wakeup1(p);
-
-  // abandon children
-  for(p = proc; p < &proc[NPROC]; p++)
-    if(p->ppid == cp->pid)
-      p->pid = 1;
-
-  cp->mtx = &proc_table_lock;
-  swtch(ZOMBIE);
-  panic("a zombie revived");
-}
-
-int
-proc_wait(void)
-{
-  struct proc *p;
-  struct proc *cp = curproc[cpu()];
-  int any, pid;
-
-  acquire(&proc_table_lock);
-
-  while(1){
-    any = 0;
-    for(p = proc; p < &proc[NPROC]; p++){
-      if(p->state == ZOMBIE && p->ppid == cp->pid){
-        kfree(p->mem, p->sz);
-        kfree(p->kstack, KSTACKSIZE);
-        pid = p->pid;
-        p->state = UNUSED;
-        release(&proc_table_lock);
-        return pid;
-      }
-      if(p->state != UNUSED && p->ppid == cp->pid)
-        any = 1;
-    }
-    if(any == 0){
-      release(&proc_table_lock);
-      return -1;
-    }
-    sleep(cp, &proc_table_lock);
-  }
-}
-
+// Kill the process with the given pid.
+// Process won't actually exit until it returns
+// to user space (see trap in trap.c).
 int
 proc_kill(int pid)
 {
@@ -319,9 +279,10 @@ proc_kill(int pid)
 
   acquire(&proc_table_lock);
   for(p = proc; p < &proc[NPROC]; p++){
-    if(p->pid == pid && p->state != UNUSED){
+    if(p->pid == pid){
       p->killed = 1;
-      if(p->state == WAITING)
+      // Wake process from sleep if necessary.
+      if(p->state == SLEEPING)
        p->state = RUNNABLE;
      release(&proc_table_lock);
      return 0;
@@ -331,26 +292,80 @@ proc_kill(int pid)
   return -1;
 }
 
-// disable interrupts
+// Exit the current process.  Does not return.
+// Exited processes remain in the zombie state
+// until their parent calls wait() to find out they exited.
 void
-cli(void)
+proc_exit()
 {
-  if(cpus[cpu()].clis == 0)
-    __asm __volatile("cli");
-  cpus[cpu()].clis += 1;
-  if((read_eflags() & FL_IF) != 0)
-    panic("cli but enabled");
+  struct proc *p;
+  struct proc *cp = curproc[cpu()];
+  int fd;
+
+  // Close all open files.
+  for(fd = 0; fd < NOFILE; fd++){
+    if(cp->fds[fd]){
+      fd_close(cp->fds[fd]);
+      cp->fds[fd] = 0;
+    }
+  }
+
+  acquire(&proc_table_lock);
+
+  // Wake up our parent.
+  for(p = proc; p < &proc[NPROC]; p++)
+    if(p->pid == cp->ppid)
+      wakeup1(p);
+
+  // Reparent our children to process 1.
+  for(p = proc; p < &proc[NPROC]; p++)
+    if(p->ppid == cp->pid)
+      p->ppid = 1;
+
+  // Jump into the scheduler, never to return.
+  cp->state = ZOMBIE;
+  sched();
+  panic("zombie exit");
 }
 
-// enable interrupts
-void
-sti(void)
+// Wait for a child process to exit and return its pid.
+// Return -1 if this process has no children.
+int
+proc_wait(void)
 {
-  if((read_eflags() & FL_IF) != 0)
-    panic("sti but enabled");
-  if(cpus[cpu()].clis < 1)
-    panic("sti");
-  cpus[cpu()].clis -= 1;
-  if(cpus[cpu()].clis < 1)
-    __asm __volatile("sti");
+  struct proc *p;
+  struct proc *cp = curproc[cpu()];
+  int i, havekids, pid;
+
+  acquire(&proc_table_lock);
+  for(;;){
+    // Scan through table looking for zombie children.
+    havekids = 0;
+    for(i = 0; i < NPROC; i++){
+      p = &proc[i];
+      if(p->ppid == cp->pid){
+        if(p->state == ZOMBIE){
+          // Found one.
+          kfree(p->mem, p->sz);
+          kfree(p->kstack, KSTACKSIZE);
+          pid = p->pid;
+          p->state = UNUSED;
+          p->pid = 0;
+          release(&proc_table_lock);
+          return pid;
+        }
+        havekids = 1;
+      }
+    }
+
+    // No point waiting if we don't have any children.
+    if(!havekids){
+      release(&proc_table_lock);
+      return -1;
+    }
+
+    // Wait for children to exit.  (See wakeup1 call in proc_exit.)
+    sleep(cp, &proc_table_lock);
+  }
 }
+
diff --git a/proc.h b/proc.h
index d8aa84f..60ef7f5 100644
--- a/proc.h
+++ b/proc.h
@@ -33,7 +33,7 @@ struct jmpbuf {
   int jb_eip;
 };
 
-enum proc_state { UNUSED, EMBRYO, WAITING, RUNNABLE, RUNNING, ZOMBIE };
+enum proc_state { UNUSED, EMBRYO, SLEEPING, RUNNABLE, RUNNING, ZOMBIE };
 
 struct proc{
   char *mem; // start of process's physical memory
@@ -46,7 +46,6 @@ struct proc{
   int ppid;
   void *chan; // sleep
   int killed;
-  int locks; // # of locks currently held
   struct fd *fds[NOFILE];
 
   struct Taskstate ts;  // only to give cpu address of kernel stack
@@ -71,7 +70,7 @@ struct cpu {
   struct jmpbuf jmpbuf;
   char mpstack[MPSTACK]; // per-cpu start-up stack, only used to get into main()
   struct proc *lastproc; // last proc scheduled on this cpu (never NULL)
-  int clis; // cli() nesting depth
+  int nlock; // # of locks currently held
 };
 
 extern struct cpu cpus[NCPU];
diff --git a/spinlock.c b/spinlock.c
index 4c11064..aa11ad5 100644
--- a/spinlock.c
+++ b/spinlock.c
@@ -6,53 +6,47 @@
 #include "proc.h"
 #include "spinlock.h"
 
-#define DEBUG 0
+// Can't call cprintf from inside these routines,
+// because cprintf uses them itself.
+#define cprintf dont_use_cprintf
 
 extern int use_console_lock;
 
-int getcallerpc(void *v) {
-  return ((int*)v)[-1];
+int
+getcallerpc(void *v)
+{
+  return ((int*)v)[-1];
 }
 
 void
 acquire1(struct spinlock * lock, struct proc *cp)
 {
-  if(DEBUG) cprintf("cpu%d: acquiring at %x\n", cpu(), getcallerpc(&lock));
-
-  cli();
-  while ( cmpxchg(0, 1, &lock->locked) == 1 ) { ; }
-  lock->locker_pc = getcallerpc(&lock);
-
-  if(cp)
-    cp->locks += 1;
-
-  if(DEBUG) cprintf("cpu%d: acquired at %x\n", cpu(), getcallerpc(&lock));
+  if(cpus[cpu()].nlock++ == 0)
+    cli();
+  while(cmpxchg(0, 1, &lock->locked) == 1)
+    ;
+  cpuid(0, 0, 0, 0, 0);  // memory barrier
+  lock->locker_pc = getcallerpc(&lock);
 }
 
 void
 release1(struct spinlock * lock, struct proc *cp)
 {
-
-  if(DEBUG) cprintf ("cpu%d: releasing at %x\n", cpu(), getcallerpc(&lock));
-
-  if(lock->locked != 1)
-    panic("release");
-
-  if(cp)
-    cp->locks -= 1;
-
-  cmpxchg(1, 0, &lock->locked);
-  sti();
+  cpuid(0, 0, 0, 0, 0);  // memory barrier
+  lock->locked = 0;
+  if(--cpus[cpu()].nlock == 0)
+    sti();
 }
 
 void
 acquire(struct spinlock *lock)
 {
-	acquire1(lock, curproc[cpu()]);
+  acquire1(lock, curproc[cpu()]);
 }
 
 void
 release(struct spinlock *lock)
 {
-	release1(lock, curproc[cpu()]);
+  release1(lock, curproc[cpu()]);
 }
+
diff --git a/syscall.c b/syscall.c
index e03901d..420a578 100644
--- a/syscall.c
+++ b/syscall.c
@@ -34,8 +34,9 @@ fetchint(struct proc *p, unsigned addr, int *ip)
   return 0;
 }
 
+// This arg is void* so that both int* and uint* can be passed.
 int
-fetcharg(int argno, int *ip)
+fetcharg(int argno, void *ip)
 {
   unsigned esp;
 
diff --git a/trap.c b/trap.c
index 66b15e0..b210541 100644
--- a/trap.c
+++ b/trap.c
@@ -36,11 +36,6 @@ trap(struct Trapframe *tf)
 {
   int v = tf->tf_trapno;
 
-  if(cpus[cpu()].clis){
-    cprintf("cpu %d v %d eip %x\n", cpu(), v, tf->tf_eip);
-    panic("interrupt while interrupts are off");
-  }
-
   if(v == T_SYSCALL){
     struct proc *cp = curproc[cpu()];
     int num = cp->tf->tf_regs.reg_eax;
@@ -56,12 +51,10 @@ trap(struct Trapframe *tf)
       panic("trap ret but not RUNNING");
     if(tf != cp->tf)
       panic("trap ret wrong tf");
-    if(cp->locks){
+    if(cpus[cpu()].nlock){
      cprintf("num=%d\n", num);
      panic("syscall returning locks held");
    }
-    if(cpus[cpu()].clis)
-      panic("syscall returning but clis != 0");
    if((read_eflags() & FL_IF) == 0)
      panic("syscall returning but FL_IF clear");
    if(read_esp() < (unsigned)cp->kstack ||
@@ -75,7 +68,7 @@ trap(struct Trapframe *tf)
   if(v == (IRQ_OFFSET + IRQ_TIMER)){
     struct proc *cp = curproc[cpu()];
     lapic_timerintr();
-    if(cp && cp->locks)
+    if(cpus[cpu()].nlock)
       panic("timer interrupt while holding a lock");
     if(cp){
 #if 1
diff --git a/trapasm.S b/trapasm.S
index 2608328..e0e27be 100644
--- a/trapasm.S
+++ b/trapasm.S
@@ -1,8 +1,10 @@
 #include "mmu.h"
 
 .text
-  .globl alltraps
-  .globl trap
+.globl trap
+.globl trapret1
+
+.globl alltraps
 alltraps:
   /* vectors.S sends all traps here */
   pushl %ds # build
@@ -16,11 +18,11 @@ alltraps:
   addl $4, %esp
 
   # return falls through to trapret...
-  .globl trapret
 /*
  * a forked process RETs here
  * expects ESP to point to a Trapframe
  */
+.globl trapret
 trapret:
   popal
   popl %es
@@ -28,6 +30,10 @@ trapret:
   addl $0x8, %esp /* trapno and errcode */
   iret
 
+.globl forkret
+forkret:
+  call forkret1
+  jmp trapret
 
 .globl acpu
 acpu:
diff --git a/x86.h b/x86.h
index cc809e7..ee7e6ce 100644
--- a/x86.h
+++ b/x86.h
@@ -29,6 +29,8 @@ static __inline uint32_t read_ebp(void) __attribute__((always_inline));
 static __inline uint32_t read_esp(void) __attribute__((always_inline));
 static __inline void cpuid(uint32_t info, uint32_t *eaxp, uint32_t *ebxp, uint32_t *ecxp, uint32_t *edxp);
 static __inline uint64_t read_tsc(void) __attribute__((always_inline));
+static __inline void cli(void) __attribute__((always_inline));
+static __inline void sti(void) __attribute__((always_inline));
 
 static __inline void
 breakpoint(void)
@@ -304,6 +306,18 @@ read_tsc(void)
   return tsc;
 }
 
+static __inline void
+cli(void)
+{
+  __asm__ volatile("cli");
+}
+
+static __inline void
+sti(void)
+{
+  __asm__ volatile("sti");
+}
+
 struct PushRegs {
   /* registers as pushed by pusha */
   uint32_t reg_edi;
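
Notes:

The headline locking change above is the per-CPU nlock counter: the first
acquire() on a CPU executes cli(), and the release() that drops the count
back to zero executes sti(). One subtlety is that release1() runs sti()
unconditionally at count zero, which assumes interrupts should always be on
when no locks are held. A sketch of the refinement later adopted by xv6
(pushcli/popcli) records whether interrupts were enabled before the first
lock and restores that state instead. The intena field and the function
names below are NOT part of this patch (later xv6 also calls the counter
ncli rather than nlock); this is an illustrative kernel-style sketch, not
the committed code.

void
pushcli(void)
{
  int eflags;

  eflags = read_eflags();
  cli();
  if(cpus[cpu()].nlock++ == 0)
    cpus[cpu()].intena = eflags & FL_IF;  // remember caller's IF bit
}

void
popcli(void)
{
  if(read_eflags() & FL_IF)
    panic("popcli - interruptible");
  if(--cpus[cpu()].nlock < 0)
    panic("popcli");
  if(cpus[cpu()].nlock == 0 && cpus[cpu()].intena)
    sti();  // re-enable only if interrupts were on before the first lock
}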
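The scheduler()/sched() pair is a coroutine-style handoff: one jmpbuf per
CPU, one per process, each with its own stack, and a setjmp/longjmp in each
direction. Below is a minimal user-space analogue using POSIX ucontext
(names like schedctx and the 64 KB stack size are illustrative, not from
the patch). Plain setjmp/longjmp cannot demonstrate this portably, because
longjmp into a frame that has been jumped past is undefined behavior; that
is exactly why each xv6 process gets its own kernel stack. The ucontext
functions are marked obsolescent in newer POSIX but remain available on
Linux/glibc; compile with plain cc.

#include <stdio.h>
#include <stdlib.h>
#include <ucontext.h>

static ucontext_t schedctx;  /* plays the role of cpus[cpu()].jmpbuf */
static ucontext_t procctx;   /* plays the role of p->jmpbuf */

/* Runs on its own stack, like a process on its kernel stack. */
static void
process(void)
{
  int i;

  for(i = 0; i < 3; i++){
    printf("process: working, then yielding (%d)\n", i);
    /* Like sched(): save our context, resume the scheduler's. */
    swapcontext(&procctx, &schedctx);
  }
  printf("process: exiting\n");
  /* Returning resumes uc_link, i.e. the scheduler. */
}

int
main(void)
{
  int round;
  char *stack = malloc(64*1024);  /* separate stack, like p->kstack */

  getcontext(&procctx);
  procctx.uc_stack.ss_sp = stack;
  procctx.uc_stack.ss_size = 64*1024;
  procctx.uc_link = &schedctx;
  makecontext(&procctx, process, 0);

  for(round = 0; round < 4; round++){
    printf("scheduler: dispatching round %d\n", round);
    /* Like scheduler(): save our context, resume the process's. */
    swapcontext(&schedctx, &procctx);
  }

  free(stack);
  return 0;
}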
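Finally, the new sleep(chan, lk) has the same contract POSIX exposes as
pthread_cond_wait: release the lock and go to sleep atomically, so a wakeup
arriving between the release and the sleep cannot be lost. In the patch,
proc_table_lock plays the role of the mutex guarding the handshake (wakeup
runs with it held, so a sleeper holding it cannot miss a wakeup). A small
user-space analogue, with hypothetical names (producer, ready); build with
cc -pthread.

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lk = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t chan = PTHREAD_COND_INITIALIZER;
static int ready;

static void *
producer(void *arg)
{
  (void)arg;
  pthread_mutex_lock(&lk);
  ready = 1;
  pthread_cond_signal(&chan);        /* like wakeup(chan) */
  pthread_mutex_unlock(&lk);
  return 0;
}

int
main(void)
{
  pthread_t t;

  pthread_create(&t, 0, producer, 0);

  pthread_mutex_lock(&lk);
  while(!ready)                      /* recheck, as after sleep() returns */
    pthread_cond_wait(&chan, &lk);   /* like sleep(chan, &lk) */
  pthread_mutex_unlock(&lk);

  pthread_join(t, 0);
  printf("wakeup was not lost\n");
  return 0;
}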