Merge commit 'origin/master' into page

This commit is contained in:
Frans Kaashoek 2011-07-27 20:49:45 -04:00
commit dccb915282
61 changed files with 1606 additions and 1616 deletions

1
.gitignore vendored
View file

@ -1,3 +1,4 @@
*~
_*
*.o
*.d

View file

@ -26,19 +26,53 @@ OBJS = \
uart.o\
vectors.o\
vm.o\
log.o\
# Cross-compiling (e.g., on Mac OS X)
TOOLPREFIX = i386-jos-elf-
#TOOLPREFIX = i386-jos-elf-
# Using native tools (e.g., on X86 Linux)
#TOOLPREFIX =
# Try to infer the correct TOOLPREFIX if not set
# Probe for a cross toolchain only when TOOLPREFIX has not been defined
# anywhere (makefile, command line or environment).  The variable's origin
# is tested instead of using ifndef because ifndef treats an empty value as
# "not set": an explicit empty prefix such as 'make TOOLPREFIX=' (native
# tools) would otherwise still run the probe and print the error below.
# The probe prefers i386-jos-elf-objdump and falls back to the native
# objdump when that reports elf32-i386 support.
ifeq ($(origin TOOLPREFIX),undefined)
TOOLPREFIX := $(shell if i386-jos-elf-objdump -i 2>&1 | grep '^elf32-i386$$' >/dev/null 2>&1; \
then echo 'i386-jos-elf-'; \
elif objdump -i 2>&1 | grep 'elf32-i386' >/dev/null 2>&1; \
then echo ''; \
else echo "***" 1>&2; \
echo "*** Error: Couldn't find an i386-*-elf version of GCC/binutils." 1>&2; \
echo "*** Is the directory with i386-jos-elf-gcc in your PATH?" 1>&2; \
echo "*** If your i386-*-elf toolchain is installed with a command" 1>&2; \
echo "*** prefix other than 'i386-jos-elf-', set your TOOLPREFIX" 1>&2; \
echo "*** environment variable to that prefix and run 'make' again." 1>&2; \
echo "*** To turn off this error, run 'gmake TOOLPREFIX= ...'." 1>&2; \
echo "***" 1>&2; exit 1; fi)
endif
# If the makefile can't find QEMU, specify its path here
#QEMU =
# Try to infer the correct QEMU
# Locate a QEMU binary unless the user supplied one.
# The assignment is deliberately recursive (=) rather than simple (:=):
# the probe, and its error message, only run when $(QEMU) is actually
# expanded by one of the qemu targets, so plain builds on machines without
# QEMU stay quiet.
# Search order: 'qemu' on PATH, then 'qemu-system-i386' on PATH (the name
# used by QEMU packages that do not ship a plain 'qemu' binary), then the
# Mac OS X Q.app bundle.  stderr of 'which' is discarded so that a miss
# does not print noise ahead of the error text.
ifndef QEMU
QEMU = $(shell if which qemu > /dev/null 2>&1; \
then echo qemu; exit; \
elif which qemu-system-i386 > /dev/null 2>&1; \
then echo qemu-system-i386; exit; \
else \
qemu=/Applications/Q.app/Contents/MacOS/i386-softmmu.app/Contents/MacOS/i386-softmmu; \
if test -x $$qemu; then echo $$qemu; exit; fi; fi; \
echo "***" 1>&2; \
echo "*** Error: Couldn't find a working QEMU executable." 1>&2; \
echo "*** Is the directory containing the qemu binary in your PATH" 1>&2; \
echo "*** or have you tried setting the QEMU variable in Makefile?" 1>&2; \
echo "***" 1>&2; exit 1)
endif
CC = $(TOOLPREFIX)gcc
AS = $(TOOLPREFIX)gas
LD = $(TOOLPREFIX)ld
OBJCOPY = $(TOOLPREFIX)objcopy
OBJDUMP = $(TOOLPREFIX)objdump
CFLAGS = -fno-pic -static -fno-builtin -fno-strict-aliasing -O2 -Wall -MD -ggdb -m32 -Werror
CFLAGS = -fno-pic -static -fno-builtin -fno-strict-aliasing -O2 -Wall -MD -ggdb -m32 -Werror -fno-omit-frame-pointer
CFLAGS += $(shell $(CC) -fno-stack-protector -E -x c /dev/null >/dev/null 2>&1 && echo -fno-stack-protector)
ASFLAGS = -m32 -gdwarf-2
# FreeBSD ld wants ``elf_i386_fbsd''
@ -49,6 +83,11 @@ xv6.img: bootblock kernel fs.img
dd if=bootblock of=xv6.img conv=notrunc
dd if=kernel of=xv6.img seek=1 conv=notrunc
xv6memfs.img: bootblock kernelmemfs
dd if=/dev/zero of=xv6memfs.img count=10000
dd if=bootblock of=xv6memfs.img conv=notrunc
dd if=kernelmemfs of=xv6memfs.img seek=1 conv=notrunc
bootblock: bootasm.S bootmain.c
$(CC) $(CFLAGS) -fno-pic -O -nostdinc -I. -c bootmain.c
$(CC) $(CFLAGS) -fno-pic -nostdinc -I. -c bootasm.S
@ -69,11 +108,23 @@ initcode: initcode.S
$(OBJCOPY) -S -O binary initcode.out initcode
$(OBJDUMP) -S initcode.o > initcode.asm
kernel: $(OBJS) bootother initcode
$(LD) $(LDFLAGS) -Ttext 0x100000 -e main -o kernel $(OBJS) -b binary initcode bootother
kernel: $(OBJS) multiboot.o data.o bootother initcode
$(LD) $(LDFLAGS) -Ttext 0x100000 -e main -o kernel multiboot.o data.o $(OBJS) -b binary initcode bootother
$(OBJDUMP) -S kernel > kernel.asm
$(OBJDUMP) -t kernel | sed '1,/SYMBOL TABLE/d; s/ .* / /; /^$$/d' > kernel.sym
# kernelmemfs is a copy of kernel that maintains the
# disk image in memory instead of writing to a disk.
# This is not so useful for testing persistent storage or
# exploring disk buffering implementations, but it is
# great for testing the kernel on real hardware without
# needing a scratch disk.
MEMFSOBJS = $(filter-out ide.o,$(OBJS)) memide.o
kernelmemfs: $(MEMFSOBJS) multiboot.o data.o bootother initcode fs.img
$(LD) $(LDFLAGS) -Ttext 0x100000 -e main -o kernelmemfs multiboot.o data.o $(MEMFSOBJS) -b binary initcode bootother fs.img
$(OBJDUMP) -S kernelmemfs > kernelmemfs.asm
$(OBJDUMP) -t kernelmemfs | sed '1,/SYMBOL TABLE/d; s/ .* / /; /^$$/d' > kernelmemfs.sym
tags: $(OBJS) bootother.S _init
etags *.S *.c
@ -94,7 +145,7 @@ _forktest: forktest.o $(ULIB)
$(OBJDUMP) -S _forktest > forktest.asm
mkfs: mkfs.c fs.h
gcc -Wall -o mkfs mkfs.c
gcc -m32 -Werror -Wall -o mkfs mkfs.c
UPROGS=\
_cat\
@ -126,7 +177,7 @@ clean:
# make a printout
FILES = $(shell grep -v '^\#' runoff.list)
PRINT = runoff.list $(FILES)
PRINT = runoff.list runoff.spec $(FILES)
xv6.pdf: $(PRINT)
./runoff
@ -143,27 +194,33 @@ bochs : fs.img xv6.img
# try to generate a unique GDB port
GDBPORT = $(shell expr `id -u` % 5000 + 25000)
# QEMU's gdb stub command line changed in 0.11
QEMUGDB = $(shell if qemu -help | grep -q '^-gdb'; \
QEMUGDB = $(shell if $(QEMU) -help | grep -q '^-gdb'; \
then echo "-gdb tcp::$(GDBPORT)"; \
else echo "-s -p $(GDBPORT)"; fi)
QEMUOPTS = -smp 2 -hdb fs.img xv6.img
# Number of CPUs passed to QEMU via -smp; defaults to 2 when CPUS is
# unset or empty.  Override with e.g. 'make qemu CPUS=1'.
ifndef CPUS
CPUS := 2
endif
QEMUOPTS = -hdb fs.img xv6.img -smp $(CPUS)
qemu: fs.img xv6.img
qemu -serial mon:stdio $(QEMUOPTS)
$(QEMU) -serial mon:stdio $(QEMUOPTS)
qemu-memfs: xv6memfs.img
$(QEMU) xv6memfs.img -smp $(CPUS)
qemu-nox: fs.img xv6.img
qemu -nographic $(QEMUOPTS)
$(QEMU) -nographic $(QEMUOPTS)
.gdbinit: .gdbinit.tmpl
sed "s/localhost:1234/localhost:$(GDBPORT)/" < $^ > $@
qemu-gdb: fs.img xv6.img .gdbinit
@echo "*** Now run 'gdb'." 1>&2
qemu -serial mon:stdio $(QEMUOPTS) -S $(QEMUGDB)
$(QEMU) -serial mon:stdio $(QEMUOPTS) -S $(QEMUGDB)
qemu-nox-gdb: fs.img xv6.img .gdbinit
@echo "*** Now run 'gdb'." 1>&2
qemu -nographic $(QEMUOPTS) -S $(QEMUGDB)
$(QEMU) -nographic $(QEMUOPTS) -S $(QEMUGDB)
# CUT HERE
# prepare dist for students
@ -195,14 +252,16 @@ dist-test:
rm -rf dist-test
mkdir dist-test
cp dist/* dist-test
cd dist-test; ../m print
cd dist-test; ../m bochs || true
cd dist-test; ../m qemu
cd dist-test; $(MAKE) print
cd dist-test; $(MAKE) bochs || true
cd dist-test; $(MAKE) qemu
# update this rule (change rev1) when it is time to
# update this rule (change rev#) when it is time to
# make a new revision.
tar:
rm -rf /tmp/xv6
mkdir -p /tmp/xv6
cp dist/* dist/.gdbinit.tmpl /tmp/xv6
(cd /tmp; tar cf - xv6) | gzip >xv6-rev3.tar.gz
(cd /tmp; tar cf - xv6) | gzip >xv6-rev5.tar.gz
.PHONY: dist-test dist

13
README
View file

@ -19,6 +19,11 @@ The following people made contributions:
Russ Cox (context switching, locking)
Cliff Frey (MP)
Xiao Yu (MP)
Nickolai Zeldovich
Austin Clements
In addition, we are grateful for the patches contributed by Greg
Price, Yandong Mao, and Hitoshi Mitake.
The code in the files that constitute xv6 is
Copyright 2006-2007 Frans Kaashoek, Robert Morris, and Russ Cox.
@ -39,9 +44,7 @@ Then run "make TOOLPREFIX=i386-jos-elf-".
To run xv6, you can use Bochs or QEMU, both PC simulators.
Bochs makes debugging easier, but QEMU is much faster.
To run in Bochs, run "make bochs" and then type "c" at the bochs prompt.
To run in QEMU, run "make qemu". Both log the xv6 screen output to
standard output.
To run in QEMU, run "make qemu".
To create a typeset version of the code, run "make xv6.pdf".
This requires the "mpage" text formatting utility.
See http://www.mesa.nl/pub/mpage/.
To create a typeset version of the code, run "make xv6.pdf". This
requires the "mpage" utility. See http://www.mesa.nl/pub/mpage/.

View file

@ -13,7 +13,7 @@
.code16 # Assemble for 16-bit mode
.globl start
start:
cli # Disable interrupts
cli # BIOS enabled interrupts; disable
# Set up the important data segment registers (DS, ES, SS).
xorw %ax,%ax # Segment number zero
@ -21,10 +21,8 @@ start:
movw %ax,%es # -> Extra Segment
movw %ax,%ss # -> Stack Segment
# Enable A20:
# For backwards compatibility with the earliest PCs, physical
# address line 20 is tied low, so that addresses higher than
# 1MB wrap around to zero by default. This code undoes this.
# Physical address line A20 is tied to zero so that the first PCs
# with 2 MB would run software that assumed 1 MB. Undo that.
seta20.1:
inb $0x64,%al # Wait for not busy
testb $0x2,%al
@ -41,23 +39,21 @@ seta20.2:
movb $0xdf,%al # 0xdf -> port 0x60
outb %al,$0x60
//PAGEBREAK!
# Switch from real to protected mode, using a bootstrap GDT
# and segment translation that makes virtual addresses
# identical to physical addresses, so that the
# effective memory map does not change during the switch.
# Switch from real to protected mode. Use a bootstrap GDT that makes
# virtual addresses map directly to physical addresses so that the
# effective memory map doesn't change during the transition.
lgdt gdtdesc
movl %cr0, %eax
orl $CR0_PE, %eax
movl %eax, %cr0
# This ljmp is how you load the CS (Code Segment) register.
# SEG_ASM produces segment descriptors with the 32-bit mode
# flag set (the D flag), so addresses and word operands will
# default to 32 bits after this jump.
//PAGEBREAK!
# Complete transition to 32-bit protected mode by using long jmp
# to reload %cs and %eip. The segment registers are set up with no
# translation, so that the mapping is still the identity mapping.
ljmp $(SEG_KCODE<<3), $start32
.code32 # Assemble for 32-bit mode
.code32 # Tell assembler to generate 32-bit code now.
start32:
# Set up the protected-mode data segment registers
movw $(SEG_KDATA<<3), %ax # Our data segment selector

View file

@ -33,8 +33,8 @@ bootmain(void)
// Load each program segment (ignores ph flags).
ph = (struct proghdr*)((uchar*)elf + elf->phoff);
eph = ph + elf->phnum;
for(; ph < eph; ph++) {
va = (uchar*)(ph->va & 0xFFFFFF);
for(; ph < eph; ph++){
va = (uchar*)ph->va;
readseg(va, ph->filesz, ph->offset);
if(ph->memsz > ph->filesz)
stosb(va + ph->filesz, 0, ph->memsz - ph->filesz);
@ -42,7 +42,7 @@ bootmain(void)
// Call the entry point from the ELF header.
// Does not return!
entry = (void(*)(void))(elf->entry & 0xFFFFFF);
entry = (void(*)(void))(elf->entry);
entry();
}

View file

@ -9,80 +9,69 @@
# Because this code sets DS to zero, it must sit
# at an address in the low 2^16 bytes.
#
# Bootothers (in main.c) sends the STARTUPs, one at a time.
# It puts this code (start) at 0x7000.
# It puts the correct %esp in start-4,
# and the place to jump to in start-8.
# Bootothers (in main.c) sends the STARTUPs one at a time.
# It copies this code (start) to 0x7000.
# It puts the address of a newly allocated per-core stack in start-4,
# and the address of the place to jump to (mpmain) in start-8.
#
# This code is identical to bootasm.S except:
# - it does not need to enable A20
# - it uses the address at start-4 for the %esp
# - it jumps to the address at start-8 instead of calling bootmain
#define SEG_KCODE 1 // kernel code
#define SEG_KDATA 2 // kernel data+stack
#define SEG_KCODE 1
#define SEG_KDATA 2
#define CR0_PE 1 // protected mode enable bit
#define CR0_PE 1
.code16 # Assemble for 16-bit mode
.code16
.globl start
start:
cli # Disable interrupts
cli
# Set up the important data segment registers (DS, ES, SS).
xorw %ax,%ax # Segment number zero
movw %ax,%ds # -> Data Segment
movw %ax,%es # -> Extra Segment
movw %ax,%ss # -> Stack Segment
xorw %ax,%ax
movw %ax,%ds
movw %ax,%es
movw %ax,%ss
//PAGEBREAK!
# Switch from real to protected mode, using a bootstrap GDT
# and segment translation that makes virtual addresses
# identical to physical addresses, so that the
# effective memory map does not change during the switch.
lgdt gdtdesc
movl %cr0, %eax
orl $CR0_PE, %eax
movl %eax, %cr0
# This ljmp is how you load the CS (Code Segment) register.
# SEG_ASM produces segment descriptors with the 32-bit mode
# flag set (the D flag), so addresses and word operands will
# default to 32 bits after this jump.
//PAGEBREAK!
ljmp $(SEG_KCODE<<3), $start32
.code32 # Assemble for 32-bit mode
.code32
start32:
# Set up the protected-mode data segment registers
movw $(SEG_KDATA<<3), %ax # Our data segment selector
movw %ax, %ds # -> DS: Data Segment
movw %ax, %es # -> ES: Extra Segment
movw %ax, %ss # -> SS: Stack Segment
movw $0, %ax # Zero segments not ready for use
movw %ax, %fs # -> FS
movw %ax, %gs # -> GS
movw $(SEG_KDATA<<3), %ax
movw %ax, %ds
movw %ax, %es
movw %ax, %ss
movw $0, %ax
movw %ax, %fs
movw %ax, %gs
# Set up the stack pointer and call into C.
# switch to the stack allocated by bootothers()
movl start-4, %esp
# call mpmain()
call *(start-8)
# If the call returns (it shouldn't), trigger a Bochs
# breakpoint if running under Bochs, then loop.
movw $0x8a00, %ax # 0x8a00 -> port 0x8a00
movw $0x8a00, %ax
movw %ax, %dx
outw %ax, %dx
movw $0x8ae0, %ax # 0x8ae0 -> port 0x8a00
movw $0x8ae0, %ax
outw %ax, %dx
spin:
jmp spin
# Bootstrap GDT
.p2align 2 # force 4 byte alignment
.p2align 2
gdt:
SEG_NULLASM # null seg
SEG_ASM(STA_X|STA_R, 0x0, 0xffffffff) # code seg
SEG_ASM(STA_W, 0x0, 0xffffffff) # data seg
SEG_NULLASM
SEG_ASM(STA_X|STA_R, 0x0, 0xffffffff)
SEG_ASM(STA_W, 0x0, 0xffffffff)
gdtdesc:
.word (gdtdesc - gdt - 1) # sizeof(gdt) - 1
.long gdt # address gdt
.word (gdtdesc - gdt - 1)
.long gdt

View file

@ -18,28 +18,29 @@ static void consputc(int);
static int panicked = 0;
static struct {
struct spinlock lock;
int locking;
struct spinlock lock;
int locking;
} cons;
static void
printint(int xx, int base, int sgn)
printint(int xx, int base, int sign)
{
static char digits[] = "0123456789abcdef";
char buf[16];
int i = 0, neg = 0;
int i;
uint x;
if(sgn && xx < 0){
neg = 1;
if(sign && (sign = xx < 0))
x = -xx;
} else
else
x = xx;
i = 0;
do{
buf[i++] = digits[x % base];
}while((x /= base) != 0);
if(neg)
if(sign)
buf[i++] = '-';
while(--i >= 0)
@ -136,8 +137,7 @@ cgaputc(int c)
if(c == '\n')
pos += 80 - pos%80;
else if(c == BACKSPACE){
if(pos > 0)
crt[--pos] = ' ' | 0x0700;
if(pos > 0) --pos;
} else
crt[pos++] = (c&0xff) | 0x0700; // black on white
@ -163,16 +163,13 @@ consputc(int c)
;
}
if (c == BACKSPACE) {
uartputc('\b');
uartputc(' ');
uartputc('\b');
if(c == BACKSPACE){
uartputc('\b'); uartputc(' '); uartputc('\b');
} else
uartputc(c);
cgaputc(c);
}
//PAGEBREAK: 50
#define INPUT_BUF 128
struct {
struct spinlock lock;
@ -202,8 +199,7 @@ consoleintr(int (*getc)(void))
consputc(BACKSPACE);
}
break;
case C('H'): // Backspace
case '\x7f':
case C('H'): case '\x7f': // Backspace
if(input.e != input.w){
input.e--;
consputc(BACKSPACE);
@ -211,9 +207,7 @@ consoleintr(int (*getc)(void))
break;
default:
if(c != 0 && input.e-input.r < INPUT_BUF){
// The serial port produces 0x13, not 0x10
if(c == '\r')
c = '\n';
c = (c == '\r') ? '\n' : c;
input.buf[input.e++ % INPUT_BUF] = c;
consputc(c);
if(c == '\n' || c == C('D') || input.e == input.r+INPUT_BUF){

26
data.S Normal file
View file

@ -0,0 +1,26 @@
// The kernel layout is:
//
// text
// rodata
// data
// bss
//
// Conventionally, Unix linkers provide pseudo-symbols
// etext, edata, and end, at the end of the text, data, and bss.
// For the kernel mapping, we need the address at the beginning
// of the data section, but that's not one of the conventional
// symbols, because the convention started before there was a
// read-only rodata section between text and data.
//
// To get the address of the data section, we define a symbol
// named data and make sure this is the first object passed to
// the linker, so that it will be the first symbol in the data section.
//
// Alternative approaches would be to parse our own ELF header
// or to write a linker script, but this is simplest.
// Emit into the .data section.
.data
// Start the symbol on a 4096-byte boundary.
// NOTE(review): 4096 presumably matches the kernel's PGSIZE - confirm.
.align 4096
// Export the symbol so C code can take its address.
.globl data
data:
// One 16-bit word of initialized data following the label.
// NOTE(review): presumably only here to give the label real contents; the value 1 itself looks arbitrary - confirm.
.word 1

31
defs.h
View file

@ -6,6 +6,7 @@ struct pipe;
struct proc;
struct spinlock;
struct stat;
struct superblock;
// bio.c
void binit(void);
@ -32,6 +33,7 @@ int filestat(struct file*, struct stat*);
int filewrite(struct file*, char*, int n);
// fs.c
void readsb(int dev, struct superblock *sb);
int dirlink(struct inode*, char*, uint);
struct inode* dirlookup(struct inode*, char*, uint*);
struct inode* ialloc(uint, short);
@ -62,7 +64,7 @@ void ioapicinit(void);
// kalloc.c
char* kalloc(void);
void kfree(char*);
void kinit(char*,uint);
void kinit(void);
// kbd.c
void kbdintr(void);
@ -75,6 +77,12 @@ void lapicinit(int);
void lapicstartap(uchar, uint);
void microdelay(int);
// log.c
// Set up the log from the superblock and replay any committed transaction.
void initlog(void);
// Append (or absorb) a modified buffer into the current transaction.
void log_write(struct buf*);
// Start / finish the single outstanding file-system transaction.
// Declared with (void) so they are real prototypes that match the
// definitions in log.c; an empty () leaves the arguments unchecked.
void begin_trans(void);
void commit_trans(void);
// mp.c
extern int ismp;
int mpbcpu(void);
@ -101,6 +109,7 @@ int kill(int);
void pinit(void);
void procdump(void);
void scheduler(void) __attribute__((noreturn));
void sched(void);
void sleep(void*, struct spinlock*);
void userinit(void);
int wait(void);
@ -116,8 +125,8 @@ void getcallerpcs(void*, uint*);
int holding(struct spinlock*);
void initlock(struct spinlock*, char*);
void release(struct spinlock*);
void pushcli();
void popcli();
void pushcli(void);
void popcli(void);
// string.c
int memcmp(const void*, const void*, uint);
@ -151,20 +160,20 @@ void uartintr(void);
void uartputc(int);
// vm.c
void pminit(void);
void ksegment(void);
void seginit(void);
void kvmalloc(void);
void vmenable(void);
pde_t* setupkvm(void);
char* uva2ka(pde_t*, char*);
int allocuvm(pde_t*, char*, uint);
int deallocuvm(pde_t *pgdir, char *addr, uint sz);
int allocuvm(pde_t*, uint, uint);
int deallocuvm(pde_t*, uint, uint);
void freevm(pde_t*);
void inituvm(pde_t*, char*, char*, uint);
int loaduvm(pde_t*, char*, struct inode *ip, uint, uint);
pde_t* copyuvm(pde_t*,uint);
void inituvm(pde_t*, char*, uint);
int loaduvm(pde_t*, char*, struct inode*, uint, uint);
pde_t* copyuvm(pde_t*, uint);
void switchuvm(struct proc*);
void switchkvm();
void switchkvm(void);
int copyout(pde_t*, uint, void*, uint);
// number of elements in fixed-size array
#define NELEM(x) (sizeof(x)/sizeof((x)[0]))

77
exec.c
View file

@ -9,20 +9,18 @@
int
exec(char *path, char **argv)
{
char *mem, *s, *last;
int i, argc, arglen, len, off;
uint sz, sp, spoffset, argp;
char *s, *last;
int i, off;
uint argc, sz, sp, ustack[3+MAXARG+1];
struct elfhdr elf;
struct inode *ip;
struct proghdr ph;
pde_t *pgdir, *oldpgdir;
pgdir = 0;
sz = 0;
if((ip = namei(path)) == 0)
return -1;
ilock(ip);
pgdir = 0;
// Check ELF header
if(readi(ip, (char*)&elf, 0, sizeof(elf)) < sizeof(elf))
@ -30,10 +28,11 @@ exec(char *path, char **argv)
if(elf.magic != ELF_MAGIC)
goto bad;
if (!(pgdir = setupkvm()))
if((pgdir = setupkvm()) == 0)
goto bad;
// Load program into memory.
sz = 0;
for(i=0, off=elf.phoff; i<elf.phnum; i++, off+=sizeof(ph)){
if(readi(ip, (char*)&ph, off, sizeof(ph)) != sizeof(ph))
goto bad;
@ -41,49 +40,39 @@ exec(char *path, char **argv)
continue;
if(ph.memsz < ph.filesz)
goto bad;
if (!allocuvm(pgdir, (char *)ph.va, ph.memsz))
if((sz = allocuvm(pgdir, sz, ph.va + ph.memsz)) == 0)
goto bad;
if(ph.va + ph.memsz > sz)
sz = ph.va + ph.memsz;
if (!loaduvm(pgdir, (char *)ph.va, ip, ph.offset, ph.filesz))
if(loaduvm(pgdir, (char*)ph.va, ip, ph.offset, ph.filesz) < 0)
goto bad;
}
iunlockput(ip);
ip = 0;
// Allocate and initialize stack at sz
// Allocate a one-page stack at the next page boundary
sz = PGROUNDUP(sz);
sz += PGSIZE; // leave an invalid page
if (!allocuvm(pgdir, (char *)sz, PGSIZE))
if((sz = allocuvm(pgdir, sz, sz + PGSIZE)) == 0)
goto bad;
mem = uva2ka(pgdir, (char *)sz);
spoffset = sz;
sz += PGSIZE;
arglen = 0;
for(argc=0; argv[argc]; argc++)
arglen += strlen(argv[argc]) + 1;
arglen = (arglen+3) & ~3;
// Push argument strings, prepare rest of stack in ustack.
sp = sz;
argp = sz - arglen - 4*(argc+1);
// Copy argv strings and pointers to stack.
*(uint*)(mem+argp-spoffset + 4*argc) = 0; // argv[argc]
for(i=argc-1; i>=0; i--){
len = strlen(argv[i]) + 1;
sp -= len;
memmove(mem+sp-spoffset, argv[i], len);
*(uint*)(mem+argp-spoffset + 4*i) = sp; // argv[i]
for(argc = 0; argv[argc]; argc++) {
if(argc >= MAXARG)
goto bad;
sp -= strlen(argv[argc]) + 1;
sp &= ~3;
if(copyout(pgdir, sp, argv[argc], strlen(argv[argc]) + 1) < 0)
goto bad;
ustack[3+argc] = sp;
}
ustack[3+argc] = 0;
// Stack frame for main(argc, argv), below arguments.
sp = argp;
sp -= 4;
*(uint*)(mem+sp-spoffset) = argp;
sp -= 4;
*(uint*)(mem+sp-spoffset) = argc;
sp -= 4;
*(uint*)(mem+sp-spoffset) = 0xffffffff; // fake return pc
ustack[0] = 0xffffffff; // fake return PC
ustack[1] = argc;
ustack[2] = sp - (argc+1)*4; // argv pointer
sp -= (3+argc+1) * 4;
if(copyout(pgdir, sp, ustack, (3+argc+1)*4) < 0)
goto bad;
// Save program name for debugging.
for(last=s=path; *s; s++)
@ -97,15 +86,15 @@ exec(char *path, char **argv)
proc->sz = sz;
proc->tf->eip = elf.entry; // main
proc->tf->esp = sp;
switchuvm(proc);
switchuvm(proc);
freevm(oldpgdir);
return 0;
bad:
if (pgdir) freevm(pgdir);
iunlockput(ip);
if(pgdir)
freevm(pgdir);
if(ip)
iunlockput(ip);
return -1;
}

14
fs.c
View file

@ -25,7 +25,7 @@
static void itrunc(struct inode*);
// Read the super block.
static void
void
readsb(int dev, struct superblock *sb)
{
struct buf *bp;
@ -61,11 +61,11 @@ balloc(uint dev)
readsb(dev, &sb);
for(b = 0; b < sb.size; b += BPB){
bp = bread(dev, BBLOCK(b, sb.ninodes));
for(bi = 0; bi < BPB; bi++){
for(bi = 0; bi < BPB && bi < (sb.size - b); bi++){
m = 1 << (bi % 8);
if((bp->data[bi/8] & m) == 0){ // Is block free?
bp->data[bi/8] |= m; // Mark block in use on disk.
bwrite(bp);
log_write(bp);
brelse(bp);
return b + bi;
}
@ -92,7 +92,7 @@ bfree(int dev, uint b)
if((bp->data[bi/8] & m) == 0)
panic("freeing free block");
bp->data[bi/8] &= ~m; // Mark block free on disk.
bwrite(bp);
log_write(bp);
brelse(bp);
}
@ -159,7 +159,7 @@ ialloc(uint dev, short type)
if(dip->type == 0){ // a free inode
memset(dip, 0, sizeof(*dip));
dip->type = type;
bwrite(bp); // mark it allocated on the disk
log_write(bp); // mark it allocated on the disk
brelse(bp);
return iget(dev, inum);
}
@ -183,7 +183,7 @@ iupdate(struct inode *ip)
dip->nlink = ip->nlink;
dip->size = ip->size;
memmove(dip->addrs, ip->addrs, sizeof(ip->addrs));
bwrite(bp);
log_write(bp);
brelse(bp);
}
@ -339,7 +339,7 @@ bmap(struct inode *ip, uint bn)
a = (uint*)bp->data;
if((addr = a[bn]) == 0){
a[bn] = addr = balloc(ip->dev);
bwrite(bp);
log_write(bp);
}
brelse(bp);
return addr;

2
fs.h
View file

@ -13,6 +13,7 @@ struct superblock {
uint size; // Size of file system image (blocks)
uint nblocks; // Number of data blocks
uint ninodes; // Number of inodes.
uint nlog; // Number of log blocks
};
#define NDIRECT 12
@ -41,7 +42,6 @@ struct dinode {
// Block containing bit for block b
#define BBLOCK(b, ninodes) (b/BPB + (ninodes)/IPB + 3)
// PAGEBREAK: 10
// Directory is a file containing a sequence of dirent structures.
#define DIRSIZ 14

6
ide.c
View file

@ -96,7 +96,7 @@ ideintr(void)
acquire(&idelock);
if((b = idequeue) == 0){
release(&idelock);
cprintf("Spurious IDE interrupt.\n");
// cprintf("spurious IDE interrupt\n");
return;
}
idequeue = b->qnext;
@ -131,7 +131,7 @@ iderw(struct buf *b)
if((b->flags & (B_VALID|B_DIRTY)) == B_VALID)
panic("iderw: nothing to do");
if(b->dev != 0 && !havedisk1)
panic("idrw: ide disk 1 not present");
panic("iderw: ide disk 1 not present");
acquire(&idelock);
@ -147,7 +147,7 @@ iderw(struct buf *b)
// Wait for request to finish.
// Assuming will not sleep too long: ignore proc->killed.
while((b->flags & (B_VALID|B_DIRTY)) != B_VALID) {
while((b->flags & (B_VALID|B_DIRTY)) != B_VALID){
sleep(b, &idelock);
}

View file

@ -3,9 +3,12 @@
#include "syscall.h"
#include "traps.h"
# exec(init, argv)
.globl start
start:
movl $SYS_init, %eax
int $T_SYSCALL
pushl $argv
pushl $init
pushl $0 // where caller pc would be

View file

@ -17,17 +17,21 @@ struct {
struct run *freelist;
} kmem;
extern char end[]; // first address after kernel loaded from ELF file
// Initialize free list of physical pages.
void
kinit(char *p, uint len)
kinit(void)
{
char *p;
initlock(&kmem.lock, "kmem");
char *p1 = (char*)PGROUNDUP((uint)p);
char *p2 = PGROUNDDOWN(p + len);
for( ; p1 < p2; p1 += 4096)
kfree(p1);
p = (char*)PGROUNDUP((uint)end);
for(; p + PGSIZE <= (char*)PHYSTOP; p += PGSIZE)
kfree(p);
}
//PAGEBREAK: 21
// Free the page of physical memory pointed at by v,
// which normally should have been returned by a
// call to kalloc(). (The exception is when
@ -37,14 +41,14 @@ kfree(char *v)
{
struct run *r;
if(((uint) v) % PGSIZE || (uint)v < 1024*1024 || (uint)v >= PHYSTOP)
if((uint)v % PGSIZE || v < end || (uint)v >= PHYSTOP)
panic("kfree");
// Fill with junk to catch dangling refs.
memset(v, 1, PGSIZE);
acquire(&kmem.lock);
r = (struct run *) v;
r = (struct run*)v;
r->next = kmem.freelist;
kmem.freelist = r;
release(&kmem.lock);
@ -54,7 +58,7 @@ kfree(char *v)
// Returns a pointer that the kernel can use.
// Returns 0 if the memory cannot be allocated.
char*
kalloc()
kalloc(void)
{
struct run *r;
@ -63,6 +67,6 @@ kalloc()
if(r)
kmem.freelist = r->next;
release(&kmem.lock);
return (char*) r;
return (char*)r;
}

164
log.c Normal file
View file

@ -0,0 +1,164 @@
#include "types.h"
#include "defs.h"
#include "param.h"
#include "mmu.h"
#include "proc.h"
#include "x86.h"
#include "spinlock.h"
#include "fs.h"
#include "buf.h"
// Dirt simple "logging" supporting only one transaction at a time. All file
// system calls that potentially write a block should be wrapped in begin_trans
// and commit_trans, so that there is never more than one active transaction.
// This serializes all file system operations that potentially write, but it
// simplifies recovery (there is at most one transaction to recover) and
// concurrency (no need to worry about reading a modified block from a
// transaction that hasn't committed yet).
// The header of the log. If head == 0, there are no log entries. All entries till head
// are committed. sector[] records the home sector for each block in the log
// (i.e., physical logging).
// Log header; the same layout is used for the on-disk header block
// (the block at log.start) and for the in-memory copy log.lh.
struct logheader {
int head;               // number of valid entries in sector[]; 0 means the log is empty
int sector[LOGSIZE];    // sector[i] is the home sector of the i'th logged block
};
// In-memory state of the (single) log.
struct {
struct spinlock lock;   // protects intrans (see begin_trans/commit_trans)
int start;              // block number of the log header: sb.size - sb.nlog
int size;               // number of log blocks, taken from sb.nlog
int intrans;            // nonzero while a transaction is in progress
int dev;                // device holding the log (set to ROOTDEV by initlog)
struct logheader lh;    // in-memory copy of the log header
} log;
static void recover_from_log(void);
// Initialize the log: locate the log area from the superblock of ROOTDEV,
// then replay whatever transaction the on-disk header says was committed.
void
initlog(void)
{
  struct superblock sb;

  // The header is read and written as a single block (see read_head and
  // write_head), so it has to be smaller than one.
  if (sizeof(struct logheader) >= BSIZE)
    panic("initlog: too big logheader");

  initlock(&log.lock, "log");
  readsb(ROOTDEV, &sb);
  log.dev = ROOTDEV;
  log.size = sb.nlog;
  log.start = sb.size - sb.nlog;   // the log occupies the last nlog blocks
  recover_from_log();
}
// Install the committed transaction: copy every logged block from the
// log area to its home sector on disk.
static void
install_trans(void)
{
  struct buf *from, *to;
  int n;

  if (log.lh.head > 0)
    cprintf("install_trans %d\n", log.lh.head);

  n = 0;
  while (n < log.lh.head) {
    cprintf("put entry %d to disk block %d\n", n, log.lh.sector[n]);
    from = bread(log.dev, log.start + n + 1);  // n'th logged block; +1 skips the header block
    to = bread(log.dev, log.lh.sector[n]);     // home location of that block
    memmove(to->data, from->data, BSIZE);
    bwrite(to);
    brelse(from);
    brelse(to);
    n++;
  }
}
// Read the log header from disk into the in-memory log header
static void
read_head(void)
{
struct buf *buf = bread(log.dev, log.start);
struct logheader *lh = (struct logheader *) (buf->data);
int i;
log.lh.head = lh->head;
for (i = 0; i < log.lh.head; i++) {
log.lh.sector[i] = lh->sector[i];
}
brelse(buf);
if (log.lh.head > 0)
cprintf("read_head: %d\n", log.lh.head);
}
// Flush the in-memory log header to the header block on disk.
// Writing a header with head > 0 is what commits the logged blocks;
// writing one with head == 0 empties the log.
static void
write_head(void)
{
  struct buf *bp;
  struct logheader *disk;
  int n;

  if (log.lh.head > 0)
    cprintf("write_head: %d\n", log.lh.head);

  bp = bread(log.dev, log.start);
  disk = (struct logheader *) (bp->data);
  disk->head = log.lh.head;
  for (n = 0; n < log.lh.head; n++)
    disk->sector[n] = log.lh.sector[n];
  bwrite(bp);
  brelse(bp);
}
// Replay the log: load the on-disk header, copy any committed blocks to
// their home sectors, then write back an empty header.  The order matters:
// the header is cleared only after the blocks have been installed.
static void
recover_from_log(void)
{
read_head();
install_trans(); // Install all transactions till head
log.lh.head = 0;
write_head(); // Reclaim log
}
// Start a transaction.  Only one may be active at a time: callers sleep
// on &log until the current holder clears log.intrans in commit_trans().
void
begin_trans(void)
{
  acquire(&log.lock);
  while (log.intrans != 0)
    sleep(&log, &log.lock);
  log.intrans = 1;
  release(&log.lock);
}
// Finish the current transaction and let the next one start.
// If any blocks were logged: write the header (the commit point), copy
// the blocks to their home sectors, then write an empty header to free
// the log.  A transaction that logged nothing skips all disk I/O; the
// on-disk header already has head == 0, because both recover_from_log()
// and every non-empty commit leave it that way.
void
commit_trans(void)
{
  if (log.lh.head > 0) {
    write_head();        // This causes all blocks till log.head to be committed
    install_trans();     // Install all the transactions till head
    log.lh.head = 0;
    write_head();        // Reclaim log
  }

  acquire(&log.lock);
  log.intrans = 0;
  wakeup(&log);          // wake anyone sleeping in begin_trans()
  release(&log.lock);
}
// Copy buffer b into the log and record its home sector in
// log.lh.sector[], but do not write the log header (which would commit
// the write).  If b's sector is already in the current transaction, its
// existing log slot is overwritten (log absorption) instead of taking a
// new one.
//
// Panics if the transaction is full or if no transaction is open.  "Full"
// is bounded both by the in-memory array (LOGSIZE) and by the on-disk log
// area: the header occupies one of the log.size blocks, so at most
// log.size - 1 data blocks fit.
void
log_write(struct buf *b)
{
  int i;

  if (log.lh.head >= LOGSIZE || log.lh.head >= log.size - 1)
    panic("too big a transaction");
  if (!log.intrans)
    panic("write outside of trans");

  cprintf("log_write: %d %d\n", b->sector, log.lh.head);

  for (i = 0; i < log.lh.head; i++) {
    if (log.lh.sector[i] == b->sector)   // log absorption?
      break;
  }
  log.lh.sector[i] = b->sector;
  struct buf *lbuf = bread(b->dev, log.start+i+1);   // +1 skips the header block
  memmove(lbuf->data, b->data, BSIZE);
  bwrite(lbuf);
  brelse(lbuf);
  if (i == log.lh.head)   // a new slot was used, not an absorbed one
    log.lh.head++;
}

59
main.c
View file

@ -7,40 +7,45 @@
static void bootothers(void);
static void mpmain(void);
void jkstack(void) __attribute__((noreturn));
void jmpkstack(void) __attribute__((noreturn));
void mainc(void);
// Bootstrap processor starts running C code here.
// Allocate a real stack and switch to it, first
// doing some setup required for memory allocator to work.
int
main(void)
{
mpinit(); // collect info about this machine
lapicinit(mpbcpu());
ksegment(); // set up segments
picinit(); // interrupt controller
ioapicinit(); // another interrupt controller
consoleinit(); // I/O devices & their interrupts
uartinit(); // serial port
pminit(); // discover how much memory there is
jkstack(); // call mainc() on a properly-allocated stack
seginit(); // set up segments
kinit(); // initialize memory allocator
jmpkstack(); // call mainc() on a properly-allocated stack
}
void
jkstack(void)
jmpkstack(void)
{
char *kstack = kalloc();
if (!kstack)
panic("jkstack\n");
char *top = kstack + PGSIZE;
asm volatile("movl %0,%%esp" : : "r" (top));
asm volatile("call mainc");
panic("jkstack");
char *kstack, *top;
kstack = kalloc();
if(kstack == 0)
panic("jmpkstack kalloc");
top = kstack + PGSIZE;
asm volatile("movl %0,%%esp; call mainc" : : "r" (top));
panic("jmpkstack");
}
// Set up hardware and software.
// Runs only on the bootstrap processor.
void
mainc(void)
{
cprintf("\ncpu%d: starting xv6\n\n", cpu->id);
picinit(); // interrupt controller
ioapicinit(); // another interrupt controller
consoleinit(); // I/O devices & their interrupts
uartinit(); // serial port
kvmalloc(); // initialize the kernel page table
pinit(); // process table
tvinit(); // trap vectors
@ -63,17 +68,18 @@ mainc(void)
static void
mpmain(void)
{
if(cpunum() != mpbcpu()) {
ksegment();
if(cpunum() != mpbcpu()){
seginit();
lapicinit(cpunum());
}
vmenable(); // turn on paging
cprintf("cpu%d: starting\n", cpu->id);
idtinit(); // load idt register
xchg(&cpu->booted, 1);
xchg(&cpu->booted, 1); // tell bootothers() we're up
scheduler(); // start running processes
}
// Start the non-boot processors.
static void
bootothers(void)
{
@ -82,19 +88,23 @@ bootothers(void)
struct cpu *c;
char *stack;
// Write bootstrap code to unused memory at 0x7000. The linker has
// placed the start of bootother.S there.
code = (uchar *) 0x7000;
// Write bootstrap code to unused memory at 0x7000.
// The linker has placed the image of bootother.S in
// _binary_bootother_start.
code = (uchar*)0x7000;
memmove(code, _binary_bootother_start, (uint)_binary_bootother_size);
for(c = cpus; c < cpus+ncpu; c++){
if(c == cpus+cpunum()) // We've started already.
continue;
// Fill in %esp, %eip and start code on cpu.
// Tell bootother.S what stack to use and the address of mpmain;
// it expects to find these two addresses stored just before
// its first instruction.
stack = kalloc();
*(void**)(code-4) = stack + KSTACKSIZE;
*(void**)(code-8) = mpmain;
lapicstartap(c->id, (uint)code);
// Wait for cpu to finish mpmain()
@ -103,3 +113,6 @@ bootothers(void)
}
}
//PAGEBREAK!
// Blank page.

58
memide.c Normal file
View file

@ -0,0 +1,58 @@
// Fake IDE disk; stores blocks in memory.
// Useful for running kernel without scratch disk.
#include "types.h"
#include "defs.h"
#include "param.h"
#include "mmu.h"
#include "proc.h"
#include "x86.h"
#include "traps.h"
#include "spinlock.h"
#include "buf.h"
extern uchar _binary_fs_img_start[], _binary_fs_img_size[];
static int disksize;
static uchar *memdisk;
// Point the fake disk at the file system image embedded in the kernel.
// The address of _binary_fs_img_size is used as the image's byte count
// and converted to a count of 512-byte sectors.
void
ideinit(void)
{
  disksize = (uint)_binary_fs_img_size / 512;
  memdisk = _binary_fs_img_start;
}
// Interrupt handler.
// Does nothing here: iderw() below completes every request before it
// returns, so this handler has no work to do.
void
ideintr(void)
{
// no-op
}
// Bring buffer b in sync with the in-memory disk image.
// A dirty buffer is copied into the image and loses B_DIRTY; otherwise
// the buffer is filled from the image.  Either way B_VALID is set on
// return.  Panics on a non-busy buffer, a buffer with nothing to do,
// a device other than 1, or a sector beyond the image.
void
iderw(struct buf *b)
{
  uchar *slot;

  if((b->flags & B_BUSY) == 0)
    panic("iderw: buf not busy");
  if((b->flags & (B_VALID|B_DIRTY)) == B_VALID)
    panic("iderw: nothing to do");
  if(b->dev != 1)
    panic("iderw: request not for disk 1");
  if(b->sector >= disksize)
    panic("iderw: sector out of range");

  // Location of this sector inside the embedded image.
  slot = memdisk + b->sector*512;

  if((b->flags & B_DIRTY) == 0){
    memmove(b->data, slot, 512);      // read
  } else {
    b->flags &= ~B_DIRTY;
    memmove(slot, b->data, 512);      // write
  }
  b->flags |= B_VALID;
}

47
mkfs.c
View file

@ -4,11 +4,15 @@
#include <string.h>
#include <fcntl.h>
#include <assert.h>
#define stat xv6_stat // avoid clash with host struct stat
#include "types.h"
#include "fs.h"
#include "stat.h"
#include "param.h"
int nblocks = 995;
int nblocks = 985;
int nlog = LOGSIZE;
int ninodes = 200;
int size = 1024;
@ -33,7 +37,7 @@ ushort
xshort(ushort x)
{
ushort y;
uchar *a = (uchar*) &y;
uchar *a = (uchar*)&y;
a[0] = x;
a[1] = x >> 8;
return y;
@ -43,7 +47,7 @@ uint
xint(uint x)
{
uint y;
uchar *a = (uchar*) &y;
uchar *a = (uchar*)&y;
a[0] = x;
a[1] = x >> 8;
a[2] = x >> 16;
@ -77,20 +81,23 @@ main(int argc, char *argv[])
sb.size = xint(size);
sb.nblocks = xint(nblocks); // so whole disk is size sectors
sb.ninodes = xint(ninodes);
sb.nlog = xint(nlog);
bitblocks = size/(512*8) + 1;
usedblocks = ninodes / IPB + 3 + bitblocks;
freeblock = usedblocks;
printf("used %d (bit %d ninode %lu) free %u total %d\n", usedblocks,
bitblocks, ninodes/IPB + 1, freeblock, nblocks+usedblocks);
printf("used %d (bit %d ninode %zu) free %u log %u total %d\n", usedblocks,
bitblocks, ninodes/IPB + 1, freeblock, nlog, nblocks+usedblocks+nlog);
assert(nblocks + usedblocks == size);
assert(nblocks + usedblocks + nlog == size);
for(i = 0; i < nblocks + usedblocks; i++)
for(i = 0; i < nblocks + usedblocks + nlog; i++)
wsect(i, zeroes);
wsect(1, &sb);
memset(buf, 0, sizeof(buf));
memmove(buf, &sb, sizeof(sb));
wsect(1, buf);
rootino = ialloc(T_DIR);
assert(rootino == ROOTINO);
@ -173,7 +180,7 @@ winode(uint inum, struct dinode *ip)
bn = i2b(inum);
rsect(bn, buf);
dip = ((struct dinode*) buf) + (inum % IPB);
dip = ((struct dinode*)buf) + (inum % IPB);
*dip = *ip;
wsect(bn, buf);
}
@ -187,7 +194,7 @@ rinode(uint inum, struct dinode *ip)
bn = i2b(inum);
rsect(bn, buf);
dip = ((struct dinode*) buf) + (inum % IPB);
dip = ((struct dinode*)buf) + (inum % IPB);
*ip = *dip;
}
@ -225,12 +232,12 @@ balloc(int used)
int i;
printf("balloc: first %d blocks have been allocated\n", used);
assert(used < 512);
assert(used < 512*8);
bzero(buf, 512);
for(i = 0; i < used; i++) {
for(i = 0; i < used; i++){
buf[i/8] = buf[i/8] | (0x1 << (i%8));
}
printf("balloc: write bitmap block at sector %lu\n", ninodes/IPB + 3);
printf("balloc: write bitmap block at sector %zu\n", ninodes/IPB + 3);
wsect(ninodes / IPB + 3, buf);
}
@ -239,7 +246,7 @@ balloc(int used)
void
iappend(uint inum, void *xp, int n)
{
char *p = (char*) xp;
char *p = (char*)xp;
uint fbn, off, n1;
struct dinode din;
char buf[512];
@ -252,24 +259,24 @@ iappend(uint inum, void *xp, int n)
while(n > 0){
fbn = off / 512;
assert(fbn < MAXFILE);
if(fbn < NDIRECT) {
if(xint(din.addrs[fbn]) == 0) {
if(fbn < NDIRECT){
if(xint(din.addrs[fbn]) == 0){
din.addrs[fbn] = xint(freeblock++);
usedblocks++;
}
x = xint(din.addrs[fbn]);
} else {
if(xint(din.addrs[NDIRECT]) == 0) {
if(xint(din.addrs[NDIRECT]) == 0){
// printf("allocate indirect block\n");
din.addrs[NDIRECT] = xint(freeblock++);
usedblocks++;
}
// printf("read indirect block\n");
rsect(xint(din.addrs[NDIRECT]), (char*) indirect);
if(indirect[fbn - NDIRECT] == 0) {
rsect(xint(din.addrs[NDIRECT]), (char*)indirect);
if(indirect[fbn - NDIRECT] == 0){
indirect[fbn - NDIRECT] = xint(freeblock++);
usedblocks++;
wsect(xint(din.addrs[NDIRECT]), (char*) indirect);
wsect(xint(din.addrs[NDIRECT]), (char*)indirect);
}
x = xint(indirect[fbn-NDIRECT]);
}

47
mmu.h
View file

@ -24,6 +24,20 @@
#define FL_VIP 0x00100000 // Virtual Interrupt Pending
#define FL_ID 0x00200000 // ID flag
// Control Register flags
#define CR0_PE 0x00000001 // Protection Enable
#define CR0_MP 0x00000002 // Monitor coProcessor
#define CR0_EM 0x00000004 // Emulation
#define CR0_TS 0x00000008 // Task Switched
#define CR0_ET 0x00000010 // Extension Type
#define CR0_NE 0x00000020 // Numeric Error
#define CR0_WP 0x00010000 // Write Protect
#define CR0_AM 0x00040000 // Alignment Mask
#define CR0_NW 0x20000000 // Not Writethrough
#define CR0_CD 0x40000000 // Cache Disable
#define CR0_PG 0x80000000 // Paging
//PAGEBREAK!
// Segment Descriptor
struct segdesc {
uint lim_15_0 : 16; // Low bits of segment limit
@ -46,7 +60,6 @@ struct segdesc {
{ ((lim) >> 12) & 0xffff, (uint)(base) & 0xffff, \
((uint)(base) >> 16) & 0xff, type, 1, dpl, 1, \
(uint)(lim) >> 28, 0, 0, 1, 1, (uint)(base) >> 24 }
#define SEG16(type, base, lim, dpl) (struct segdesc) \
{ (lim) & 0xffff, (uint)(base) & 0xffff, \
((uint)(base) >> 16) & 0xff, type, 1, dpl, 1, \
@ -62,8 +75,6 @@ struct segdesc {
#define STA_R 0x2 // Readable (executable segments)
#define STA_A 0x1 // Accessed
//
// System segment type bits
#define STS_T16A 0x1 // Available 16-bit TSS
#define STS_LDT 0x2 // Local Descriptor Table
@ -78,7 +89,6 @@ struct segdesc {
#define STS_IG32 0xE // 32-bit Interrupt Gate
#define STS_TG32 0xF // 32-bit Trap Gate
// A linear address 'la' has a three-part structure as follows:
//
// +--------10------+-------10-------+---------12----------+
@ -88,18 +98,18 @@ struct segdesc {
// \--- PDX(la) --/ \--- PTX(la) --/
// page directory index
#define PDX(la) ((((uint) (la)) >> PDXSHIFT) & 0x3FF)
#define PDX(la) (((uint)(la) >> PDXSHIFT) & 0x3FF)
// page table index
#define PTX(la) ((((uint) (la)) >> PTXSHIFT) & 0x3FF)
#define PTX(la) (((uint)(la) >> PTXSHIFT) & 0x3FF)
// construct linear address from indexes and offset
#define PGADDR(d, t, o) ((uint) ((d) << PDXSHIFT | (t) << PTXSHIFT | (o)))
#define PGADDR(d, t, o) ((uint)((d) << PDXSHIFT | (t) << PTXSHIFT | (o)))
// turn a kernel linear address into a physical address.
// all of the kernel data structures have linear and
// physical addresses that are equal.
#define PADDR(a) ((uint) a)
#define PADDR(a) ((uint)(a))
// Page directory and page table constants.
#define NPDENTRIES 1024 // page directory entries per page directory
@ -126,25 +136,10 @@ struct segdesc {
#define PTE_MBZ 0x180 // Bits must be zero
// Address in page table or page directory entry
#define PTE_ADDR(pte) ((uint) (pte) & ~0xFFF)
#define PTE_ADDR(pte) ((uint)(pte) & ~0xFFF)
typedef uint pte_t;
// Control Register flags
#define CR0_PE 0x00000001 // Protection Enable
#define CR0_MP 0x00000002 // Monitor coProcessor
#define CR0_EM 0x00000004 // Emulation
#define CR0_TS 0x00000008 // Task Switched
#define CR0_ET 0x00000010 // Extension Type
#define CR0_NE 0x00000020 // Numeric Error
#define CR0_WP 0x00010000 // Write Protect
#define CR0_AM 0x00040000 // Alignment Mask
#define CR0_NW 0x20000000 // Not Writethrough
#define CR0_CD 0x40000000 // Cache Disable
#define CR0_PG 0x80000000 // Paging
// PAGEBREAK: 40
// Task state segment format
struct taskstate {
uint link; // Old ts selector
@ -210,7 +205,7 @@ struct gatedesc {
// this interrupt/trap gate explicitly using an int instruction.
#define SETGATE(gate, istrap, sel, off, d) \
{ \
(gate).off_15_0 = (uint) (off) & 0xffff; \
(gate).off_15_0 = (uint)(off) & 0xffff; \
(gate).cs = (sel); \
(gate).args = 0; \
(gate).rsv1 = 0; \
@ -218,6 +213,6 @@ struct gatedesc {
(gate).s = 0; \
(gate).dpl = (d); \
(gate).p = 1; \
(gate).off_31_16 = (uint) (off) >> 16; \
(gate).off_31_16 = (uint)(off) >> 16; \
}

17
mp.c
View file

@ -39,7 +39,6 @@ mpsearch1(uchar *addr, int len)
{
uchar *e, *p;
cprintf("mpsearch1 0x%x %d\n", addr, len);
e = addr+len;
for(p = addr; p < e; p += sizeof(struct mp))
if(memcmp(p, "_MP_", 4) == 0 && sum(p, sizeof(struct mp)) == 0)
@ -113,9 +112,9 @@ mpinit(void)
switch(*p){
case MPPROC:
proc = (struct mpproc*)p;
if(ncpu != proc->apicid) {
cprintf("mpinit: ncpu=%d apicpid=%d", ncpu, proc->apicid);
panic("mpinit");
if(ncpu != proc->apicid){
cprintf("mpinit: ncpu=%d apicid=%d\n", ncpu, proc->apicid);
ismp = 0;
}
if(proc->flags & MPBOOT)
bcpu = &cpus[ncpu];
@ -135,9 +134,17 @@ mpinit(void)
continue;
default:
cprintf("mpinit: unknown config type %x\n", *p);
panic("mpinit");
ismp = 0;
}
}
if(!ismp){
// Didn't like what we found; fall back to no MP.
ncpu = 1;
lapic = 0;
ioapicid = 0;
return;
}
if(mp->imcrp){
// Bochs doesn't support IMCR, so this doesn't run on Bochs.
// But it would on real hardware.

75
multiboot.S Normal file
View file

@ -0,0 +1,75 @@
# Multiboot header, for multiboot boot loaders like GNU Grub.
# http://www.gnu.org/software/grub/manual/multiboot/multiboot.html
#
# Using GRUB 2, you can boot xv6 from a file stored in a
# Linux file system by copying kernel or kernelmemfs to /boot
# and then adding this menu entry:
#
# menuentry "xv6" {
# insmod ext2
# set root='(hd0,msdos1)'
# set kernel='/boot/kernel'
# echo "Loading ${kernel}..."
# multiboot ${kernel} ${kernel}
# boot
# }
#include "asm.h"
#define STACK 4096
#define SEG_KCODE 1 // kernel code
#define SEG_KDATA 2 // kernel data+stack
# Multiboot header. Data to direct multiboot loader.
# Field meanings noted below follow the Multiboot specification
# linked at the top of this file.
.p2align 2
.text
.globl multiboot_header
multiboot_header:
#define magic 0x1badb002
# Bit 16 tells the loader to honor the five address words that follow
# the checksum; bit 0 asks for page-aligned boot modules.
#define flags (1<<16 | 1<<0)
.long magic
.long flags
.long (-magic-flags) # checksum: the first three words sum to zero
.long multiboot_header # beginning of image
.long multiboot_header # load address: loading starts at the header
.long edata # end of the bytes to load from the file
.long end # end of bss
.long multiboot_entry # address the loader jumps to
# Multiboot entry point. Machine is mostly set up.
# Configure the GDT to match the environment that our usual
# boot loader - bootasm.S - sets up.
.globl multiboot_entry
multiboot_entry:
# Switch to the bootstrap GDT defined at the end of this file.
lgdt gdtdesc
# Far jump so that %cs is reloaded with the kernel code selector.
ljmp $(SEG_KCODE<<3), $mbstart32
mbstart32:
# Set up the protected-mode data segment registers
movw $(SEG_KDATA<<3), %ax # Our data segment selector
movw %ax, %ds # -> DS: Data Segment
movw %ax, %es # -> ES: Extra Segment
movw %ax, %ss # -> SS: Stack Segment
movw $0, %ax # Zero segments not ready for use
movw %ax, %fs # -> FS
movw %ax, %gs # -> GS
# Set up the stack pointer and call into C.
# The stack grows down, so start at the high end of the reserved area.
movl $(stack + STACK), %esp
call main
# Reached only when main returns; loop forever.
spin:
jmp spin
# Bootstrap GDT
# Entry order has to agree with the selector indexes defined near the
# top of this file: entry 1 is kernel code, entry 2 is kernel data.
# Both descriptors are given base 0x0 and limit 0xffffffff.
.p2align 2 # force 4 byte alignment
gdt:
SEG_NULLASM # null seg
SEG_ASM(STA_X|STA_R, 0x0, 0xffffffff) # code seg
SEG_ASM(STA_W, 0x0, 0xffffffff) # data seg
# Operand for lgdt: 16-bit table limit followed by 32-bit table address.
gdtdesc:
.word (gdtdesc - gdt - 1) # sizeof(gdt) - 1
.long gdt # address gdt
# Reserve the boot stack whose high end multiboot_entry loads into %esp.
.comm stack, STACK

View file

@ -7,4 +7,8 @@
#define NINODE 50 // maximum number of active i-nodes
#define NDEV 10 // maximum major device number
#define ROOTDEV 1 // device number of file system root disk
#define USERTOP 0xA0000 // end of user address space
#define PHYSTOP 0x1000000 // use phys mem up to here as free pool
#define MAXARG 32 // max exec arguments
#define LOGSIZE 10 // size of log

View file

@ -82,32 +82,3 @@ picinit(void)
if(irqmask != 0xFFFF)
picsetmask(irqmask);
}
// Blank page.

4
pipe.c
View file

@ -66,7 +66,7 @@ pipeclose(struct pipe *p, int writable)
p->readopen = 0;
wakeup(&p->nwrite);
}
if(p->readopen == 0 && p->writeopen == 0) {
if(p->readopen == 0 && p->writeopen == 0){
release(&p->lock);
kfree((char*)p);
} else
@ -81,7 +81,7 @@ pipewrite(struct pipe *p, char *addr, int n)
acquire(&p->lock);
for(i = 0; i < n; i++){
while(p->nwrite == p->nread + PIPESIZE) { //DOC: pipewrite-full
while(p->nwrite == p->nread + PIPESIZE){ //DOC: pipewrite-full
if(p->readopen == 0 || proc->killed){
release(&p->lock);
return -1;

259
proc.c
View file

@ -17,53 +17,18 @@ int nextpid = 1;
extern void forkret(void);
extern void trapret(void);
static void wakeup1(void *chan);
// Initialize ptable.lock, the spinlock taken by the process-table
// code in this file (for example exit() and wait()).
void
pinit(void)
{
initlock(&ptable.lock, "ptable");
}
//PAGEBREAK: 36
// Print a process listing to console. For debugging.
// Runs when user types ^P on console.
// No lock to avoid wedging a stuck machine further.
void
procdump(void)
{
static char *states[] = {
[UNUSED] "unused",
[EMBRYO] "embryo",
[SLEEPING] "sleep ",
[RUNNABLE] "runble",
[RUNNING] "run ",
[ZOMBIE] "zombie"
};
int i;
struct proc *p;
char *state;
uint pc[10];
for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){
if(p->state == UNUSED)
continue;
if(p->state >= 0 && p->state < NELEM(states) && states[p->state])
state = states[p->state];
else
state = "???";
cprintf("%d %s %s", p->pid, state, p->name);
if(p->state == SLEEPING){
getcallerpcs((uint*)p->context->ebp+2, pc);
for(i=0; i<10 && pc[i] != 0; i++)
cprintf(" %p", pc[i]);
}
cprintf("\n");
}
}
//PAGEBREAK: 32
// Look in the process table for an UNUSED proc.
// If found, change state to EMBRYO and return it.
// If found, change state to EMBRYO and initialize
// state required to run in the kernel.
// Otherwise return 0.
static struct proc*
allocproc(void)
@ -95,7 +60,7 @@ found:
p->tf = (struct trapframe*)sp;
// Set up new context to start executing at forkret,
// which returns to trapret (see below).
// which returns to trapret.
sp -= 4;
*(uint*)sp = (uint)trapret;
@ -103,6 +68,7 @@ found:
p->context = (struct context*)sp;
memset(p->context, 0, sizeof *p->context);
p->context->eip = (uint)forkret;
return p;
}
@ -116,12 +82,10 @@ userinit(void)
p = allocproc();
initproc = p;
if (!(p->pgdir = setupkvm()))
if((p->pgdir = setupkvm()) == 0)
panic("userinit: out of memory?");
if (!allocuvm(p->pgdir, 0x0, (int)_binary_initcode_size))
panic("userinit: out of memory?");
inituvm(p->pgdir, 0x0, _binary_initcode_start, (int)_binary_initcode_size);
p->sz = PGROUNDUP((int)_binary_initcode_size);
inituvm(p->pgdir, _binary_initcode_start, (int)_binary_initcode_size);
p->sz = PGSIZE;
memset(p->tf, 0, sizeof(*p->tf));
p->tf->cs = (SEG_UCODE << 3) | DPL_USER;
p->tf->ds = (SEG_UDATA << 3) | DPL_USER;
@ -142,14 +106,17 @@ userinit(void)
int
growproc(int n)
{
uint sz;
sz = proc->sz;
if(n > 0){
if (!allocuvm(proc->pgdir, (char *)proc->sz, n))
if((sz = allocuvm(proc->pgdir, sz, sz + n)) == 0)
return -1;
} else if(n < 0){
if (!deallocuvm(proc->pgdir, (char *)(proc->sz + n), 0 - n))
if((sz = deallocuvm(proc->pgdir, sz, sz + n)) == 0)
return -1;
}
proc->sz += n;
proc->sz = sz;
switchuvm(proc);
return 0;
}
@ -168,7 +135,7 @@ fork(void)
return -1;
// Copy process state from p.
if (!(np->pgdir = copyuvm(proc->pgdir, proc->sz))) {
if((np->pgdir = copyuvm(proc->pgdir, proc->sz)) == 0){
kfree(np->kstack);
np->kstack = 0;
np->state = UNUSED;
@ -192,6 +159,92 @@ fork(void)
return pid;
}
// Exit the current process. Does not return.
// An exited process remains in the zombie state
// until its parent calls wait() to find out it exited.
// Only the open files and the current directory are released here;
// the kernel stack and page table are freed by the parent in wait().
void
exit(void)
{
struct proc *p;
int fd;
// The init process must never exit.
if(proc == initproc)
panic("init exiting");
// Close all open files.
for(fd = 0; fd < NOFILE; fd++){
if(proc->ofile[fd]){
fileclose(proc->ofile[fd]);
proc->ofile[fd] = 0;
}
}
// Drop the reference to the current directory.
iput(proc->cwd);
proc->cwd = 0;
// ptable.lock is taken here and is still held when sched() is
// called below; this function never releases it.
acquire(&ptable.lock);
// Parent might be sleeping in wait().
wakeup1(proc->parent);
// Pass abandoned children to init.
for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){
if(p->parent == proc){
p->parent = initproc;
// The child is already a zombie: wake init so that it can
// collect the child in wait().
if(p->state == ZOMBIE)
wakeup1(initproc);
}
}
// Jump into the scheduler, never to return.
proc->state = ZOMBIE;
sched();
// Reached only if sched() returns, which is not expected for a zombie.
panic("zombie exit");
}
// Wait for a child process to exit and return its pid.
// Return -1 if this process has no children.
// Also returns -1, without waiting, if the calling process has been
// killed and no zombie child was found on the current scan.
int
wait(void)
{
struct proc *p;
int havekids, pid;
// ptable.lock is held for the whole scan; sleep() below is handed the
// lock, and every return path releases it.
acquire(&ptable.lock);
for(;;){
// Scan through table looking for zombie children.
havekids = 0;
for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){
if(p->parent != proc)
continue;
havekids = 1;
if(p->state == ZOMBIE){
// Found one.
// Free the child's kernel stack and page table and return its
// table slot to UNUSED so that it can be allocated again.
pid = p->pid;
kfree(p->kstack);
p->kstack = 0;
freevm(p->pgdir);
p->state = UNUSED;
p->pid = 0;
p->parent = 0;
p->name[0] = 0;
p->killed = 0;
release(&ptable.lock);
return pid;
}
}
// No point waiting if we don't have any children.
if(!havekids || proc->killed){
release(&ptable.lock);
return -1;
}
// Wait for children to exit. (See wakeup1 call in exit.)
// The sleep channel is this process's own struct proc, which is what
// exit() passes to wakeup1() as proc->parent.
sleep(proc, &ptable.lock); //DOC: wait-sleep
}
}
//PAGEBREAK: 42
// Per-CPU process scheduler.
// Each CPU calls scheduler() after setting itself up.
@ -356,89 +409,41 @@ kill(int pid)
return -1;
}
// Exit the current process. Does not return.
// An exited process remains in the zombie state
// until its parent calls wait() to find out it exited.
//PAGEBREAK: 36
// Print a process listing to console. For debugging.
// Runs when user types ^P on console.
// No lock to avoid wedging a stuck machine further.
void
exit(void)
procdump(void)
{
static char *states[] = {
[UNUSED] "unused",
[EMBRYO] "embryo",
[SLEEPING] "sleep ",
[RUNNABLE] "runble",
[RUNNING] "run ",
[ZOMBIE] "zombie"
};
int i;
struct proc *p;
int fd;
if(proc == initproc)
panic("init exiting");
// Close all open files.
for(fd = 0; fd < NOFILE; fd++){
if(proc->ofile[fd]){
fileclose(proc->ofile[fd]);
proc->ofile[fd] = 0;
}
}
iput(proc->cwd);
proc->cwd = 0;
acquire(&ptable.lock);
// Parent might be sleeping in wait().
wakeup1(proc->parent);
// Pass abandoned children to init.
char *state;
uint pc[10];
for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){
if(p->parent == proc){
p->parent = initproc;
if(p->state == ZOMBIE)
wakeup1(initproc);
if(p->state == UNUSED)
continue;
if(p->state >= 0 && p->state < NELEM(states) && states[p->state])
state = states[p->state];
else
state = "???";
cprintf("%d %s %s", p->pid, state, p->name);
if(p->state == SLEEPING){
getcallerpcs((uint*)p->context->ebp+2, pc);
for(i=0; i<10 && pc[i] != 0; i++)
cprintf(" %p", pc[i]);
}
}
// Jump into the scheduler, never to return.
proc->state = ZOMBIE;
sched();
panic("zombie exit");
}
// Wait for a child process to exit and return its pid.
// Return -1 if this process has no children.
int
wait(void)
{
struct proc *p;
int havekids, pid;
acquire(&ptable.lock);
for(;;){
// Scan through table looking for zombie children.
havekids = 0;
for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){
if(p->parent != proc)
continue;
havekids = 1;
if(p->state == ZOMBIE){
// Found one.
pid = p->pid;
kfree(p->kstack);
p->kstack = 0;
freevm(p->pgdir);
p->state = UNUSED;
p->pid = 0;
p->parent = 0;
p->name[0] = 0;
p->killed = 0;
release(&ptable.lock);
return pid;
}
}
// No point waiting if we don't have any children.
if(!havekids || proc->killed){
release(&ptable.lock);
return -1;
}
// Wait for children to exit. (See wakeup1 call in proc_exit.)
sleep(proc, &ptable.lock); //DOC: wait-sleep
cprintf("\n");
}
}

64
proc.h
View file

@ -8,6 +8,36 @@
#define SEG_TSS 6 // this process's task state
#define NSEGS 7
// Per-CPU state
struct cpu {
uchar id; // Local APIC ID; index into cpus[] below
struct context *scheduler; // swtch() here to enter scheduler
struct taskstate ts; // Used by x86 to find stack for interrupt
struct segdesc gdt[NSEGS]; // x86 global descriptor table
volatile uint booted; // Has the CPU started?
int ncli; // Depth of pushcli nesting.
int intena; // Were interrupts enabled before pushcli?
// Cpu-local storage variables; see below
// Keep these two fields adjacent and in this order: they are the
// memory that the %gs:0 and %gs:4 declarations below refer to.
struct cpu *cpu;
struct proc *proc; // The currently-running process.
};
extern struct cpu cpus[NCPU];
extern int ncpu;
// Per-CPU variables, holding pointers to the
// current cpu and to the current process.
// The asm suffix tells gcc to use "%gs:0" to refer to cpu
// and "%gs:4" to refer to proc. seginit sets up the
// %gs segment register so that %gs refers to the memory
// holding those two variables in the local cpu's struct cpu.
// This is similar to how thread-local variables are implemented
// in thread libraries such as Linux pthreads.
extern struct cpu *cpu asm("%gs:0"); // &cpus[cpunum()]
extern struct proc *proc asm("%gs:4"); // cpus[cpunum()].proc
//PAGEBREAK: 17
// Saved registers for kernel context switches.
// Don't need to save all the segment registers (%cs, etc),
// because they are constant across kernel contexts.
@ -31,13 +61,13 @@ enum procstate { UNUSED, EMBRYO, SLEEPING, RUNNABLE, RUNNING, ZOMBIE };
// Per-process state
struct proc {
uint sz; // Size of process memory (bytes)
pde_t* pgdir; // Linear address of proc's pgdir
pde_t* pgdir; // Page table
char *kstack; // Bottom of kernel stack for this process
enum procstate state; // Process state
volatile int pid; // Process ID
struct proc *parent; // Parent process
struct trapframe *tf; // Trap frame for current syscall
struct context *context; // Switch here to run process
struct context *context; // swtch() here to run process
void *chan; // If non-zero, sleeping on chan
int killed; // If non-zero, have been killed
struct file *ofile[NOFILE]; // Open files
@ -48,35 +78,5 @@ struct proc {
// Process memory is laid out contiguously, low addresses first:
// text
// original data and bss
// invalid page
// fixed-size stack
// expandable heap
// Per-CPU state
struct cpu {
uchar id; // Local APIC ID; index into cpus[] below
struct context *scheduler; // Switch here to enter scheduler
struct taskstate ts; // Used by x86 to find stack for interrupt
struct segdesc gdt[NSEGS]; // x86 global descriptor table
volatile uint booted; // Has the CPU started?
int ncli; // Depth of pushcli nesting.
int intena; // Were interrupts enabled before pushcli?
// Cpu-local storage variables; see below
struct cpu *cpu;
struct proc *proc;
};
extern struct cpu cpus[NCPU];
extern int ncpu;
// Per-CPU variables, holding pointers to the
// current cpu and to the current process.
// The asm suffix tells gcc to use "%gs:0" to refer to cpu
// and "%gs:4" to refer to proc. ksegment sets up the
// %gs segment register so that %gs refers to the memory
// holding those two variables in the local cpu's struct cpu.
// This is similar to how thread-local variables are implemented
// in thread libraries such as Linux pthreads.
extern struct cpu *cpu asm("%gs:0"); // This cpu.
extern struct proc *proc asm("%gs:4"); // Current proc on this cpu.

24
runoff
View file

@ -58,6 +58,13 @@ perl -e '
next;
}
if(/sheet1: (left|right)$/){
print STDERR "assuming that sheet 1 is a $1 page. double-check!\n";
$left = $1 eq "left" ? "13579" : "02468";
$right = $1 eq "left" ? "02468" : "13579";
next;
}
if(/even: (.*)/){
$file = $1;
if(!defined($toc{$file})){
@ -89,18 +96,13 @@ perl -e '
print STDERR "Have no toc for $file\n";
next;
}
# this assumes that sheet 1 of code is a left page
# double-check the PDF
if(!$leftwarn++) {
print STDERR "assuming that sheet 1 is a left page. double-check!\n";
}
if($what eq "left" && !($toc{$file} =~ /^\d[13579]0/)){
print STDERR "$file does not start on a fresh left page [$toc{$file}]\n";
if($what eq "left" && !($toc{$file} =~ /^\d[$left][05]/)){
print STDERR "$file does not start on a left page [$toc{$file}]\n";
}
# why does this not work if I inline $x in the if?
$x = ($toc{$file} =~ /^\d[02468]0/);
$x = ($toc{$file} =~ /^\d[$right][05]/);
if($what eq "right" && !$x){
print STDERR "$file does not start on a fresh right page [$toc{$file}] [$x]\n";
print STDERR "$file does not start on a right page [$toc{$file}] [$x]\n";
}
next;
}
@ -189,7 +191,9 @@ do
uses=`egrep -h '([^a-zA-Z_0-9])'$i'($|[^a-zA-Z_0-9])' alltext | awk '{print $1}'`
if [ "x$defs" != "x$uses" ]; then
echo $i $defs
echo $uses |fmt -24 | sed 's/^/ /'
echo $uses |fmt -29 | sed 's/^/ /'
# else
# echo $i defined but not used >&2
fi
done
) >refs

View file

@ -22,8 +22,8 @@ proc.h
proc.c
swtch.S
kalloc.c
data.S
vm.c
# system calls
traps.h
vectors.pl
@ -46,11 +46,10 @@ file.c
sysfile.c
exec.c
# pipes
pipe.c
# string operations
string.c
@ -65,6 +64,7 @@ kbd.c
console.c
timer.c
uart.c
multiboot.S
# user-level
initcode.S
@ -73,3 +73,6 @@ init.c
sh.c

View file

@ -1,3 +1,16 @@
sheet1: left
# "left" and "right" specify which page of a two-page spread a file
# must start on. "left" means that a file must start on the first of
# the two pages. "right" means it must start on the second of the two
# pages. The file may start in either column.
#
# "even" and "odd" specify which column a file must start on. "even"
# means it must start in the left of the two columns (00). "odd" means it
# must start in the right of the two columns (50).
#
# You'd think these would be the other way around.
# types.h either
# param.h either
# defs.h either
@ -9,25 +22,36 @@
even: bootasm.S # mild preference
even: bootother.S # mild preference
# bootmain.c either
even: bootmain.c # mild preference
even: main.c
# mp.c don't care at all
# even: initcode.S
# odd: init.c
# spinlock.h either
# spinlock.c either
even: proc.h # mild preference
left: spinlock.h # mild preference
even: spinlock.h # mild preference
# This gets struct proc and allocproc on the same spread
left: proc.h
even: proc.h
# goal is to have two action-packed 2-page spreads,
# one with
# ksegment usegment allocproc userinit growproc fork
# userinit growproc fork exit wait
# and another with
# scheduler sched yield forkret sleep wakeup1 wakeup
right: proc.c # VERY important
even: proc.c # VERY important
# A few more action packed spreads
# page table creation and process loading
# walkpgdir mappages setupkvm vmenable switch[ku]vm inituvm loaduvm
# process memory management
# allocuvm deallocuvm freevm
left: vm.c
odd: vm.c
# setjmp.S either
# vm.c either
# kalloc.c either
# syscall.h either
@ -45,15 +69,25 @@ right: proc.c # VERY important
# file.h either
# fs.h either
# fsvar.h either
left: ide.c
# left: ide.c # mild preference
even: ide.c
# odd: bio.c
# with fs.c starting on 2nd column of a left page, we get these 2-page spreads:
# ialloc iupdate iget idup ilock iunlock iput iunlockput
# bmap itrunc stati readi writei
# namecmp dirlookup dirlink skipelem namex namei
# fileinit filealloc filedup fileclose filestat fileread filewrite
# starting on 2nd column of a right page is not terrible either
odd: fs.c # VERY important
left: fs.c # mild preference
# file.c either
# exec.c either
# sysfile.c either
# even: pipe.c # mild preference
# string.c either
left: kbd.h
# left: kbd.h # mild preference
even: kbd.h
even: console.c
odd: sh.c

View file

@ -33,7 +33,7 @@ for($i=0; $i<@lines; ){
last if $i>=@lines;
# If the rest of the file fits, use the whole thing.
if(@lines <= $i+50){
if(@lines <= $i+50 && !grep { /PAGEBREAK/ } @lines){
$breakbefore = @lines;
}else{
# Find a good next page break;

View file

@ -23,7 +23,7 @@ initlock(struct spinlock *lk, char *name)
void
acquire(struct spinlock *lk)
{
pushcli();
pushcli(); // disable interrupts to avoid deadlock.
if(holding(lk))
panic("acquire");
@ -71,7 +71,7 @@ getcallerpcs(void *v, uint pcs[])
ebp = (uint*)v - 2;
for(i = 0; i < 10; i++){
if(ebp == 0 || ebp < (uint *) 0x100000 || ebp == (uint*)0xffffffff)
if(ebp == 0 || ebp < (uint*)0x100000 || ebp == (uint*)0xffffffff)
break;
pcs[i] = ebp[1]; // saved %eip
ebp = (uint*)ebp[0]; // saved %ebp

View file

@ -14,21 +14,20 @@
int
main(int argc, char *argv[])
{
int i;
int fd, i;
char path[] = "stressfs0";
printf(1, "stressfs starting\n");
for (i = 0; i < 4; i++) {
if (fork() > 0) {
for(i = 0; i < 4; i++)
if(fork() > 0)
break;
}
}
printf(1, "%d\n", i);
char path[] = "stressfs0";
path[8] += i;
int fd = open(path, O_CREATE | O_RDWR);
for (i = 0; i < 100; i++)
fd = open(path, O_CREATE | O_RDWR);
for(i = 0; i < 100; i++)
printf(fd, "%d\n", i);
close(fd);

View file

@ -22,8 +22,6 @@ fetchint(struct proc *p, uint addr, int *ip)
return 0;
}
// XXX should we copy the string?
// Fetch the nul-terminated string at addr from process p.
// Doesn't actually copy the string - just sets *pp to point at it.
// Returns length of string, not including nul.
@ -34,8 +32,8 @@ fetchstr(struct proc *p, uint addr, char **pp)
if(addr >= p->sz)
return -1;
*pp = (char *) addr;
ep = (char *) p->sz;
*pp = (char*)addr;
ep = (char*)p->sz;
for(s = *pp; s < ep; s++)
if(*s == 0)
return s - *pp;
@ -46,8 +44,7 @@ fetchstr(struct proc *p, uint addr, char **pp)
int
argint(int n, int *ip)
{
int x = fetchint(proc, proc->tf->esp + 4 + 4*n, ip);
return x;
return fetchint(proc, proc->tf->esp + 4 + 4*n, ip);
}
// Fetch the nth word-sized system call argument as a pointer
@ -60,10 +57,9 @@ argptr(int n, char **pp, int size)
if(argint(n, &i) < 0)
return -1;
if((uint)i >= proc->sz || (uint)i+size >= proc->sz)
if((uint)i >= proc->sz || (uint)i+size > proc->sz)
return -1;
// *pp = proc->mem + i; // XXXXX
*pp = (char *) i; // XXXXX
*pp = (char*)i;
return 0;
}
@ -102,39 +98,52 @@ extern int sys_wait(void);
extern int sys_write(void);
extern int sys_uptime(void);
// System call handler for SYS_init: runs initlog() and returns 0.
// NOTE(review): presumably meant to be issued once, by the first user
// process, so that the log is set up in process context -- confirm
// against the caller; nothing here prevents a later repeat call.
int
sys_init(void)
{
initlog();
return 0;
}
static int (*syscalls[])(void) = {
[SYS_chdir] sys_chdir,
[SYS_close] sys_close,
[SYS_dup] sys_dup,
[SYS_exec] sys_exec,
[SYS_exit] sys_exit,
[SYS_init] sys_init,
[SYS_fork] sys_fork,
[SYS_fstat] sys_fstat,
[SYS_getpid] sys_getpid,
[SYS_kill] sys_kill,
[SYS_link] sys_link,
[SYS_mkdir] sys_mkdir,
[SYS_mknod] sys_mknod,
[SYS_open] sys_open,
[SYS_exit] sys_exit,
[SYS_wait] sys_wait,
[SYS_pipe] sys_pipe,
[SYS_read] sys_read,
[SYS_kill] sys_kill,
[SYS_exec] sys_exec,
[SYS_fstat] sys_fstat,
[SYS_chdir] sys_chdir,
[SYS_dup] sys_dup,
[SYS_getpid] sys_getpid,
[SYS_sbrk] sys_sbrk,
[SYS_sleep] sys_sleep,
[SYS_unlink] sys_unlink,
[SYS_wait] sys_wait,
[SYS_write] sys_write,
[SYS_uptime] sys_uptime,
// File system calls that are run in a transaction:
[SYS_open] sys_open,
[SYS_write] sys_write,
[SYS_mknod] sys_mknod,
[SYS_unlink] sys_unlink,
[SYS_link] sys_link,
[SYS_mkdir] sys_mkdir,
[SYS_close] sys_close,
};
void
syscall(void)
{
int num;
num = proc->tf->eax;
if(num >= 0 && num < NELEM(syscalls) && syscalls[num])
if(num >= 0 && num < SYS_open && syscalls[num]) {
proc->tf->eax = syscalls[num]();
else {
} else if (num >= SYS_open && num < NELEM(syscalls) && syscalls[num]) {
begin_trans();
proc->tf->eax = syscalls[num]();
commit_trans();
} else {
cprintf("%d %s: unknown sys call %d\n",
proc->pid, proc->name, num);
proc->tf->eax = -1;

View file

@ -1,22 +1,24 @@
// System call numbers
#define SYS_init 0
#define SYS_fork 1
#define SYS_exit 2
#define SYS_wait 3
#define SYS_pipe 4
#define SYS_write 5
#define SYS_read 6
#define SYS_close 7
#define SYS_kill 8
#define SYS_exec 9
#define SYS_open 10
#define SYS_mknod 11
#define SYS_unlink 12
#define SYS_fstat 13
#define SYS_link 14
#define SYS_mkdir 15
#define SYS_chdir 16
#define SYS_dup 17
#define SYS_getpid 18
#define SYS_sbrk 19
#define SYS_sleep 20
#define SYS_uptime 21
#define SYS_read 5
#define SYS_kill 6
#define SYS_exec 7
#define SYS_fstat 8
#define SYS_chdir 9
#define SYS_dup 10
#define SYS_getpid 11
#define SYS_sbrk 12
#define SYS_sleep 13
#define SYS_uptime 14
#define SYS_open 15
#define SYS_write 16
#define SYS_mknod 17
#define SYS_unlink 18
#define SYS_link 19
#define SYS_mkdir 20
#define SYS_close 21

View file

@ -344,11 +344,11 @@ sys_chdir(void)
int
sys_exec(void)
{
char *path, *argv[20];
char *path, *argv[MAXARG];
int i;
uint uargv, uarg;
if(argstr(0, &path) < 0 || argint(1, (int*)&uargv) < 0) {
if(argstr(0, &path) < 0 || argint(1, (int*)&uargv) < 0){
return -1;
}
memset(argv, 0, sizeof(argv));

View file

@ -6,9 +6,8 @@ on the same line as the name, the line number (or, in a few cases, numbers)
where the name is defined. Successive lines in an entry list the line
numbers where the name is used. For example, this entry:
swtch 2208
0318 1928 1967 2207
2208
swtch 2358
0317 2128 2166 2357 2358
indicates that swtch is defined on line 2208 and is mentioned on five lines
on sheets 03, 19, and 22.
indicates that swtch is defined on line 2358 and is mentioned on five lines
on sheets 03, 21, and 23.

8
trap.c
View file

@ -59,6 +59,9 @@ trap(struct trapframe *tf)
ideintr();
lapiceoi();
break;
case T_IRQ0 + IRQ_IDE+1:
// Bochs generates spurious IDE1 interrupts.
break;
case T_IRQ0 + IRQ_KBD:
kbdintr();
lapiceoi();
@ -83,9 +86,10 @@ trap(struct trapframe *tf)
panic("trap");
}
// In user space, assume process misbehaved.
cprintf("pid %d %s: trap %d err %d on cpu %d eip 0x%x addr 0x%x--kill proc\n",
cprintf("pid %d %s: trap %d err %d on cpu %d "
"eip 0x%x addr 0x%x--kill proc\n",
proc->pid, proc->name, tf->trapno, tf->err, cpu->id, tf->eip,
rcr2());
rcr2());
proc->killed = 1;
}

2
ulib.c
View file

@ -45,7 +45,7 @@ strchr(const char *s, char c)
{
for(; *s; s++)
if(*s == c)
return (char*) s;
return (char*)s;
return 0;
}

View file

@ -26,7 +26,7 @@ free(void *ap)
{
Header *bp, *p;
bp = (Header*) ap - 1;
bp = (Header*)ap - 1;
for(p = freep; !(bp > p && bp < p->s.ptr); p = p->s.ptr)
if(p >= p->s.ptr && (bp > p || bp < p->s.ptr))
break;
@ -52,7 +52,7 @@ morecore(uint nu)
if(nu < 4096)
nu = 4096;
p = sbrk(nu * sizeof(Header));
if(p == (char*) -1)
if(p == (char*)-1)
return 0;
hp = (Header*)p;
hp->s.size = nu;
@ -81,7 +81,7 @@ malloc(uint nbytes)
p->s.size = nunits;
}
freep = prevp;
return (void*) (p + 1);
return (void*)(p + 1);
}
if(p == freep)
if((p = morecore(nunits)) == 0)

4
user.h
View file

@ -18,10 +18,10 @@ int link(char*, char*);
int mkdir(char*);
int chdir(char*);
int dup(int);
int getpid();
int getpid(void);
char* sbrk(int);
int sleep(int);
int uptime();
int uptime(void);
// ulib.c
int stat(char*, struct stat*);

View file

@ -3,6 +3,8 @@
#include "user.h"
#include "fs.h"
#include "fcntl.h"
#include "syscall.h"
#include "traps.h"
char buf[2048];
char name[3];
@ -45,12 +47,12 @@ writetest(void)
printf(stdout, "error: creat small failed!\n");
exit();
}
for(i = 0; i < 100; i++) {
if(write(fd, "aaaaaaaaaa", 10) != 10) {
for(i = 0; i < 100; i++){
if(write(fd, "aaaaaaaaaa", 10) != 10){
printf(stdout, "error: write aa %d new file failed\n", i);
exit();
}
if(write(fd, "bbbbbbbbbb", 10) != 10) {
if(write(fd, "bbbbbbbbbb", 10) != 10){
printf(stdout, "error: write bb %d new file failed\n", i);
exit();
}
@ -65,7 +67,7 @@ writetest(void)
exit();
}
i = read(fd, buf, 2000);
if(i == 2000) {
if(i == 2000){
printf(stdout, "read succeeded ok\n");
} else {
printf(stdout, "read failed\n");
@ -73,7 +75,7 @@ writetest(void)
}
close(fd);
if(unlink("small") < 0) {
if(unlink("small") < 0){
printf(stdout, "unlink small failed\n");
exit();
}
@ -93,9 +95,9 @@ writetest1(void)
exit();
}
for(i = 0; i < MAXFILE; i++) {
((int*) buf)[0] = i;
if(write(fd, buf, 512) != 512) {
for(i = 0; i < MAXFILE; i++){
((int*)buf)[0] = i;
if(write(fd, buf, 512) != 512){
printf(stdout, "error: write big file failed\n", i);
exit();
}
@ -110,19 +112,19 @@ writetest1(void)
}
n = 0;
for(;;) {
for(;;){
i = read(fd, buf, 512);
if(i == 0) {
if(n == MAXFILE - 1) {
if(i == 0){
if(n == MAXFILE - 1){
printf(stdout, "read only %d blocks from big", n);
exit();
}
break;
} else if(i != 512) {
} else if(i != 512){
printf(stdout, "read failed %d\n", i);
exit();
}
if(((int*)buf)[0] != n) {
if(((int*)buf)[0] != n){
printf(stdout, "read content of block %d is %d\n",
n, ((int*)buf)[0]);
exit();
@ -130,7 +132,7 @@ writetest1(void)
n++;
}
close(fd);
if(unlink("big") < 0) {
if(unlink("big") < 0){
printf(stdout, "unlink big failed\n");
exit();
}
@ -146,14 +148,14 @@ createtest(void)
name[0] = 'a';
name[2] = '\0';
for(i = 0; i < 52; i++) {
for(i = 0; i < 52; i++){
name[1] = '0' + i;
fd = open(name, O_CREATE|O_RDWR);
close(fd);
}
name[0] = 'a';
name[2] = '\0';
for(i = 0; i < 52; i++) {
for(i = 0; i < 52; i++){
name[1] = '0' + i;
unlink(name);
}
@ -164,22 +166,22 @@ void dirtest(void)
{
printf(stdout, "mkdir test\n");
if(mkdir("dir0") < 0) {
if(mkdir("dir0") < 0){
printf(stdout, "mkdir failed\n");
exit();
}
if(chdir("dir0") < 0) {
if(chdir("dir0") < 0){
printf(stdout, "chdir dir0 failed\n");
exit();
}
if(chdir("..") < 0) {
if(chdir("..") < 0){
printf(stdout, "chdir .. failed\n");
exit();
}
if(unlink("dir0") < 0) {
if(unlink("dir0") < 0){
printf(stdout, "unlink dir0 failed\n");
exit();
}
@ -190,7 +192,7 @@ void
exectest(void)
{
printf(stdout, "exec test\n");
if(exec("echo", echoargv) < 0) {
if(exec("echo", echoargv) < 0){
printf(stdout, "exec echo failed\n");
exit();
}
@ -324,20 +326,21 @@ mem(void)
void *m1, *m2;
int pid, ppid;
printf(1, "mem test\n");
ppid = getpid();
if((pid = fork()) == 0){
m1 = 0;
while((m2 = malloc(10001)) != 0) {
*(char**) m2 = m1;
while((m2 = malloc(10001)) != 0){
*(char**)m2 = m1;
m1 = m2;
}
while(m1) {
while(m1){
m2 = *(char**)m1;
free(m1);
m1 = m2;
}
m1 = malloc(1024*20);
if(m1 == 0) {
if(m1 == 0){
printf(1, "couldn't allocate mem?!!\n");
kill(ppid);
exit();
@ -1234,16 +1237,18 @@ forktest(void)
void
sbrktest(void)
{
int pid;
char *oldbrk = sbrk(0);
int fds[2], pid, pids[32], ppid;
char *a, *b, *c, *lastaddr, *oldbrk, *p, scratch;
uint amt;
printf(stdout, "sbrk test\n");
oldbrk = sbrk(0);
// can one sbrk() less than a page?
char *a = sbrk(0);
a = sbrk(0);
int i;
for(i = 0; i < 5000; i++){
char *b = sbrk(1);
b = sbrk(1);
if(b != a){
printf(stdout, "sbrk test failed %d %x %x\n", i, a, b);
exit();
@ -1256,7 +1261,7 @@ sbrktest(void)
printf(stdout, "sbrk test fork failed\n");
exit();
}
char *c = sbrk(1);
c = sbrk(1);
c = sbrk(1);
if(c != a + 1){
printf(stdout, "sbrk test failed post-fork\n");
@ -1268,18 +1273,18 @@ sbrktest(void)
// can one allocate the full 640K?
a = sbrk(0);
uint amt = (640 * 1024) - (uint) a;
char *p = sbrk(amt);
amt = (640 * 1024) - (uint)a;
p = sbrk(amt);
if(p != a){
printf(stdout, "sbrk test failed 640K test, p %x a %x\n", p, a);
exit();
}
char *lastaddr = (char *)(640 * 1024 - 1);
lastaddr = (char*)(640 * 1024 - 1);
*lastaddr = 99;
// is one forbidden from allocating more than 640K?
c = sbrk(4096);
if(c != (char *) 0xffffffff){
if(c != (char*)0xffffffff){
printf(stdout, "sbrk allocated more than 640K, c %x\n", c);
exit();
}
@ -1287,7 +1292,7 @@ sbrktest(void)
// can one de-allocate?
a = sbrk(0);
c = sbrk(-4096);
if(c == (char *) 0xffffffff){
if(c == (char*)0xffffffff){
printf(stdout, "sbrk could not deallocate\n");
exit();
}
@ -1311,15 +1316,15 @@ sbrktest(void)
}
c = sbrk(4096);
if(c != (char *) 0xffffffff){
if(c != (char*)0xffffffff){
printf(stdout, "sbrk was able to re-allocate beyond 640K, c %x\n", c);
exit();
}
// can we read the kernel's memory?
for(a = (char*)(640*1024); a < (char *)2000000; a += 50000){
int ppid = getpid();
int pid = fork();
for(a = (char*)(640*1024); a < (char*)2000000; a += 50000){
ppid = getpid();
pid = fork();
if(pid < 0){
printf(stdout, "fork failed\n");
exit();
@ -1332,6 +1337,38 @@ sbrktest(void)
wait();
}
// if we run the system out of memory, does it clean up the last
// failed allocation?
sbrk(-(sbrk(0) - oldbrk));
if(pipe(fds) != 0){
printf(1, "pipe() failed\n");
exit();
}
for(i = 0; i < sizeof(pids)/sizeof(pids[0]); i++){
if((pids[i] = fork()) == 0){
// allocate the full 640K
sbrk((640 * 1024) - (uint)sbrk(0));
write(fds[1], "x", 1);
// sit around until killed
for(;;) sleep(1000);
}
if(pids[i] != -1)
read(fds[0], &scratch, 1);
}
// if those failed allocations freed up the pages they did allocate,
// we'll be able to allocate here
c = sbrk(4096);
for(i = 0; i < sizeof(pids)/sizeof(pids[0]); i++){
if(pids[i] == -1)
continue;
kill(pids[i]);
wait();
}
if(c == (char*)0xffffffff){
printf(stdout, "failed sbrk leaked memory\n");
exit();
}
if(sbrk(0) > oldbrk)
sbrk(-(sbrk(0) - oldbrk));
@ -1339,26 +1376,89 @@ sbrktest(void)
}
void
stacktest(void)
validateint(int *p)
{
printf(stdout, "stack test\n");
char dummy = 1;
char *p = &dummy;
int ppid = getpid();
int pid = fork();
if(pid < 0){
printf(stdout, "fork failed\n");
exit();
int res;
asm("mov %%esp, %%ebx\n\t"
"mov %3, %%esp\n\t"
"int %2\n\t"
"mov %%ebx, %%esp" :
"=a" (res) :
"a" (SYS_sleep), "n" (T_SYSCALL), "c" (p) :
"ebx");
}
// For every page-aligned address p from 0 through 1100*1024, check that
// handing p to the kernel as a pointer does not crash it:
//  - a forked child passes p to validateint();
//  - the parent passes p to link() as a string pointer and requires
//    link() to return -1.
// Prints "validate ok" if the whole range is covered without a failure.
void
validatetest(void)
{
int hi, pid;
uint p;
printf(stdout, "validate test\n");
hi = 1100*1024;
for(p = 0; p <= (uint)hi; p += 4096){
if((pid = fork()) == 0){
// try to crash the kernel by passing in a badly placed integer
validateint((int*)p);
exit();
}
// NOTE(review): the two sleep(0) calls presumably give the child a
// chance to run before it is killed -- confirm.
sleep(0);
sleep(0);
kill(pid);
wait();
// try to crash the kernel by passing in a bad string pointer
if(link("nosuchfile", (char*)p) != -1){
printf(stdout, "link should not succeed\n");
exit();
}
}
printf(stdout, "validate ok\n");
}
// Does uninitialized data start out zero?
char uninit[10000];

// Walk every byte of uninit[]; report failure and exit as soon as a
// non-zero byte is seen, otherwise report success.
void
bsstest(void)
{
  char *cp;

  printf(stdout, "bss test\n");
  for(cp = uninit; cp < uninit + sizeof(uninit); cp++){
    if(*cp == '\0')
      continue;
    printf(stdout, "bss test failed\n");
    exit();
  }
  printf(stdout, "bss test ok\n");
}
// does exec do something sensible if the arguments
// are larger than a page?
void
bigargtest(void)
{
int pid, ppid;
ppid = getpid();
pid = fork();
if(pid == 0){
// should cause a trap:
p[-4096] = 'z';
kill(ppid);
printf(stdout, "stack test failed: page before stack was writeable\n");
char *args[32+1];
int i;
for(i = 0; i < 32; i++)
args[i] = "bigargs test: failed\n ";
args[32] = 0;
printf(stdout, "bigarg test\n");
exec("echo", args);
printf(stdout, "bigarg test ok\n");
exit();
} else if(pid < 0){
printf(stdout, "bigargtest: fork failed\n");
exit();
}
wait();
printf(stdout, "stack test OK\n");
}
int
@ -1372,8 +1472,10 @@ main(int argc, char *argv[])
}
close(open("usertests.ran", O_CREATE));
stacktest();
bigargtest();
bsstest();
sbrktest();
validatetest();
opentest();
writetest();

605
vm.c
View file

@ -6,96 +6,22 @@
#include "proc.h"
#include "elf.h"
// The mappings from logical to linear are one to one (i.e.,
// segmentation doesn't do anything).
// There is one page table per process, plus one that's used
// when a CPU is not running any process (kpgdir).
// A user process uses the same page table as the kernel; the
// page protection bits prevent it from using anything other
// than its memory.
//
// setupkvm() and exec() set up every page table like this:
// 0..640K : user memory (text, data, stack, heap)
// 640K..1M : mapped direct (for IO space)
// 1M..kernend : mapped direct (for the kernel's text and data)
// kernend..PHYSTOP : mapped direct (kernel heap and user pages)
// 0xfe000000..0 : mapped direct (devices such as ioapic)
//
// The kernel allocates memory for its heap and for user memory
// between kernend and the end of physical memory (PHYSTOP).
// The virtual address space of each user program includes the kernel
// (which is inaccessible in user mode). The user program addresses
// range from 0 till 640KB (USERTOP), which where the I/O hole starts
// (both in physical memory and in the kernel's virtual address
// space).
extern char data[]; // defined in data.S
#define USERTOP 0xA0000
static uint kerntext; // Linker starts kernel at 1MB
static uint kerntsz;
static uint kerndata;
static uint kerndsz;
static uint kernend;
static uint freesz;
static pde_t *kpgdir; // for use in scheduler()
// return the address of the PTE in page table pgdir
// that corresponds to linear address va. if create!=0,
// create any required page table pages.
static pte_t *
walkpgdir(pde_t *pgdir, const void *va, int create)
// Allocate one page table for the machine for the kernel address
// space for scheduler processes.
void
kvmalloc(void)
{
uint r;
pde_t *pde;
pte_t *pgtab;
pde = &pgdir[PDX(va)];
if (*pde & PTE_P) {
pgtab = (pte_t*) PTE_ADDR(*pde);
} else if (!create || !(r = (uint) kalloc()))
return 0;
else {
pgtab = (pte_t*) r;
// Make sure all those PTE_P bits are zero.
memset(pgtab, 0, PGSIZE);
// The permissions here are overly generous, but they can
// be further restricted by the permissions in the page table
// entries, if necessary.
*pde = PADDR(r) | PTE_P | PTE_W | PTE_U;
}
return &pgtab[PTX(va)];
}
// create PTEs for linear addresses starting at la that refer to
// physical addresses starting at pa. la and size might not
// be page-aligned.
static int
mappages(pde_t *pgdir, void *la, uint size, uint pa, int perm)
{
char *first = PGROUNDDOWN(la);
char *last = PGROUNDDOWN(la + size - 1);
char *a = first;
while(1){
pte_t *pte = walkpgdir(pgdir, a, 1);
if(pte == 0)
return 0;
if(*pte & PTE_P)
panic("remap");
*pte = pa | perm | PTE_P;
if(a == last)
break;
a += PGSIZE;
pa += PGSIZE;
}
return 1;
kpgdir = setupkvm();
}
// Set up CPU's kernel segment descriptors.
// Run once at boot time on each CPU.
void
ksegment(void)
seginit(void)
{
struct cpu *c;
@ -109,7 +35,7 @@ ksegment(void)
c->gdt[SEG_UCODE] = SEG(STA_X|STA_R, 0, 0xffffffff, DPL_USER);
c->gdt[SEG_UDATA] = SEG(STA_W, 0, 0xffffffff, DPL_USER);
// map cpu, and curproc
// Map cpu, and curproc
c->gdt[SEG_KCPU] = SEG(STA_W, &c->cpu, 8, 0);
lgdt(c->gdt, sizeof(c->gdt));
@ -120,250 +46,106 @@ ksegment(void)
proc = 0;
}
// Switch h/w page table and TSS registers to point to process p.
void
switchuvm(struct proc *p)
// Return the address of the PTE in page table pgdir
// that corresponds to linear address va. If create!=0,
// create any required page table pages.
static pte_t *
walkpgdir(pde_t *pgdir, const void *va, int create)
{
pushcli();
pde_t *pde;
pte_t *pgtab;
// Setup TSS
cpu->gdt[SEG_TSS] = SEG16(STS_T32A, &cpu->ts, sizeof(cpu->ts)-1, 0);
cpu->gdt[SEG_TSS].s = 0;
cpu->ts.ss0 = SEG_KDATA << 3;
cpu->ts.esp0 = (uint)proc->kstack + KSTACKSIZE;
ltr(SEG_TSS << 3);
if (p->pgdir == 0)
panic("switchuvm: no pgdir\n");
lcr3(PADDR(p->pgdir)); // switch to new address space
popcli();
pde = &pgdir[PDX(va)];
if(*pde & PTE_P){
pgtab = (pte_t*)PTE_ADDR(*pde);
} else {
if(!create || (pgtab = (pte_t*)kalloc()) == 0)
return 0;
// Make sure all those PTE_P bits are zero.
memset(pgtab, 0, PGSIZE);
// The permissions here are overly generous, but they can
// be further restricted by the permissions in the page table
// entries, if necessary.
*pde = PADDR(pgtab) | PTE_P | PTE_W | PTE_U;
}
return &pgtab[PTX(va)];
}
// Switch h/w page table register to the kernel-only page table, for when
// no process is running.
void
switchkvm()
// Create PTEs for linear addresses starting at la that refer to
// physical addresses starting at pa. la and size might not
// be page-aligned.
static int
mappages(pde_t *pgdir, void *la, uint size, uint pa, int perm)
{
lcr3(PADDR(kpgdir)); // Switch to the kernel page table
char *a, *last;
pte_t *pte;
a = PGROUNDDOWN(la);
last = PGROUNDDOWN(la + size - 1);
for(;;){
pte = walkpgdir(pgdir, a, 1);
if(pte == 0)
return -1;
if(*pte & PTE_P)
panic("remap");
*pte = pa | perm | PTE_P;
if(a == last)
break;
a += PGSIZE;
pa += PGSIZE;
}
return 0;
}
// The mappings from logical to linear are one to one (i.e.,
// segmentation doesn't do anything).
// There is one page table per process, plus one that's used
// when a CPU is not running any process (kpgdir).
// A user process uses the same page table as the kernel; the
// page protection bits prevent it from using anything other
// than its memory.
//
// setupkvm() and exec() set up every page table like this:
// 0..640K : user memory (text, data, stack, heap)
// 640K..1M : mapped direct (for IO space)
// 1M..end : mapped direct (for the kernel's text and data)
// end..PHYSTOP : mapped direct (kernel heap and user pages)
// 0xfe000000..0 : mapped direct (devices such as ioapic)
//
// The kernel allocates memory for its heap and for user memory
// between kernend and the end of physical memory (PHYSTOP).
// The virtual address space of each user program includes the kernel
// (which is inaccessible in user mode). The user program addresses
// range from 0 till 640KB (USERTOP), which is where the I/O hole starts
// (both in physical memory and in the kernel's virtual address
// space).
static struct kmap {
void *p;
void *e;
int perm;
} kmap[] = {
{(void*)USERTOP, (void*)0x100000, PTE_W}, // I/O space
{(void*)0x100000, data, 0 }, // kernel text, rodata
{data, (void*)PHYSTOP, PTE_W}, // kernel data, memory
{(void*)0xFE000000, 0, PTE_W}, // device mappings
};
// Set up kernel part of a page table.
pde_t*
setupkvm(void)
{
pde_t *pgdir;
struct kmap *k;
// Allocate page directory
if (!(pgdir = (pde_t *) kalloc()))
if((pgdir = (pde_t*)kalloc()) == 0)
return 0;
memset(pgdir, 0, PGSIZE);
// Map IO space from 640K to 1Mbyte
if (!mappages(pgdir, (void *)USERTOP, 0x60000, USERTOP, PTE_W))
return 0;
// Map kernel text read-only
if (!mappages(pgdir, (void *) kerntext, kerntsz, kerntext, 0))
return 0;
// Map kernel data read/write
if (!mappages(pgdir, (void *) kerndata, kerndsz, kerndata, PTE_W))
return 0;
// Map dynamically-allocated memory read/write (kernel stacks, user mem)
if (!mappages(pgdir, (void *) kernend, freesz, PADDR(kernend), PTE_W))
return 0;
// Map devices such as ioapic, lapic, ...
if (!mappages(pgdir, (void *)0xFE000000, 0x2000000, 0xFE000000, PTE_W))
return 0;
return pgdir;
}
// return the physical address that a given user address
// maps to. the result is also a kernel logical address,
// since the kernel maps the physical memory allocated to user
// processes directly.
char*
uva2ka(pde_t *pgdir, char *uva)
{
pte_t *pte = walkpgdir(pgdir, uva, 0);
if (pte == 0) return 0;
uint pa = PTE_ADDR(*pte);
return (char *)pa;
}
// allocate sz bytes more memory for a process starting at the
// given user address; allocates physical memory and page
// table entries. addr and sz need not be page-aligned.
// it is a no-op for any parts of the requested memory
// that are already allocated.
int
allocuvm(pde_t *pgdir, char *addr, uint sz)
{
if (addr + sz > (char*)USERTOP)
return 0;
char *first = PGROUNDDOWN(addr);
char *last = PGROUNDDOWN(addr + sz - 1);
char *a;
for(a = first; a <= last; a += PGSIZE){
pte_t *pte = walkpgdir(pgdir, a, 0);
if(pte == 0 || (*pte & PTE_P) == 0){
char *mem = kalloc();
if(mem == 0){
// XXX clean up?
return 0;
}
memset(mem, 0, PGSIZE);
mappages(pgdir, a, PGSIZE, PADDR(mem), PTE_W|PTE_U);
}
}
return 1;
}
// deallocate some of the user pages, in response to sbrk()
// with a negative argument. if addr is not page-aligned,
// then only deallocates starting at the next page boundary.
int
deallocuvm(pde_t *pgdir, char *addr, uint sz)
{
if (addr + sz > (char*)USERTOP)
return 0;
char *first = (char*) PGROUNDUP((uint)addr);
char *last = PGROUNDDOWN(addr + sz - 1);
char *a;
for(a = first; a <= last; a += PGSIZE){
pte_t *pte = walkpgdir(pgdir, a, 0);
if(pte && (*pte & PTE_P) != 0){
uint pa = PTE_ADDR(*pte);
if(pa == 0)
panic("deallocuvm");
kfree((void *) pa);
*pte = 0;
}
}
return 1;
}
// free a page table and all the physical memory pages
// in the user part.
void
freevm(pde_t *pgdir)
{
uint i, j, da;
if (!pgdir)
panic("freevm: no pgdir\n");
for (i = 0; i < NPDENTRIES; i++) {
da = PTE_ADDR(pgdir[i]);
if (da != 0) {
pte_t *pgtab = (pte_t*) da;
for (j = 0; j < NPTENTRIES; j++) {
if (pgtab[j] != 0) {
uint pa = PTE_ADDR(pgtab[j]);
uint va = PGADDR(i, j, 0);
if (va < USERTOP) // user memory
kfree((void *) pa);
pgtab[j] = 0;
}
}
kfree((void *) da);
pgdir[i] = 0;
}
}
kfree((void *) pgdir);
}
int
loaduvm(pde_t *pgdir, char *addr, struct inode *ip, uint offset, uint sz)
{
uint i, pa, n;
pte_t *pte;
if ((uint)addr % PGSIZE != 0)
panic("loaduvm: addr must be page aligned\n");
for (i = 0; i < sz; i += PGSIZE) {
if (!(pte = walkpgdir(pgdir, addr+i, 0)))
panic("loaduvm: address should exist\n");
pa = PTE_ADDR(*pte);
if (sz - i < PGSIZE) n = sz - i;
else n = PGSIZE;
if(readi(ip, (char *)pa, offset+i, n) != n)
k = kmap;
for(k = kmap; k < &kmap[NELEM(kmap)]; k++)
if(mappages(pgdir, k->p, k->e - k->p, (uint)k->p, k->perm) < 0)
return 0;
}
return 1;
}
void
inituvm(pde_t *pgdir, char *addr, char *init, uint sz)
{
uint i, pa, n, off;
pte_t *pte;
for (i = 0; i < sz; i += PGSIZE) {
if (!(pte = walkpgdir(pgdir, (void *)(i+addr), 0)))
panic("inituvm: pte should exist\n");
off = (i+(uint)addr) % PGSIZE;
pa = PTE_ADDR(*pte);
if (sz - i < PGSIZE) n = sz - i;
else n = PGSIZE;
memmove((char *)pa+off, init+i, n);
}
}
// given a parent process's page table, create a copy
// of it for a child.
pde_t*
copyuvm(pde_t *pgdir, uint sz)
{
pde_t *d = setupkvm();
pte_t *pte;
uint pa, i;
char *mem;
if (!d) return 0;
for (i = 0; i < sz; i += PGSIZE) {
if (!(pte = walkpgdir(pgdir, (void *)i, 0)))
panic("copyuvm: pte should exist\n");
if(*pte & PTE_P){
pa = PTE_ADDR(*pte);
if (!(mem = kalloc()))
return 0;
memmove(mem, (char *)pa, PGSIZE);
if (!mappages(d, (void *)i, PGSIZE, PADDR(mem), PTE_W|PTE_U))
return 0;
}
}
return d;
}
// Gather information about physical memory layout.
// Called once during boot.
// Really should find out how much physical memory
// there is rather than assuming PHYSTOP.
void
pminit(void)
{
extern char end[];
struct proghdr *ph;
struct elfhdr *elf = (struct elfhdr*)0x10000; // scratch space
if (elf->magic != ELF_MAGIC || elf->phnum != 2)
panic("pminit: need a text and data segment\n");
ph = (struct proghdr*)((uchar*)elf + elf->phoff);
kernend = ((uint)end + PGSIZE) & ~(PGSIZE-1);
kerntext = ph[0].va;
kerndata = ph[1].va;
kerntsz = ph[0].memsz;
kerndsz = ph[1].memsz;
freesz = PHYSTOP - kernend;
kinit((char *)kernend, freesz);
}
// Allocate one page table for the machine for the kernel address
// space for scheduler processes.
void
kvmalloc(void)
{
kpgdir = setupkvm();
return pgdir;
}
// Turn on paging.
@ -378,3 +160,208 @@ vmenable(void)
lcr0(cr0);
}
// Switch h/w page table register to the kernel-only page table,
// for when no process is running.
// Passes the address of kpgdir (the page directory that kvmalloc()
// obtains from setupkvm()) to lcr3().
void
switchkvm(void)
{
lcr3(PADDR(kpgdir)); // switch to the kernel page table
}
// Switch TSS and h/w page table to correspond to process p.
void
switchuvm(struct proc *p)
{
pushcli();
cpu->gdt[SEG_TSS] = SEG16(STS_T32A, &cpu->ts, sizeof(cpu->ts)-1, 0);
cpu->gdt[SEG_TSS].s = 0;
cpu->ts.ss0 = SEG_KDATA << 3;
cpu->ts.esp0 = (uint)proc->kstack + KSTACKSIZE;
ltr(SEG_TSS << 3);
if(p->pgdir == 0)
panic("switchuvm: no pgdir");
lcr3(PADDR(p->pgdir)); // switch to new address space
popcli();
}
// Load the initcode into address 0 of pgdir.
// sz must be less than a page.
// Allocates one zeroed page, maps it user-writable at virtual address 0
// and copies sz bytes from init into it.  Panics if sz is too large,
// if no page can be allocated, or if the mapping cannot be created.
void
inituvm(pde_t *pgdir, char *init, uint sz)
{
  char *mem;

  if(sz >= PGSIZE)
    panic("inituvm: more than a page");
  mem = kalloc();
  // kalloc() returns 0 when no page is available; fail loudly instead
  // of handing a null pointer to memset().
  if(mem == 0)
    panic("inituvm: out of memory");
  memset(mem, 0, PGSIZE);
  // mappages() reports failure with a negative value.
  if(mappages(pgdir, 0, PGSIZE, PADDR(mem), PTE_W|PTE_U) < 0)
    panic("inituvm: mappages");
  memmove(mem, init, sz);
}
// Load a program segment into pgdir.  addr must be page-aligned
// and the pages from addr to addr+sz must already be mapped.
// Reads sz bytes from inode ip, starting at file offset `offset`, one
// page-sized chunk at a time into the page that backs each address.
// Returns 0 on success, -1 if readi() delivers fewer bytes than asked.
int
loaduvm(pde_t *pgdir, char *addr, struct inode *ip, uint offset, uint sz)
{
  uint done, chunk, phys;
  pte_t *entry;

  if((uint)addr % PGSIZE != 0)
    panic("loaduvm: addr must be page aligned");

  done = 0;
  while(done < sz){
    entry = walkpgdir(pgdir, addr+done, 0);
    if(entry == 0)
      panic("loaduvm: address should exist");
    phys = PTE_ADDR(*entry);
    // The final chunk may be shorter than a full page.
    chunk = (sz - done < PGSIZE) ? sz - done : PGSIZE;
    if(readi(ip, (char*)phys, offset+done, chunk) != chunk)
      return -1;
    done += PGSIZE;
  }
  return 0;
}
// Allocate page tables and physical memory to grow process from oldsz to
// newsz, which need not be page aligned.  Returns new size or 0 on error.
// If newsz is smaller than oldsz nothing is allocated and oldsz is
// returned.  On any allocation failure the pages added so far are
// released again before returning 0.
int
allocuvm(pde_t *pgdir, uint oldsz, uint newsz)
{
  char *mem;
  uint a;

  if(newsz > USERTOP)
    return 0;
  if(newsz < oldsz)
    return oldsz;

  a = PGROUNDUP(oldsz);
  for(; a < newsz; a += PGSIZE){
    mem = kalloc();
    if(mem == 0){
      cprintf("allocuvm out of memory\n");
      deallocuvm(pgdir, newsz, oldsz);
      return 0;
    }
    memset(mem, 0, PGSIZE);
    // mappages() can fail too (it may need a page-table page).  In that
    // case mem was never entered in pgdir, so deallocuvm() cannot find
    // it; free it explicitly to avoid leaking the page.
    if(mappages(pgdir, (char*)a, PGSIZE, PADDR(mem), PTE_W|PTE_U) < 0){
      cprintf("allocuvm out of memory (2)\n");
      deallocuvm(pgdir, newsz, oldsz);
      kfree(mem);
      return 0;
    }
  }
  return newsz;
}
// Deallocate user pages to bring the process size from oldsz to
// newsz.  oldsz and newsz need not be page-aligned, nor does newsz
// need to be less than oldsz.  oldsz can be larger than the actual
// process size.  Returns the new process size (oldsz unchanged when
// newsz >= oldsz).
int
deallocuvm(pde_t *pgdir, uint oldsz, uint newsz)
{
  pte_t *entry;
  uint va, phys;

  if(newsz >= oldsz)
    return oldsz;

  for(va = PGROUNDUP(newsz); va < oldsz; va += PGSIZE){
    entry = walkpgdir(pgdir, (char*)va, 0);
    // Skip addresses that have no page table or no present page.
    if(entry == 0 || (*entry & PTE_P) == 0)
      continue;
    phys = PTE_ADDR(*entry);
    if(phys == 0)
      panic("kfree");
    kfree((char*)phys);
    *entry = 0;
  }
  return newsz;
}
// Free a page table and all the physical memory pages
// in the user part.
// First releases every user page below USERTOP, then each present
// page-table page, and finally the page directory itself.
void
freevm(pde_t *pgdir)
{
  uint idx;

  if(pgdir == 0)
    panic("freevm: no pgdir");

  deallocuvm(pgdir, USERTOP, 0);

  idx = 0;
  while(idx < NPDENTRIES){
    if((pgdir[idx] & PTE_P) != 0)
      kfree((char*)PTE_ADDR(pgdir[idx]));
    idx++;
  }
  kfree((char*)pgdir);
}
// Given a parent process's page table, create a copy
// of it for a child.
// Builds a fresh kernel mapping with setupkvm(), then duplicates every
// page in [0, sz) into newly allocated memory mapped at the same
// address.  Returns the new page directory, or 0 if any allocation
// fails (in which case everything built so far is freed).
pde_t*
copyuvm(pde_t *pgdir, uint sz)
{
  pde_t *d;
  pte_t *pte;
  uint pa, i;
  char *mem;

  if((d = setupkvm()) == 0)
    return 0;
  for(i = 0; i < sz; i += PGSIZE){
    if((pte = walkpgdir(pgdir, (void*)i, 0)) == 0)
      panic("copyuvm: pte should exist");
    if(!(*pte & PTE_P))
      panic("copyuvm: page not present");
    pa = PTE_ADDR(*pte);
    if((mem = kalloc()) == 0)
      goto bad;
    memmove(mem, (char*)pa, PGSIZE);
    if(mappages(d, (void*)i, PGSIZE, PADDR(mem), PTE_W|PTE_U) < 0){
      // mem is not yet reachable through d, so freevm(d) would miss it.
      kfree(mem);
      goto bad;
    }
  }
  return d;

bad:
  freevm(d);
  return 0;
}
//PAGEBREAK!
// Map user virtual address to kernel physical address.
// Returns 0 if uva has no page-table entry, or if the entry is not
// present or not user-accessible (PTE_U).
char*
uva2ka(pde_t *pgdir, char *uva)
{
  pte_t *pte;

  pte = walkpgdir(pgdir, uva, 0);
  // With create == 0, walkpgdir() returns 0 when no page table covers
  // uva; do not dereference it in that case.
  if(pte == 0)
    return 0;
  if((*pte & PTE_P) == 0)
    return 0;
  if((*pte & PTE_U) == 0)
    return 0;
  return (char*)PTE_ADDR(*pte);
}
// Copy len bytes from p to user address va in page table pgdir.
// Most useful when pgdir is not the current page table.
// uva2ka ensures this only works for PTE_U pages.
// The copy proceeds page by page; returns 0 on success or -1 as soon
// as a destination page cannot be translated.
int
copyout(pde_t *pgdir, uint va, void *p, uint len)
{
  char *src, *page;
  uint pagebase, off, chunk;

  src = (char*)p;
  while(len > 0){
    pagebase = (uint)PGROUNDDOWN(va);
    page = uva2ka(pgdir, (char*)pagebase);
    if(page == 0)
      return -1;
    // Copy at most up to the end of the current page.
    off = va - pagebase;
    chunk = PGSIZE - off;
    if(chunk > len)
      chunk = len;
    memmove(page + off, src, chunk);
    len -= chunk;
    src += chunk;
    va = pagebase + PGSIZE;
  }
  return 0;
}

View file

@ -1,3 +0,0 @@
index.html: index.txt mkhtml
./mkhtml index.txt >_$@ && mv _$@ $@

BIN
web/boot.pdf Normal file

Binary file not shown.

BIN
web/disk.pdf Normal file

Binary file not shown.

BIN
web/exec.pdf Normal file

Binary file not shown.

BIN
web/fscall.pdf Normal file

Binary file not shown.

BIN
web/fsdata.pdf Normal file

Binary file not shown.

View file

@ -1,4 +1,3 @@
<!-- AUTOMATICALLY GENERATED: EDIT the .txt version, not the .html version -->
<html>
<head>
<title>Xv6, a simple Unix-like teaching operating system</title>
@ -32,31 +31,36 @@ h2 {
--></style>
</head>
<body bgcolor=#ffffff>
<h1>Xv6, a simple Unix-like teaching operating system</h1>
<br><br>
Xv6 is a teaching operating system developed
in the summer of 2006 for MIT's operating systems course,
&ldquo;6.828: Operating Systems Engineering.&rdquo;
We used it for 6.828 in Fall 2006 and Fall 2007
and are using it this semester (Fall 2008).
We hope that xv6 will be useful in other courses too.
This page collects resources to aid the use of xv6
in other courses.
<h2>Introduction</h2>
Xv6 is a teaching operating system developed in the summer of 2006 for
MIT's operating systems
course, <a href="http://pdos.csail.mit.edu/6.828">6.828: Operating
Systems Engineering</a>. We hope that xv6 will be useful in other
courses too. This page collects resources to aid the use of xv6 in
other courses, including a commentary on the source code itself.
<p><font color="red">Status: The xv6 code is in pretty good shape, but
the commentary is rough.</font>
<h2>History and Background</h2>
For many years, MIT had no operating systems course.
In the fall of 2002, Frans Kaashoek, Josh Cates, and Emil Sit
created a new, experimental course (6.097)
to teach operating systems engineering.
In the course lectures, the class worked through Sixth Edition Unix (aka V6)
using John Lions's famous commentary.
In the lab assignments, students wrote most of an exokernel operating
system, eventually named Jos, for the Intel x86.
Exposing students to multiple systems&ndash;V6 and Jos&ndash;helped
develop a sense of the spectrum of operating system designs.
In the fall of 2003, the experimental 6.097 became the
official course 6.828; the course has been offered each fall since then.
<br><br>
<p>For many years, MIT had no operating systems course. In the fall
of 2002, Frans Kaashoek, Josh Cates, and Emil Sit created a new,
experimental course (6.097) to teach operating systems engineering.
In the course lectures, the class worked through <a href="#v6">Sixth
Edition Unix (aka V6)</a> using John Lions's famous commentary. In
the lab assignments, students wrote most of an exokernel operating
system, eventually named Jos, for the Intel x86. Exposing students to
multiple systems&ndash;V6 and Jos&ndash;helped develop a sense of the
spectrum of operating system designs. In the fall of 2003, the
experimental 6.097 became the official course 6.828; the course has
been offered each fall since then.
<p>
V6 presented pedagogic challenges from the start.
Students doubted the relevance of an obsolete 30-year-old operating system
written in an obsolete programming language (pre-K&R C)
@ -76,13 +80,12 @@ uniprocessors such as
enabling/disabling interrupts) and helps relevance.
Finally, writing a new system allowed us to write cleaner versions
of the rougher parts of V6, like the scheduler and file system.
<br><br>
6.828 substituted xv6 for V6 in the fall of 2006.
Based on that experience, we cleaned up rough patches
of xv6 for the course in the fall of 2007.
Since then, xv6 has stabilized, so we are making it
available in the hopes that others will find it useful too.
<br><br>
<p> 6.828 substituted xv6 for V6 in the fall of 2006. Based on
that experience, we cleaned up rough patches of xv6. Since then, xv6
has stabilized, so we are making it available in the hopes that others
will find it useful too.
<p>
6.828 uses both xv6 and Jos.
Courses taught at UCLA, NYU, Peking University, Stanford, Tsinghua,
and the University of Texas (Austin) have used
@ -90,14 +93,16 @@ Jos without xv6; we believe other courses could use
xv6 without Jos, though we are not aware of any that have.
<h2>Xv6 sources</h2>
The latest xv6 is <a href="xv6-rev3.tar.gz">xv6-rev3.tar.gz</a>.
The latest xv6 is <a href="xv6-rev5.tar.gz">xv6-rev5.tar.gz</a>.
We distribute the sources in electronic form but also as
a printed booklet with line numbers that keep everyone
together during lectures. The booklet is available as
<a href="xv6-rev3.pdf">xv6-rev3.pdf</a>.
<a href="xv6-rev5.pdf">xv6-rev5.pdf</a>.
The xv6 source code is licensed under the traditional <a href="http://www.opensource.org/licenses/mit-license.php">MIT license</a>;
see the LICENSE file in the source distribution.
<br><br>
<p>
xv6 compiles using the GNU C compiler,
targeted at the x86 using ELF binaries.
On BSD and Linux systems, you can use the native compilers;
@ -106,239 +111,131 @@ you must use a cross-compiler.
Xv6 does boot on real hardware, but typically
we run it using the Bochs emulator.
Both the GCC cross compiler and Bochs
can be found on the <a href="../../2007/tools.html">6.828 tools page</a>.
can be found on the <a href="../2010/tools.html">6.828 tools page</a>.
<h2>Lectures</h2>
In 6.828, the lectures in the first half of the course
introduce the PC hardware, the Intel x86, and then xv6.
The lectures in the second half consider advanced topics
using research papers; for some, xv6 serves as a useful
base for making discussions concrete.
This section describes a typical 6.828 lecture schedule,
linking to lecture notes and homework.
A course using only xv6 (not Jos) will need to adapt
a few of the lectures, but we hope these are a useful
starting point.
<h2>Xv6 lecture material</h2>
<br><br><b><i>Lecture 1. Operating systems</i></b>
<br><br>
The first lecture introduces both the general topic of
operating systems and the specific approach of 6.828.
After defining &ldquo;operating system,&rdquo; the lecture
examines the implementation of a Unix shell
to look at the details of the traditional Unix system call interface.
This is relevant to both xv6 and Jos: in the final
Jos labs, students implement a Unix-like interface,
culminating in a Unix shell.
<br><br>
<a href="l1.html">lecture notes</a>
<a href="os-lab-1.pdf">OS abstractions slides</a>
In 6.828, the lectures in the first half of the course introduce the
PC hardware, the Intel x86, and then xv6. The lectures in the second
half consider advanced topics using research papers; for some, xv6
serves as a useful base for making discussions concrete. The lecture
notes are available from the 6.828 schedule page, and the chapters of
the commentary are below.
<br><br><b><i>Lecture 2. PC hardware and x86 programming</i></b>
<br><br>
This lecture introduces the PC architecture, the 16- and 32-bit x86,
the stack, and the GCC x86 calling conventions.
It also introduces the pieces of a typical C tool chain&ndash;compiler,
assembler, linker, loader&ndash;and the Bochs emulator.
<br><br>
Reading: PC Assembly Language
<br><br>
Homework: familiarize with Bochs
<br><br>
<a href="l2.html">lecture notes</a>
<a href="os-lab-2.pdf">x86 intro slides</a>
<a href="x86-intro.html">homework</a>
<h2>Xv6 commentary (rough)</h2>
<br><br><b><i>Lecture 3. Operating system organization</i></b>
<br><br>
This lecture continues Lecture 1's discussion of what
an operating system does.
An operating system provides a &ldquo;virtual computer&rdquo;
interface to user space programs.
At a high level, the main job of the operating system
is to implement that interface
using the physical computer it runs on.
<br><br>
The lecture discusses four approaches to that job:
monolithic operating systems, microkernels,
virtual machines, and exokernels.
Exokernels might not be worth mentioning
except that the Jos labs are built around one.
<br><br>
Reading: Engler et al., Exokernel: An Operating System Architecture
for Application-Level Resource Management
<br><br>
<a href="l3.html">lecture notes</a>
<p>The chapters are rough drafts.
<br><br><b><i>Lecture 4. Address spaces using segmentation</i></b>
<br><br>
This is the first lecture that uses xv6.
It introduces the idea of address spaces and the
details of the x86 segmentation hardware.
It makes the discussion concrete by reading the xv6
source code and watching xv6 execute using the Bochs simulator.
<br><br>
Reading: x86 MMU handout,
xv6: bootasm.S, bootother.S, <a href="src/bootmain.c.html">bootmain.c</a>, <a href="src/main.c.html">main.c</a>, <a href="src/init.c.html">init.c</a>, and setupsegs in <a href="src/proc.c.html">proc.c</a>.
<br><br>
Homework: Bochs stack introduction
<br><br>
<a href="l4.html">lecture notes</a>
<a href="os-lab-3.pdf">x86 virtual memory slides</a>
<a href="xv6-intro.html">homework</a>
<p>Introduction yet to be written.<br>
<ul>
<li>read with the code side by side
<li>code references look like (xxxx) or (xxxx-yyyy) in small text.
<li><a href="xv6-rev5.pdf">this pdf</a> is the one with matching line numbers.
<li>each chapter starts with an introduction to the topic,
spends most of the text on code,
and then wraps up talking about how xv6
compares to real-world operating systems.
</ul>
<br><br><b><i>Lecture 5. Address spaces using page tables</i></b>
<br><br>
This lecture continues the discussion of address spaces,
examining the other x86 virtual memory mechanism: page tables.
Xv6 does not use page tables, so there is no xv6 here.
Instead, the lecture uses Jos as a concrete example.
An xv6-only course might skip or shorten this discussion.
<br><br>
Reading: x86 manual excerpts
<br><br>
Homework: stuff about gdt
XXX not appropriate; should be in Lecture 4
<br><br>
<a href="l5.html">lecture notes</a>
<a href="unix.pdf">Chapter 0: Operating system interfaces</a>
<blockquote>
The Unix system call interface. (rev 4)
</blockquote>
<br><br><b><i>Lecture 6. Interrupts and exceptions</i></b>
<br><br>
How does a user program invoke the operating system kernel?
How does the kernel return to the user program?
What happens when a hardware device needs attention?
This lecture explains the answer to these questions:
interrupt and exception handling.
<br><br>
It explains the x86 trap setup mechanisms and then
examines their use in xv6's SETGATE (<a href="src/mmu.h.html">mmu.h</a>),
tvinit (<a href="src/trap.c.html">trap.c</a>), idtinit (<a href="src/trap.c.html">trap.c</a>), <a href="src/vectors.pl.html">vectors.pl</a>, and vectors.S.
<br><br>
It then traces through a call to the system call open:
<a href="src/init.c.html">init.c</a>, usys.S, vector48 and alltraps (vectors.S), trap (<a href="src/trap.c.html">trap.c</a>),
syscall (<a href="src/syscall.c.html">syscall.c</a>),
sys_open (<a href="src/sysfile.c.html">sysfile.c</a>), fetcharg, fetchint, argint, argptr, argstr (<a href="src/syscall.c.html">syscall.c</a>).
<br><br>
The interrupt controller, briefly:
pic_init and pic_enable (<a href="src/picirq.c.html">picirq.c</a>).
The timer and keyboard, briefly:
timer_init (<a href="src/timer.c.html">timer.c</a>), console_init (<a href="src/console.c.html">console.c</a>).
Enabling and disabling of interrupts.
<br><br>
Reading: x86 manual excerpts,
xv6: trapasm.S, <a href="src/trap.c.html">trap.c</a>, <a href="src/syscall.c.html">syscall.c</a>, and usys.S.
Skim <a href="src/lapic.c.html">lapic.c</a>, <a href="src/ioapic.c.html">ioapic.c</a>, <a href="src/picirq.c.html">picirq.c</a>.
<br><br>
Homework: Explain the 35 words on the top of the
stack at first invocation of <code>syscall</code>.
<br><br>
<a href="l-interrupt.html">lecture notes</a>
<a href="x86-intr.html">homework</a>
<a href="boot.pdf">Chapter 1: Bootstrap</a>
<blockquote>
From power on to kernel start. (rev 4)
</blockquote>
<br><br><b><i>Lecture 7. Multiprocessors and locking</i></b>
<br><br>
This lecture introduces the problems of
coordination and synchronization on a
multiprocessor
and then the solution of mutual exclusion locks.
Atomic instructions, test-and-set locks,
lock granularity, (the mistake of) recursive locks.
<br><br>
Although xv6 user programs cannot share memory,
the xv6 kernel itself is a program with multiple threads
executing concurrently and sharing memory.
Illustration: the xv6 scheduler's proc_table_lock (<a href="src/proc.c.html">proc.c</a>)
and the spin lock implementation (<a href="src/spinlock.c.html">spinlock.c</a>).
<br><br>
Reading: xv6: <a href="src/spinlock.c.html">spinlock.c</a>. Skim <a href="src/mp.c.html">mp.c</a>.
<br><br>
Homework: Interaction between locking and interrupts.
Try not disabling interrupts in the disk driver and watch xv6 break.
<br><br>
<a href="l-lock.html">lecture notes</a>
<a href="xv6-lock.html">homework</a>
<a href="mem.pdf">Chapter 2: Processes</a>
<blockquote>
Memory and process allocation, segments, the first user process. (rev 4)
</blockquote>
<br><br><b><i>Lecture 8. Threads, processes and context switching</i></b>
<br><br>
The last lecture introduced some of the issues
in writing threaded programs, using xv6's processes
as an example.
This lecture introduces the issues in implementing
threads, continuing to use xv6 as the example.
<br><br>
The lecture defines a thread of computation as a register
set and a stack. A process is an address space plus one
or more threads of computation sharing that address space.
Thus the xv6 kernel can be viewed as a single process
with many threads (each user process) executing concurrently.
<br><br>
Illustrations: thread switching (swtch.S), scheduler (<a href="src/proc.c.html">proc.c</a>), sys_fork (<a href="src/sysproc.c.html">sysproc.c</a>)
<br><br>
Reading: <a href="src/proc.c.html">proc.c</a>, swtch.S, sys_fork (<a href="src/sysproc.c.html">sysproc.c</a>)
<br><br>
Homework: trace through stack switching.
<br><br>
<a href="l-threads.html">lecture notes (need to be updated to use swtch)</a>
<a href="xv6-sched.html">homework</a>
<a href="trap.pdf">Chapter 3: Traps</a>
<blockquote>
Low-level trap mechanism, trap handler, system call arguments, sbrk, fork.
</blockquote>
<br><br><b><i>Lecture 9. Processes and coordination</i></b>
<br><br>
This lecture introduces the idea of sequence coordination
and then examines the particular solution illustrated by
sleep and wakeup (<a href="src/proc.c.html">proc.c</a>).
It introduces and refines a simple
producer/consumer queue to illustrate the
need for sleep and wakeup
and then the sleep and wakeup
implementations themselves.
<br><br>
Reading: <a href="src/proc.c.html">proc.c</a>, sys_exec, sys_sbrk, sys_wait, sys_exec, sys_kill (<a href="src/sysproc.c.html">sysproc.c</a>).
<br><br>
Homework: Explain how sleep and wakeup would break
without proc_table_lock. Explain how devices would break
without second lock argument to sleep.
<br><br>
<a href="l-coordination.html">lecture notes</a>
<a href="xv6-sleep.html">homework</a>
<a href="lock.pdf">Chapter 4: Locks</a>
<blockquote>
Locks and interrupts.
</blockquote>
<br><br><b><i>Lecture 10. Files and disk I/O</i></b>
<br><br>
This is the first of three file system lectures.
This lecture introduces the basic file system interface
and then considers the on-disk layout of individual files
and the free block bitmap.
<br><br>
Reading: iread, iwrite, fileread, filewrite, wdir, mknod1, and
code related to these calls in <a href="src/fs.c.html">fs.c</a>, <a href="src/bio.c.html">bio.c</a>, <a href="src/ide.c.html">ide.c</a>, and <a href="src/file.c.html">file.c</a>.
<br><br>
Homework: Add print to bwrite to trace every disk write.
Explain the disk writes caused by some simple shell commands.
<br><br>
<a href="l-fs.html">lecture notes</a>
<a href="xv6-disk.html">homework</a>
<a href="sched.pdf">Chapter 5: Scheduling and coordination</a>
<blockquote>
Scheduling, sleep and wakeup, pipes, wait and exit.
</blockquote>
<br><br><b><i>Lecture 11. Naming</i></b>
<br><br>
The last lecture discussed on-disk file system representation.
This lecture covers the implementation of
file system paths (namei in <a href="src/fs.c.html">fs.c</a>)
and also discusses the security problems of a shared /tmp
and symbolic links.
<br><br>
Understanding exec (<a href="src/exec.c.html">exec.c</a>) is left as an exercise.
<br><br>
Reading: namei in <a href="src/fs.c.html">fs.c</a>, <a href="src/sysfile.c.html">sysfile.c</a>, <a href="src/file.c.html">file.c</a>.
<br><br>
Homework: Explain how to implement symbolic links in xv6.
<br><br>
<a href="l-name.html">lecture notes</a>
<a href="xv6-names.html">homework</a>
<a href="disk.pdf">Chapter 6: Buffer cache</a>
<blockquote>
Buffer cache and IDE disk driver.
</blockquote>
<br><br><b><i>Lecture 12. High-performance file systems</i></b>
<br><br>
This lecture is the first of the research paper-based lectures.
It discusses the &ldquo;soft updates&rdquo; paper,
using xv6 as a concrete example.
<a href="fsdata.pdf">Chapter 7: File system data</a>
<blockquote>
Block in use bitmap, block allocation, inode structure, inode contents,
directories, path names.
</blockquote>
<a href="fscall.pdf">Chapter 8: File system calls</a>
<blockquote>
File descriptors, open, close, dup, read, write.
</blockquote>
<a href="exec.pdf">Chapter 9: Exec</a>
<blockquote>
Exec
</blockquote>
Appendix A: Low-level C and inline assembly
<blockquote>
Intro to C and inline assembly for people who only know Java (say).
Examples drawn entirely from xv6 source.
</blockquote>
Appendix B: Additional drivers.
<blockquote>
Keyboard, screen, probably MP hardware.
</blockquote>
<a name="v6"></a>
<h2>Unix Version 6</h2>
<p>6.828's xv6 is inspired by Unix V6 and by:
<ul>
<li>Lions' <i>Commentary on UNIX 6th Edition</i>, John Lions, Peer to
Peer Communications; ISBN: 1-57398-013-7; 1st edition (June 14, 2000).
<ul>
<li>An on-line version of the <a
href="http://www.lemis.com/grog/Documentation/Lions/">Lions
commentary</a>, and <a href="http://v6.cuzuco.com/">the source code</a>.
<li>The v6 source code is also available <a
href="http://minnie.tuhs.org/UnixTree/V6/usr/sys/">online</a>
through <a
href="http://minnie.tuhs.org/PUPS/">the PDP Unix Preservation
Society</a>.
</ul>
</ul>
The following are useful to read the original code:
<ul>
<li><i>
The PDP11/40 Processor Handbook</i>, Digital Equipment Corporation, 1972.
<ul>
<li>A <a href="http://pdos.csail.mit.edu/6.828/2005/readings/pdp11-40.pdf">PDF</a> (made from scanned images,
and not text-searchable)
<li>A <a href="http://pdos.csail.mit.edu/6.828/2005/pdp11/">web-based
version</a> that is indexed by instruction name.
</ul>
</ul>
<h2>Feedback</h2>
If you are interested in using xv6 or have used xv6 in a course,
@ -346,13 +243,10 @@ we would love to hear from you.
If there's anything that we can do to make xv6 easier
to adopt, we'd like to hear about it.
We'd also be interested to hear what worked well and what didn't.
<br><br>
<p>
Russ Cox (rsc@swtch.com)<br>
Frans Kaashoek (kaashoek@mit.edu)<br>
Robert Morris (rtm@mit.edu)
<br><br>
<p>
You can reach all of us at 6.828-staff@pdos.csail.mit.edu.
<br><br>
<br><br>
</body>
</html>

View file

@ -1,339 +0,0 @@
** Xv6, a simple Unix-like teaching operating system
Xv6 is a teaching operating system developed
in the summer of 2006 for MIT's operating systems course,
``6.828: Operating Systems Engineering.''
We used it for 6.828 in Fall 2006 and Fall 2007
and are using it this semester (Fall 2008).
We hope that xv6 will be useful in other courses too.
This page collects resources to aid the use of xv6
in other courses.
* History and Background
For many years, MIT had no operating systems course.
In the fall of 2002, Frans Kaashoek, Josh Cates, and Emil Sit
created a new, experimental course (6.097)
to teach operating systems engineering.
In the course lectures, the class worked through Sixth Edition Unix (aka V6)
using John Lions's famous commentary.
In the lab assignments, students wrote most of an exokernel operating
system, eventually named Jos, for the Intel x86.
Exposing students to multiple systems--V6 and Jos--helped
develop a sense of the spectrum of operating system designs.
In the fall of 2003, the experimental 6.097 became the
official course 6.828; the course has been offered each fall since then.
V6 presented pedagogic challenges from the start.
Students doubted the relevance of an obsolete 30-year-old operating system
written in an obsolete programming language (pre-K&R C)
running on obsolete hardware (the PDP-11).
Students also struggled to learn the low-level details of two different
architectures (the PDP-11 and the Intel x86) at the same time.
By the summer of 2006, we had decided to replace V6
with a new operating system, xv6, modeled on V6
but written in ANSI C and running on multiprocessor
Intel x86 machines.
Xv6's use of the x86 makes it more relevant to
students' experience than V6 was
and unifies the course around a single architecture.
Adding multiprocessor support requires handling concurrency head on with
locks and threads (instead of using special-case solutions for
uniprocessors such as
enabling/disabling interrupts) and helps relevance.
Finally, writing a new system allowed us to write cleaner versions
of the rougher parts of V6, like the scheduler and file system.
6.828 substituted xv6 for V6 in the fall of 2006.
Based on that experience, we cleaned up rough patches
of xv6 for the course in the fall of 2007.
Since then, xv6 has stabilized, so we are making it
available in the hopes that others will find it useful too.
6.828 uses both xv6 and Jos.
Courses taught at UCLA, NYU, Peking University, Stanford, Tsinghua,
and the University of Texas (Austin) have used
Jos without xv6; we believe other courses could use
xv6 without Jos, though we are not aware of any that have.
* Xv6 sources
The latest xv6 is [xv6-rev2.tar.gz].
We distribute the sources in electronic form but also as
a printed booklet with line numbers that keep everyone
together during lectures. The booklet is available as
[xv6-rev2.pdf].
xv6 compiles using the GNU C compiler,
targeted at the x86 using ELF binaries.
On BSD and Linux systems, you can use the native compilers.
On OS X, which doesn't use ELF binaries,
you must use a cross-compiler.
Xv6 does boot on real hardware, but typically
we run it using the Bochs emulator.
Both the GCC cross compiler and Bochs
can be found on the [../../2007/tools.html | 6.828 tools page].
* Lectures
In 6.828, the lectures in the first half of the course
introduce the PC hardware, the Intel x86, and then xv6.
The lectures in the second half consider advanced topics
using research papers; for some, xv6 serves as a useful
base for making discussions concrete.
This section describes a typical 6.828 lecture schedule,
linking to lecture notes and homework.
A course using only xv6 (not Jos) will need to adapt
a few of the lectures, but we hope these are a useful
starting point.
Lecture 1. Operating systems
The first lecture introduces both the general topic of
operating systems and the specific approach of 6.828.
After defining ``operating system,'' the lecture
examines the implementation of a Unix shell
to look at the details of the traditional Unix system call interface.
This is relevant to both xv6 and Jos: in the final
Jos labs, students implement a Unix-like interface,
culminating in a Unix shell.
[l1.html | lecture notes]
[os-lab-1.pdf | OS abstractions slides]
Lecture 2. PC hardware and x86 programming
This lecture introduces the PC architecture, the 16- and 32-bit x86,
the stack, and the GCC x86 calling conventions.
It also introduces the pieces of a typical C tool chain--compiler,
assembler, linker, loader--and the Bochs emulator.
Reading: PC Assembly Language
Homework: familiarize with Bochs
[l2.html | lecture notes]
[os-lab-2.pdf | x86 intro slides]
[x86-intro.html | homework]
Lecture 3. Operating system organization
This lecture continues Lecture 1's discussion of what
an operating system does.
An operating system provides a ``virtual computer''
interface to user space programs.
At a high level, the main job of the operating system
is to implement that interface
using the physical computer it runs on.
The lecture discusses four approaches to that job:
monolithic operating systems, microkernels,
virtual machines, and exokernels.
Exokernels might not be worth mentioning
except that the Jos labs are built around one.
Reading: Engler et al., Exokernel: An Operating System Architecture
for Application-Level Resource Management
[l3.html | lecture notes]
Lecture 4. Address spaces using segmentation
This is the first lecture that uses xv6.
It introduces the idea of address spaces and the
details of the x86 segmentation hardware.
It makes the discussion concrete by reading the xv6
source code and watching xv6 execute using the Bochs simulator.
Reading: x86 MMU handout,
xv6: bootasm.S, bootother.S, bootmain.c, main.c, init.c, and setupsegs in proc.c.
Homework: Bochs stack introduction
[l4.html | lecture notes]
[os-lab-3.pdf | x86 virtual memory slides]
[xv6-intro.html | homework]
Lecture 5. Address spaces using page tables
This lecture continues the discussion of address spaces,
examining the other x86 virtual memory mechanism: page tables.
Xv6 does not use page tables, so there is no xv6 here.
Instead, the lecture uses Jos as a concrete example.
An xv6-only course might skip or shorten this discussion.
Reading: x86 manual excerpts
Homework: stuff about gdt
XXX not appropriate; should be in Lecture 4
[l5.html | lecture notes]
Lecture 6. Interrupts and exceptions
How does a user program invoke the operating system kernel?
How does the kernel return to the user program?
What happens when a hardware device needs attention?
This lecture explains the answer to these questions:
interrupt and exception handling.
It explains the x86 trap setup mechanisms and then
examines their use in xv6's SETGATE (mmu.h),
tvinit (trap.c), idtinit (trap.c), vectors.pl, and vectors.S.
It then traces through a call to the system call open:
init.c, usys.S, vector48 and alltraps (vectors.S), trap (trap.c),
syscall (syscall.c),
sys_open (sysfile.c), fetcharg, fetchint, argint, argptr, argstr (syscall.c).
The interrupt controller, briefly:
pic_init and pic_enable (picirq.c).
The timer and keyboard, briefly:
timer_init (timer.c), console_init (console.c).
Enabling and disabling of interrupts.
Reading: x86 manual excerpts,
xv6: trapasm.S, trap.c, syscall.c, and usys.S.
Skim lapic.c, ioapic.c, picirq.c.
Homework: Explain the 35 words on the top of the
stack at first invocation of <code>syscall</code>.
[l-interrupt.html | lecture notes]
[x86-intr.html | homework]
Lecture 7. Multiprocessors and locking
This lecture introduces the problems of
coordination and synchronization on a
multiprocessor
and then the solution of mutual exclusion locks.
Atomic instructions, test-and-set locks,
lock granularity, (the mistake of) recursive locks.
Although xv6 user programs cannot share memory,
the xv6 kernel itself is a program with multiple threads
executing concurrently and sharing memory.
Illustration: the xv6 scheduler's proc_table_lock (proc.c)
and the spin lock implementation (spinlock.c).
Reading: xv6: spinlock.c. Skim mp.c.
Homework: Interaction between locking and interrupts.
Try not disabling interrupts in the disk driver and watch xv6 break.
[l-lock.html | lecture notes]
[xv6-lock.html | homework]
Lecture 8. Threads, processes and context switching
The last lecture introduced some of the issues
in writing threaded programs, using xv6's processes
as an example.
This lecture introduces the issues in implementing
threads, continuing to use xv6 as the example.
The lecture defines a thread of computation as a register
set and a stack. A process is an address space plus one
or more threads of computation sharing that address space.
Thus the xv6 kernel can be viewed as a single process
with many threads (each user process) executing concurrently.
Illustrations: thread switching (swtch.S), scheduler (proc.c), sys_fork (sysproc.c)
Reading: proc.c, swtch.S, sys_fork (sysproc.c)
Homework: trace through stack switching.
[l-threads.html | lecture notes (need to be updated to use swtch)]
[xv6-sched.html | homework]
Lecture 9. Processes and coordination
This lecture introduces the idea of sequence coordination
and then examines the particular solution illustrated by
sleep and wakeup (proc.c).
It introduces and refines a simple
producer/consumer queue to illustrate the
need for sleep and wakeup
and then the sleep and wakeup
implementations themselves.
Reading: proc.c, sys_exec, sys_sbrk, sys_wait, sys_exec, sys_kill (sysproc.c).
Homework: Explain how sleep and wakeup would break
without proc_table_lock. Explain how devices would break
without second lock argument to sleep.
[l-coordination.html | lecture notes]
[xv6-sleep.html | homework]
Lecture 10. Files and disk I/O
This is the first of three file system lectures.
This lecture introduces the basic file system interface
and then considers the on-disk layout of individual files
and the free block bitmap.
Reading: iread, iwrite, fileread, filewrite, wdir, mknod1, and
code related to these calls in fs.c, bio.c, ide.c, and file.c.
Homework: Add print to bwrite to trace every disk write.
Explain the disk writes caused by some simple shell commands.
[l-fs.html | lecture notes]
[xv6-disk.html | homework]
Lecture 11. Naming
The last lecture discussed on-disk file system representation.
This lecture covers the implementation of
file system paths (namei in fs.c)
and also discusses the security problems of a shared /tmp
and symbolic links.
Understanding exec (exec.c) is left as an exercise.
Reading: namei in fs.c, sysfile.c, file.c.
Homework: Explain how to implement symbolic links in xv6.
[l-name.html | lecture notes]
[xv6-names.html | homework]
Lecture 12. High-performance file systems
This lecture is the first of the research paper-based lectures.
It discusses the ``soft updates'' paper,
using xv6 as a concrete example.
* Feedback
If you are interested in using xv6 or have used xv6 in a course,
we would love to hear from you.
If there's anything that we can do to make xv6 easier
to adopt, we'd like to hear about it.
We'd also be interested to hear what worked well and what didn't.
Russ Cox (rsc@swtch.com)<br>
Frans Kaashoek (kaashoek@mit.edu)<br>
Robert Morris (rtm@mit.edu)
You can reach all of us at 6.828-staff@pdos.csail.mit.edu.
xv6 and lecture notes are copyright &copy; 2006-present by Russ Cox,
Frans Kaashoek, and Robert Morris.

BIN
web/lock.pdf Normal file

Binary file not shown.

BIN
web/mem.pdf Normal file

Binary file not shown.

View file

@ -1,70 +0,0 @@
#!/usr/bin/perl
# Convert a lightly marked-up text file (read from the files named on the
# command line, or stdin) into a complete HTML page written to stdout.
#
# Markup recognized in the input:
#   "** Title"        first such line becomes the page title
#   "* Heading"       becomes an <h2> heading
#   "Lecture N. ..."  becomes a bold-italic lecture heading
#   blank line        becomes a <br><br> paragraph break
#   [url | text]      becomes a link to url labelled text
#   [url]             becomes a link to url labelled with the url itself
#   ``text''          becomes curly double quotes
#   a -- b            becomes an en dash
#   name.c/.s/.pl/.h  becomes a link to src/name.html
#
# The substitutions below run in sequence on the whole text, so their
# order matters (e.g. headings are created before <br> runs around them
# are collapsed).

# Slurp the entire input into one string.
my @lines = <>;
my $text = join('', @lines);
my $title;
# Extract the first "** ..." line as the title and drop that whole line
# (including its newline) by gluing the pre-match and post-match together.
if($text =~ /^\*\* (.*?)\n/m){
$title = $1;
$text = $` . $';
}else{
$title = "Untitled";
}
# Strip trailing spaces and tabs from every line.
$text =~ s/[ \t]+$//mg;
# Turn each empty line into a paragraph break.
$text =~ s/^$/<br><br>/mg;
# Link lowercase source file names to their src/*.html rendering.
# The match is case-sensitive, so names ending in ".S" are not linked.
$text =~ s!\b([a-z0-9]+\.(c|s|pl|h))\b!<a href="src/$1.html">$1</a>!g;
# Lecture title lines become bold italic.
$text =~ s!^(Lecture [0-9]+\. .*?)$!<b><i>$1</i></b>!mg;
# "* Heading" lines become <h2> headings.
$text =~ s!^\* (.*?)$!<h2>$1</h2>!mg;
# Drop <br> lines immediately before an <h2> ...
$text =~ s!((<br>)+\n)+<h2>!\n<h2>!g;
# ... and immediately after one.
$text =~ s!</h2>\n?((<br>)+\n)+!</h2>\n!g;
# Collapse any run of <br> lines before a <b> heading to a single <br><br>.
$text =~ s!((<br>)+\n)+<b>!\n<br><br><b>!g;
# "--" between words (with optional surrounding whitespace) becomes an en dash.
$text =~ s!\b\s*--\s*\b!\&ndash;!g;
# [url | text] links; must run before the bare [url] form below.
$text =~ s!\[([^\[\]|]+) \| ([^\[\]]+)\]!<a href="$1">$2</a>!g;
# [url] links: a bracketed token containing no spaces or tabs links to itself.
$text =~ s!\[([^ \t]+)\]!<a href="$1">$1</a>!g;
# TeX-style quotes become HTML curly-quote entities.
$text =~ s!``!\&ldquo;!g;
$text =~ s!''!\&rdquo;!g;
# Emit the page header; $title is interpolated into the heredoc.
print <<EOF;
<!-- AUTOMATICALLY GENERATED: EDIT the .txt version, not the .html version -->
<html>
<head>
<title>$title</title>
<style type="text/css"><!--
body {
background-color: white;
color: black;
font-size: medium;
line-height: 1.2em;
margin-left: 0.5in;
margin-right: 0.5in;
margin-top: 0;
margin-bottom: 0;
}
h1 {
text-indent: 0in;
text-align: left;
margin-top: 2em;
font-weight: bold;
font-size: 1.4em;
}
h2 {
text-indent: 0in;
text-align: left;
margin-top: 2em;
font-weight: bold;
font-size: 1.2em;
}
--></style>
</head>
<body bgcolor=#ffffff>
<h1>$title</h1>
<br><br>
EOF
# Emit the converted body, then close the document.
print $text;
print <<EOF;
</body>
</html>
EOF

BIN
web/sched.pdf Normal file

Binary file not shown.

BIN
web/trap.pdf Normal file

Binary file not shown.

BIN
web/unix.pdf Normal file

Binary file not shown.

87
x86.h
View file

@ -90,25 +90,28 @@ readeflags(void)
return eflags;
}
// Atomically exchange newval with the word at *addr using a
// lock-prefixed xchgl, and return the value previously stored at *addr.
static inline uint
xchg(volatile uint *addr, uint newval)
{
uint result;
// The + in "+m" denotes a read-modify-write operand.
// "=a" puts the result in %eax; the matching "1" input constraint
// preloads that same register with newval before the exchange.
asm volatile("lock; xchgl %0, %1" :
"+m" (*addr), "=a" (result) :
"1" (newval) :
"cc");
return result;
}
// Load the 16-bit value v into the %gs segment register.
static inline void
loadgs(ushort v)
{
asm volatile("movw %0, %%gs" : : "r" (v));
}
// Return the current value of the %ebp register.
static inline uint
rebp(void)
{
uint val;
asm volatile("movl %%ebp,%0" : "=r" (val));
return val;
}
// Return the current value of the %esp register.
static inline uint
resp(void)
{
uint val;
asm volatile("movl %%esp,%0" : "=r" (val));
return val;
}
static inline void
cli(void)
{
@ -121,66 +124,56 @@ sti(void)
asm volatile("sti");
}
static inline void lcr0(uint val)
// Atomically exchange newval with the word at *addr using a
// lock-prefixed xchgl, and return the value previously stored at *addr.
static inline uint
xchg(volatile uint *addr, uint newval)
{
uint result;
// The + in "+m" denotes a read-modify-write operand.
// "=a" puts the result in %eax; the matching "1" input constraint
// preloads that same register with newval before the exchange.
asm volatile("lock; xchgl %0, %1" :
"+m" (*addr), "=a" (result) :
"1" (newval) :
"cc");
return result;
}
//PAGEBREAK!
// Write val into control register %cr0.
static inline void
lcr0(uint val)
{
asm volatile("movl %0,%%cr0" : : "r" (val));
}
static inline uint rcr0(void)
// Return the current contents of control register %cr0.
static inline uint
rcr0(void)
{
uint val;
asm volatile("movl %%cr0,%0" : "=r" (val));
return val;
}
static inline uint rcr2(void)
// Return the current contents of control register %cr2.
static inline uint
rcr2(void)
{
uint val;
asm volatile("movl %%cr2,%0" : "=r" (val));
return val;
}
static inline void lcr3(uint val)
// Write val into control register %cr3.
static inline void
lcr3(uint val)
{
asm volatile("movl %0,%%cr3" : : "r" (val));
}
static inline uint rcr3(void)
// Return the current contents of control register %cr3.
static inline uint
rcr3(void)
{
uint val;
asm volatile("movl %%cr3,%0" : "=r" (val));
return val;
}
// Set the %ebp register to val.
static inline void lebp(uint val)
{
asm volatile("movl %0,%%ebp" : : "r" (val));
}
// Return the current value of the %ebp register.
static inline uint rebp(void)
{
uint val;
asm volatile("movl %%ebp,%0" : "=r" (val));
return val;
}
// Set the %esp register to val.
static inline void lesp(uint val)
{
asm volatile("movl %0,%%esp" : : "r" (val));
}
// Return the current value of the %esp register.
static inline uint resp(void)
{
uint val;
asm volatile("movl %%esp,%0" : "=r" (val));
return val;
}
// Execute a single x86 "pause" instruction.
static inline void nop_pause(void)
{
asm volatile("pause" : :);
}
//PAGEBREAK: 36
// Layout of the trap frame built on the stack by the
// hardware and by trapasm.S, and passed to trap().

BIN
xv6-rev4.tar.gz Normal file

Binary file not shown.

BIN
xv6-rev5.pdf Normal file

Binary file not shown.

BIN
xv6-rev5.tar.gz Normal file

Binary file not shown.