This is the 2021 version.
This part of the 2021 version is slightly different from the 2020 version.
COW presumably should be familiar to the students of the subject class, so this article focuses on the operation process without much analysis. For details, please refer to https://pdos.csail.mit.edu/6.828/2021/labs/cow.html
Let's start experimenting.
Switch to the COW branch
git fetch
git checkout cow
make clean
First, let's look at what the fork function is doing (the function is in kernel/proc.c):
// Create a new process, copying the parent's user memory,
// trapframe, open files and cwd. Returns the child's pid in
// the parent and 0 in the child (via np->trapframe->a0).
int
fork(void)
{
  int i, pid;
  struct proc *np;
  struct proc *p = myproc();

  // Allocate process.
  if((np = allocproc()) == 0){
    return -1;
  }

  // Copy user memory from parent to child.
  if(uvmcopy(p->pagetable, np->pagetable, p->sz) < 0){
    freeproc(np);
    release(&np->lock);
    return -1;
  }
  np->sz = p->sz;

  // copy saved user registers.
  *(np->trapframe) = *(p->trapframe);

  // Cause fork to return 0 in the child.
  np->trapframe->a0 = 0;

  // increment reference counts on open file descriptors.
  for(i = 0; i < NOFILE; i++)
    if(p->ofile[i])
      np->ofile[i] = filedup(p->ofile[i]);
  np->cwd = idup(p->cwd);

  safestrcpy(np->name, p->name, sizeof(p->name));

  pid = np->pid;

  release(&np->lock);

  acquire(&wait_lock);
  np->parent = p;
  release(&wait_lock);

  acquire(&np->lock);
  np->state = RUNNABLE;
  release(&np->lock);

  return pid;
}
It can be seen that the main modification for copy-on-write is in the uvmcopy function, which changes the original eager page copy into a shared mapping with reference counting.
Therefore, the whole experimental procedure is as follows:
First add in kernel/riscv.h
// Bit 8 of a PTE is reserved for software use by the RISC-V spec
// (an RSW bit — the MMU ignores it), so we can safely repurpose it
// to mark a page as copy-on-write.
#define PTE_C (1L << 8) // COW
If you are not familiar with PTE, you can review the previous experiments.
Then add the following to the very beginning of kernel/kalloc.c:
Note: this write-up omits locking on the grounds that the grading tests happen not to stress concurrent updates, but xv6 does run the kernel on multiple CPUs, so this reference count is genuinely shared state — strictly speaking, every update should be protected by a lock or performed atomically.
//KERNBASE == 0x80000000 static int reference_count[(PHYSTOP - KERNBASE) / PGSIZE]; static int idx_rc(uint64 pa){ //get reference cnt return (pa - KERNBASE) / PGSIZE; } void add_rc(uint64 pa){ // add reference_count[idx_rc(pa)]++; } void sub_rc(uint64 pa){ // sub reference_count[idx_rc(pa)]--; }
Then modify the uvmcopy function in kernel/vm.c.
The function turns out to be:
// Given a parent process's page table, copy its memory into a
// child's page table: copies both the page-table entries and the
// physical pages themselves (the eager, pre-COW behavior).
// Returns 0 on success, -1 on failure; frees any pages it
// allocated on failure.
int
uvmcopy(pagetable_t old, pagetable_t new, uint64 sz)
{
  pte_t *pte;
  uint64 pa, i;
  uint flags;
  char *mem;

  for(i = 0; i < sz; i += PGSIZE){
    if((pte = walk(old, i, 0)) == 0)
      panic("uvmcopy: pte should exist");
    if((*pte & PTE_V) == 0)
      panic("uvmcopy: page not present");
    pa = PTE2PA(*pte);
    flags = PTE_FLAGS(*pte);
    if((mem = kalloc()) == 0)
      goto err;
    memmove(mem, (char*)pa, PGSIZE);
    if(mappages(new, i, PGSIZE, (uint64)mem, flags) != 0){
      kfree(mem);
      goto err;
    }
  }
  return 0;

 err:
  uvmunmap(new, 0, i / PGSIZE, 1);
  return -1;
}
Instead of copying each page outright, we map the parent's physical page into the child and increase its reference count, so the function becomes the following:
// COW fork: instead of allocating and copying every page, map the
// parent's physical pages into the child and bump their reference
// counts. Writable pages are made read-only and tagged PTE_C so that
// the first write from either process traps into usertrap(), which
// performs the real copy.
// Returns 0 on success, -1 on failure.
int
uvmcopy(pagetable_t old, pagetable_t new, uint64 sz)
{
  pte_t *pte;
  uint64 pa, i;
  uint flags;

  for(i = 0; i < sz; i += PGSIZE){
    if((pte = walk(old, i, 0)) == 0)
      panic("uvmcopy: pte should exist");
    if((*pte & PTE_V) == 0)
      panic("uvmcopy: page not present");
    pa = PTE2PA(*pte);
    // Only demote pages that were actually writable. Marking a
    // genuinely read-only page (e.g. program text) with PTE_C would
    // let the page-fault handler hand out PTE_W for it later.
    if(*pte & PTE_W)
      *pte = (*pte & ~PTE_W) | PTE_C;
    flags = PTE_FLAGS(*pte);
    if(mappages(new, i, PGSIZE, pa, flags) != 0){
      goto err;
    }
    add_rc(pa); // share the page instead of copying it
  }
  return 0;

 err:
  // do_free == 1: kfree() just drops the references taken so far.
  uvmunmap(new, 0, i / PGSIZE, 1);
  return -1;
}
Then, whenever a mapping to a page is removed, its reference count must be decremented. Note that the count is keyed by physical address, so it suffices to change the do_free path of uvmunmap — that is, to modify the kfree function (in kernel/kalloc.c).
The original code:
// Free the page of physical memory pointed at by pa, returning it to
// the free list (the original, pre-COW behavior: every kfree really
// frees the page).
void
kfree(void *pa)
{
  struct run *r;

  if(((uint64)pa % PGSIZE) != 0 || (char*)pa < end || (uint64)pa >= PHYSTOP)
    panic("kfree");

  // Fill with junk to catch dangling refs.
  memset(pa, 1, PGSIZE);

  r = (struct run*)pa;

  acquire(&kmem.lock);
  r->next = kmem.freelist;
  kmem.freelist = r;
  release(&kmem.lock);
}
Change it to the following: the page is actually freed only when it holds the last reference; otherwise the reference count is simply decremented by one:
void kfree(void *pa) { struct run *r; if(((uint64)pa % PGSIZE) != 0 || (char*)pa < end || (uint64)pa >= PHYSTOP) panic("kfree"); if(reference_count[idx_rc((uint64)pa)] > 1){// > 1 just sub 1 sub_rc((uint64)pa); return; } // Fill with junk to catch dangling refs. memset(pa, 1, PGSIZE); r = (struct run*)pa; acquire(&kmem.lock); r->next = kmem.freelist; kmem.freelist = r; release(&kmem.lock); reference_count[idx_rc((uint64)pa)] = 0; }
Modify the usertrap function below (in kernel/trap.c)
The original code:
// Handle an interrupt, exception, or system call arriving from
// user space (the original, pre-COW version: any unexpected page
// fault kills the process).
void
usertrap(void)
{
  int which_dev = 0;

  if((r_sstatus() & SSTATUS_SPP) != 0)
    panic("usertrap: not from user mode");

  // send interrupts and exceptions to kerneltrap(),
  // since we're now in the kernel.
  w_stvec((uint64)kernelvec);

  struct proc *p = myproc();

  // save user program counter.
  p->trapframe->epc = r_sepc();

  if(r_scause() == 8){
    // system call

    if(p->killed)
      exit(-1);

    // sepc points to the ecall instruction,
    // but we want to return to the next instruction.
    p->trapframe->epc += 4;

    // an interrupt will change sstatus &c registers,
    // so don't enable until done with those registers.
    intr_on();

    syscall();
  } else if((which_dev = devintr()) != 0){
    // ok
  } else {
    printf("usertrap(): unexpected scause %p pid=%d\n", r_scause(), p->pid);
    printf("            sepc=%p stval=%p\n", r_sepc(), r_stval());
    p->killed = 1;
  }

  if(p->killed)
    exit(-1);

  // give up the CPU if this is a timer interrupt.
  if(which_dev == 2)
    yield();

  usertrapret();
}
Now we need to add handling for the case where scause is 13 (load page fault) or 15 (store page fault).
The changed code is:
void usertrap(void) { int which_dev = 0; if((r_sstatus() & SSTATUS_SPP) != 0) panic("usertrap: not from user mode"); // send interrupts and exceptions to kerneltrap(), // since we're now in the kernel. w_stvec((uint64)kernelvec); struct proc *p = myproc(); // save user program counter. p->trapframe->epc = r_sepc(); if(r_scause() == 8){ // system call if(p->killed) exit(-1); // sepc points to the ecall instruction, // but we want to return to the next instruction. p->trapframe->epc += 4; // an interrupt will change sstatus &c registers, // so don't enable until done with those registers. intr_on(); syscall(); } else if((which_dev = devintr()) != 0){ // ok } else { uint64 va = PGROUNDDOWN(r_stval()); if(va >= MAXVA){ p->killed = 1; exit(-1); } pte_t* pte = walk(p->pagetable,va,0); if(pte == 0){ p->killed = 1; exit(-1); } uint64 pa = PTE2PA(*pte); uint64 flag = PTE_FLAGS(*pte); if((r_scause() == 13 || r_scause() == 15) && (flag & PTE_C) ){ char* mem = kalloc(); if(mem == 0){ printf("here\n"); p->killed = 1; exit(-1); } memmove(mem,(char*)pa,PGSIZE); kfree((char*)pa); *pte = PA2PTE(mem) | (flag & ~PTE_C) | PTE_W; } else{ printf("usertrap(): unexpected scause %p pid=%d\n", r_scause(), p->pid); printf(" sepc=%p stval=%p\n", r_sepc(), r_stval()); p->killed = 1; } } if(p->killed) exit(-1); // give up the CPU if this is a timer interrupt. if(which_dev == 2) yield(); usertrapret(); }
This code allocates a fresh page for the faulting process, copies the shared page's contents into it, fixes up the PTE flags (clearing PTE_C and restoring PTE_W), and calls kfree on the old page, which decrements its reference count.
Then modify the copyout function of kernel/vm.c.
The original code is:
// Copy from kernel to user: copy len bytes from src to virtual
// address dstva in the given page table, one page at a time.
// walkaddr() validates each destination page (present + user).
// Return 0 on success, -1 on error.
int
copyout(pagetable_t pagetable, uint64 dstva, char *src, uint64 len)
{
  uint64 n, va0, pa0;

  while(len > 0){
    va0 = PGROUNDDOWN(dstva);
    pa0 = walkaddr(pagetable, va0);
    if(pa0 == 0)
      return -1;
    n = PGSIZE - (dstva - va0);
    if(n > len)
      n = len;
    memmove((void *)(pa0 + (dstva - va0)), src, n);

    len -= n;
    src += n;
    dstva = va0 + PGSIZE;
  }
  return 0;
}
Now change to:
// Copy from kernel to user, COW-aware: a write into a shared COW
// page is preceded by breaking the sharing, exactly as the page-
// fault path does (the kernel writes through the physical address,
// so no fault would occur otherwise).
// Return 0 on success, -1 on error.
int
copyout(pagetable_t pagetable, uint64 dstva, char *src, uint64 len)
{
  uint64 n, va0, pa0, flag;
  pte_t *pte;

  while(len > 0){
    va0 = PGROUNDDOWN(dstva);
    if(va0 >= MAXVA)
      return -1;
    if((pte = walk(pagetable, va0, 0)) == 0)
      return -1;
    // Refuse pages that are not present or not user-accessible.
    // walkaddr() performed these checks in the original; skipping
    // PTE_U here would let a user buffer address reach kernel-only
    // mappings in the user page table (e.g. the trampoline page).
    if((*pte & PTE_V) == 0 || (*pte & PTE_U) == 0)
      return -1;
    pa0 = PTE2PA(*pte);
    flag = PTE_FLAGS(*pte);
    if(flag & PTE_C){
      // About to write a shared COW page: make a private copy first,
      // and drop the reference to the shared page via kfree().
      char *mem = kalloc();
      if(mem == 0)
        return -1;
      memmove(mem, (char*)pa0, PGSIZE);
      kfree((char*)pa0);
      *pte = PA2PTE((uint64)mem) | (flag & ~PTE_C) | PTE_W;
      pa0 = (uint64)mem;
    }
    n = PGSIZE - (dstva - va0);
    if(n > len)
      n = len;
    memmove((void *)(pa0 + (dstva - va0)), src, n);

    len -= n;
    src += n;
    dstva = va0 + PGSIZE;
  }
  return 0;
}
Don't forget to change the kalloc function, located in (kernel/kalloc.c)
Originally:
// Allocate one page of physical memory from the free list
// (the original, pre-COW version: no reference counting).
// Returns 0 if no memory is available.
void *
kalloc(void)
{
  struct run *r;

  acquire(&kmem.lock);
  r = kmem.freelist;
  if(r)
    kmem.freelist = r->next;
  release(&kmem.lock);

  if(r)
    memset((char*)r, 5, PGSIZE); // fill with junk
  return (void*)r;
}
It is now necessary to initialize the reference count (set to 1) when allocating pages
That is:
void * kalloc(void) { struct run *r; acquire(&kmem.lock); r = kmem.freelist; if(r) kmem.freelist = r->next; release(&kmem.lock); if(r) memset((char*)r, 5, PGSIZE); // fill with junk if(r) reference_count[idx_rc((uint64)r)] = 1; return (void*)r; }
Finally add the declaration in kernel/defs.h:
// kalloc.c void add_rc(uint64); void sub_rc(uint64); // vm.c pte_t* walk(pagetable_t, uint64,int);
Here are the final experimental results:

references:
https://github.com/jlu-xiurui/MIT6.S081-2021-FALL/tree/master/lab6-cow