os161-1.99
 All Data Structures
kmalloc.c
00001 /*
00002  * Copyright (c) 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009
00003  *      The President and Fellows of Harvard College.
00004  *
00005  * Redistribution and use in source and binary forms, with or without
00006  * modification, are permitted provided that the following conditions
00007  * are met:
00008  * 1. Redistributions of source code must retain the above copyright
00009  *    notice, this list of conditions and the following disclaimer.
00010  * 2. Redistributions in binary form must reproduce the above copyright
00011  *    notice, this list of conditions and the following disclaimer in the
00012  *    documentation and/or other materials provided with the distribution.
00013  * 3. Neither the name of the University nor the names of its contributors
00014  *    may be used to endorse or promote products derived from this software
00015  *    without specific prior written permission.
00016  *
00017  * THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY AND CONTRIBUTORS ``AS IS'' AND
00018  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00019  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00020  * ARE DISCLAIMED.  IN NO EVENT SHALL THE UNIVERSITY OR CONTRIBUTORS BE LIABLE
00021  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
00022  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
00023  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
00024  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00025  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
00026  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00027  * SUCH DAMAGE.
00028  */
00029 
00030 #include <types.h>
00031 #include <lib.h>
00032 #include <spinlock.h>
00033 #include <vm.h>
00034 
00035 /*
00036  * Kernel malloc.
00037  */
00038 
00039 
/*
 * Overwrite LEN bytes starting at VPTR with the 32-bit pattern
 * 0xdeadbeef, one word at a time; any trailing bytes past the last
 * whole word are left untouched. Used so stale heap contents are
 * easy to recognize when chased through the debugger.
 */
static
void
fill_deadbeef(void *vptr, size_t len)
{
	uint32_t *word = vptr;
	uint32_t *end = word + len/sizeof(uint32_t);

	while (word < end) {
		*word++ = 0xdeadbeef;
	}
}
00051 
00052 ////////////////////////////////////////////////////////////
00053 //
00054 // Pool-based subpage allocator.
00055 //
00056 // It works like this:
00057 //
00058 //    We allocate one page at a time and fill it with objects of size k,
00059 //    for various k. Each page has its own freelist, maintained by a
00060 //    linked list in the first word of each object. Each page also has a
00061 //    freecount, so we know when the page is completely free and can 
00062 //    release it.
00063 //
00064 //    No assumptions are made about the sizes k; they need not be
00065 //    powers of two. Note, however, that malloc must always return
00066 //    pointers aligned to the maximum alignment requirements of the
00067 //    platform; thus block sizes must at least be multiples of 4,
00068 //    preferably 8. They must also be at least sizeof(struct
00069 //    freelist). It is only worth defining an additional block size if
00070 //    more blocks would fit on a page than with the existing block
00071 //    sizes, and large numbers of items of the new size are allocated.
00072 //
00073 //    The free counts and addresses of the pages are maintained in
00074 //    another list.  Maintaining this table is a nuisance, because it
00075 //    cannot recursively use the subpage allocator. (We could probably
00076 //    make that work, but it would be painful.)
00077 //
00078 
/* Define SLOW/SLOWER to enable the consistency checks further below. */
#undef  SLOW    /* consistency checks */
#undef SLOWER   /* lots of consistency checks */

////////////////////////////////////////

#if PAGE_SIZE == 4096

/* Number of distinct block sizes the subpage allocator offers. */
#define NSIZES 8
/* The block sizes; each page is carved into blocks of exactly one size. */
static const size_t sizes[NSIZES] = { 16, 32, 64, 128, 256, 512, 1024, 2048 };

#define SMALLEST_SUBPAGE_SIZE 16
#define LARGEST_SUBPAGE_SIZE 2048

#elif PAGE_SIZE == 8192
#error "No support for 8k pages (yet?)"
#else
#error "Odd page size"
#endif
00097 
00098 ////////////////////////////////////////
00099 
/*
 * Freelist link stored in the first word of each free block; the free
 * blocks on a page thus form a singly-linked list threaded through
 * the free space itself.
 */
struct freelist {
	struct freelist *next;
};

/*
 * Bookkeeping record for one page owned by the subpage allocator.
 * Pages are aligned, so the low bits of the page address are free to
 * hold the block-size index (see MKPAB below), and 16-bit offsets are
 * enough to address within a page.
 */
struct pageref {
	struct pageref *next_samesize;	/* next page with the same block size */
	struct pageref *next_all;	/* next page in the global list */
	vaddr_t pageaddr_and_blocktype;	/* page address | index into sizes[] */
	uint16_t freelist_offset;	/* offset of first free block, or INVALID_OFFSET */
	uint16_t nfree;			/* number of free blocks on this page */
};
00111 
/* freelist_offset value meaning "no free blocks on this page". */
#define INVALID_OFFSET   (0xffff)

/* Extract the page address (upper bits) from the packed field. */
#define PR_PAGEADDR(pr)  ((pr)->pageaddr_and_blocktype & PAGE_FRAME)
/* Extract the block-size index (low bits) from the packed field. */
#define PR_BLOCKTYPE(pr) ((pr)->pageaddr_and_blocktype & ~PAGE_FRAME)
/* Pack a page address and a block-size index into a single word. */
#define MKPAB(pa, blk)   (((pa)&PAGE_FRAME) | ((blk) & ~PAGE_FRAME))
00117 
00118 ////////////////////////////////////////
00119 
00120 /*
00121  * This is cheesy. 
00122  *
00123  * The problem is not that it's wasteful - we can only allocate whole
00124  * pages of pageref structures at a time anyway. The problem is that
00125  * we really ought to be able to have more than one of these pages.
00126  *
00127  * However, for the time being, one page worth of pagerefs gives us
00128  * 256 pagerefs; this lets us manage 256 * 4k = 1M of kernel heap.
00129  * That would be twice as much memory as we get for *everything*.
00130  * Thus, we will cheat and not allow any mechanism for having a second
00131  * page of pageref structs.
00132  *
00133  * Then, since the size is fixed at one page, we'll simplify a bit
00134  * further by allocating the page in the kernel BSS instead of calling
00135  * alloc_kpages to get it.
00136  */
00137 
/* Number of pageref structs that fit in one page; see the comment above. */
#define NPAGEREFS (PAGE_SIZE / sizeof(struct pageref))
/* Static pool of pagerefs, allocated in the kernel BSS. */
static struct pageref pagerefs[NPAGEREFS];

/* Bitmap of in-use pool slots, 32 slots per word. */
#define INUSE_WORDS (NPAGEREFS/32)
static uint32_t pagerefs_inuse[INUSE_WORDS];
00143 
00144 static
00145 struct pageref *
00146 allocpageref(void)
00147 {
00148         unsigned i,j;
00149         uint32_t k;
00150 
00151         for (i=0; i<INUSE_WORDS; i++) {
00152                 if (pagerefs_inuse[i]==0xffffffff) {
00153                         /* full */
00154                         continue;
00155                 }
00156                 for (k=1,j=0; k!=0; k<<=1,j++) {
00157                         if ((pagerefs_inuse[i] & k)==0) {
00158                                 pagerefs_inuse[i] |= k;
00159                                 return &pagerefs[i*32 + j];
00160                         }
00161                 }
00162                 KASSERT(0);
00163         }
00164 
00165         /* ran out */
00166         return NULL;
00167 }
00168 
00169 static
00170 void
00171 freepageref(struct pageref *p)
00172 {
00173         size_t i, j;
00174         uint32_t k;
00175 
00176         j = p-pagerefs;
00177         KASSERT(j < NPAGEREFS);  /* note: j is unsigned, don't test < 0 */
00178         i = j/32;
00179         k = ((uint32_t)1) << (j%32);
00180         KASSERT((pagerefs_inuse[i] & k) != 0);
00181         pagerefs_inuse[i] &= ~k;
00182 }
00183 
00184 ////////////////////////////////////////
00185 
/* Heads of the per-blocksize page lists (chained via next_samesize). */
static struct pageref *sizebases[NSIZES];
/* Head of the list of all subpage-allocator pages (chained via next_all). */
static struct pageref *allbase;

////////////////////////////////////////

/*
 * Use one spinlock for the whole thing. Making parts of the kmalloc
 * logic per-cpu is worthwhile for scalability; however, for the time
 * being at least we won't, because it adds a lot of complexity and in
 * OS/161 performance and scalability aren't super-critical.
 */

static struct spinlock kmalloc_spinlock = SPINLOCK_INITIALIZER;
00199 
00200 ////////////////////////////////////////
00201 
/* SLOWER implies SLOW */
#ifdef SLOWER
#ifndef SLOW
#define SLOW
#endif
#endif

#ifdef SLOW
/*
 * Consistency-check one page: walk its freelist and verify that every
 * entry lies within the page, is aligned to the page's block size,
 * and that the number of entries matches the recorded free count.
 * Caller must hold kmalloc_spinlock.
 */
static
void
checksubpage(struct pageref *pr)
{
	vaddr_t prpage, fla;
	struct freelist *fl;
	int blktype;
	int nfree=0;

	KASSERT(spinlock_do_i_hold(&kmalloc_spinlock));

	/* No freelist at all means the page is fully allocated. */
	if (pr->freelist_offset == INVALID_OFFSET) {
		KASSERT(pr->nfree==0);
		return;
	}

	prpage = PR_PAGEADDR(pr);
	blktype = PR_BLOCKTYPE(pr);

	KASSERT(pr->freelist_offset < PAGE_SIZE);
	KASSERT(pr->freelist_offset % sizes[blktype] == 0);

	fla = prpage + pr->freelist_offset;
	fl = (struct freelist *)fla;

	for (; fl != NULL; fl = fl->next) {
		fla = (vaddr_t)fl;
		/* each entry must be inside the page, on a block boundary */
		KASSERT(fla >= prpage && fla < prpage + PAGE_SIZE);
		KASSERT((fla-prpage) % sizes[blktype] == 0);
		/* NOTE(review): MIPS-specific; heap addresses are expected
		   to lie in the KSEG0 window - confirm if porting. */
		KASSERT(fla >= MIPS_KSEG0);
		KASSERT(fla < MIPS_KSEG1);
		nfree++;
	}
	KASSERT(nfree==pr->nfree);
}
#else
#define checksubpage(pr) ((void)(pr))
#endif
00248 
#ifdef SLOWER
/*
 * Consistency-check every page: run checksubpage over each entry of
 * all per-size lists and of the global list, and verify the two
 * traversals see the same number of pages. Caller must hold
 * kmalloc_spinlock.
 */
static
void
checksubpages(void)
{
	struct pageref *pr;
	int i;
	unsigned sc=0, ac=0;	/* counts: per-size lists vs. all-list */

	KASSERT(spinlock_do_i_hold(&kmalloc_spinlock));

	for (i=0; i<NSIZES; i++) {
		for (pr = sizebases[i]; pr != NULL; pr = pr->next_samesize) {
			checksubpage(pr);
			KASSERT(sc < NPAGEREFS);
			sc++;
		}
	}

	for (pr = allbase; pr != NULL; pr = pr->next_all) {
		checksubpage(pr);
		KASSERT(ac < NPAGEREFS);
		ac++;
	}

	/* Every page should appear on both structures exactly once. */
	KASSERT(sc==ac);
}
#else
#define checksubpages() 
#endif
00279 
00280 ////////////////////////////////////////
00281 
/*
 * Print one page's status: its address, block size, free count, and a
 * per-block map where '.' marks a free block and '*' an allocated
 * one. Caller must hold kmalloc_spinlock.
 */
static
void
dumpsubpage(struct pageref *pr)
{
	vaddr_t prpage, fla;
	struct freelist *fl;
	int blktype;
	unsigned i, n, index;
	/* one bit per block, sized for the smallest block size */
	uint32_t freemap[PAGE_SIZE / (SMALLEST_SUBPAGE_SIZE*32)];

	checksubpage(pr);
	KASSERT(spinlock_do_i_hold(&kmalloc_spinlock));

	/* clear freemap[] */
	for (i=0; i<sizeof(freemap)/sizeof(freemap[0]); i++) {
		freemap[i] = 0;
	}

	prpage = PR_PAGEADDR(pr);
	blktype = PR_BLOCKTYPE(pr);

	/* compute how many bits we need in freemap and assert we fit */
	n = PAGE_SIZE / sizes[blktype];
	KASSERT(n <= 32*sizeof(freemap)/sizeof(freemap[0]));

	/* Walk the freelist, setting the bit for each free block. */
	if (pr->freelist_offset != INVALID_OFFSET) {
		fla = prpage + pr->freelist_offset;
		fl = (struct freelist *)fla;

		for (; fl != NULL; fl = fl->next) {
			fla = (vaddr_t)fl;
			index = (fla-prpage) / sizes[blktype];
			KASSERT(index<n);
			freemap[index/32] |= (1<<(index%32));
		}
	}

	kprintf("at 0x%08lx: size %-4lu  %u/%u free\n", 
		(unsigned long)prpage, (unsigned long) sizes[blktype],
		(unsigned) pr->nfree, n);
	kprintf("   ");
	for (i=0; i<n; i++) {
		int val = (freemap[i/32] & (1<<(i%32)))!=0;
		kprintf("%c", val ? '.' : '*');
		/* wrap the map display every 64 blocks */
		if (i%64==63 && i<n-1) {
			kprintf("\n   ");
		}
	}
	kprintf("\n");
}
00332 
00333 void
00334 kheap_printstats(void)
00335 {
00336         struct pageref *pr;
00337 
00338         /* print the whole thing with interrupts off */
00339         spinlock_acquire(&kmalloc_spinlock);
00340 
00341         kprintf("Subpage allocator status:\n");
00342 
00343         for (pr = allbase; pr != NULL; pr = pr->next_all) {
00344                 dumpsubpage(pr);
00345         }
00346 
00347         spinlock_release(&kmalloc_spinlock);
00348 }
00349 
00350 ////////////////////////////////////////
00351 
00352 static
00353 void
00354 remove_lists(struct pageref *pr, int blktype)
00355 {
00356         struct pageref **guy;
00357 
00358         KASSERT(blktype>=0 && blktype<NSIZES);
00359 
00360         for (guy = &sizebases[blktype]; *guy; guy = &(*guy)->next_samesize) {
00361                 checksubpage(*guy);
00362                 if (*guy == pr) {
00363                         *guy = pr->next_samesize;
00364                         break;
00365                 }
00366         }
00367 
00368         for (guy = &allbase; *guy; guy = &(*guy)->next_all) {
00369                 checksubpage(*guy);
00370                 if (*guy == pr) {
00371                         *guy = pr->next_all;
00372                         break;
00373                 }
00374         }
00375 }
00376 
00377 static
00378 inline
00379 int blocktype(size_t sz)
00380 {
00381         unsigned i;
00382         for (i=0; i<NSIZES; i++) {
00383                 if (sz <= sizes[i]) {
00384                         return i;
00385                 }
00386         }
00387 
00388         panic("Subpage allocator cannot handle allocation of size %lu\n", 
00389               (unsigned long)sz);
00390 
00391         // keep compiler happy
00392         return 0;
00393 }
00394 
/*
 * Allocate a block of at least SZ bytes (rounded up to the nearest
 * supported block size) from the subpage allocator. First tries any
 * existing page of the right block size that has free blocks; if
 * there is none, gets a fresh page from alloc_kpages, carves it into
 * blocks, and allocates from that. Returns NULL if out of memory.
 */
static
void *
subpage_kmalloc(size_t sz)
{
	unsigned blktype;	// index into sizes[] that we're using
	struct pageref *pr;	// pageref for page we're allocating from
	vaddr_t prpage;		// PR_PAGEADDR(pr)
	vaddr_t fla;		// free list entry address
	struct freelist *volatile fl;	// free list entry
	void *retptr;		// our result

	volatile int i;


	blktype = blocktype(sz);
	sz = sizes[blktype];

	spinlock_acquire(&kmalloc_spinlock);

	checksubpages();

	for (pr = sizebases[blktype]; pr != NULL; pr = pr->next_samesize) {

		/* check for corruption */
		KASSERT(PR_BLOCKTYPE(pr) == blktype);
		checksubpage(pr);

		if (pr->nfree > 0) {

		doalloc: /* comes here after getting a whole fresh page */

			/* Pop the first entry off the page's freelist. */
			KASSERT(pr->freelist_offset < PAGE_SIZE);
			prpage = PR_PAGEADDR(pr);
			fla = prpage + pr->freelist_offset;
			fl = (struct freelist *)fla;

			retptr = fl;
			fl = fl->next;
			pr->nfree--;

			if (fl != NULL) {
				KASSERT(pr->nfree > 0);
				fla = (vaddr_t)fl;
				KASSERT(fla - prpage < PAGE_SIZE);
				pr->freelist_offset = fla - prpage;
			}
			else {
				/* That was the last free block. */
				KASSERT(pr->nfree == 0);
				pr->freelist_offset = INVALID_OFFSET;
			}

			checksubpages();

			spinlock_release(&kmalloc_spinlock);
			return retptr;
		}
	}

	/*
	 * No page of the right size available.
	 * Make a new one.
	 *
	 * We release the spinlock while calling alloc_kpages. This
	 * avoids deadlock if alloc_kpages needs to come back here.
	 * Note that this means things can change behind our back...
	 */

	spinlock_release(&kmalloc_spinlock);
	prpage = alloc_kpages(1);
	if (prpage==0) {
		/* Out of memory. */
		kprintf("kmalloc: Subpage allocator couldn't get a page\n"); 
		return NULL;
	}
	spinlock_acquire(&kmalloc_spinlock);

	pr = allocpageref();
	if (pr==NULL) {
		/* Couldn't allocate accounting space for the new page. */
		spinlock_release(&kmalloc_spinlock);
		free_kpages(prpage);
		kprintf("kmalloc: Subpage allocator couldn't get pageref\n"); 
		return NULL;
	}

	pr->pageaddr_and_blocktype = MKPAB(prpage, blktype);
	pr->nfree = PAGE_SIZE / sizes[blktype];

	/*
	 * Note: fl is volatile because the MIPS toolchain we were
	 * using in spring 2001 attempted to optimize this loop and
	 * blew it. Making fl volatile inhibits the optimization.
	 */

	/* Build the page's freelist; the head ends up at the last block. */
	fla = prpage;
	fl = (struct freelist *)fla;
	fl->next = NULL;
	for (i=1; i<pr->nfree; i++) {
		fl = (struct freelist *)(fla + i*sizes[blktype]);
		fl->next = (struct freelist *)(fla + (i-1)*sizes[blktype]);
		KASSERT(fl != fl->next);
	}
	fla = (vaddr_t) fl;
	pr->freelist_offset = fla - prpage;
	KASSERT(pr->freelist_offset == (pr->nfree-1)*sizes[blktype]);

	/* Link the new page onto both lists. */
	pr->next_samesize = sizebases[blktype];
	sizebases[blktype] = pr;

	pr->next_all = allbase;
	allbase = pr;

	/* This is kind of cheesy, but avoids duplicating the alloc code. */
	goto doalloc;
}
00510 
/*
 * Try to free PTR as a subpage allocation. Returns 0 on success, or
 * -1 if PTR is not on any page we own (the caller should then treat
 * it as a whole-page allocation). Panics if PTR lies inside one of
 * our pages but not on a block boundary. When the last block of a
 * page is freed, the whole page is returned to free_kpages.
 */
static
int
subpage_kfree(void *ptr)
{
	int blktype;		// index into sizes[] that we're using
	vaddr_t ptraddr;	// same as ptr
	struct pageref *pr;	// pageref for page we're freeing in
	vaddr_t prpage;		// PR_PAGEADDR(pr)
	vaddr_t fla;		// free list entry address
	struct freelist *fl;	// free list entry
	vaddr_t offset;		// offset into page

	ptraddr = (vaddr_t)ptr;

	spinlock_acquire(&kmalloc_spinlock);

	checksubpages();

	/* Find the page (if any) that PTR lives on. */
	for (pr = allbase; pr; pr = pr->next_all) {
		prpage = PR_PAGEADDR(pr);
		blktype = PR_BLOCKTYPE(pr);

		/* check for corruption */
		KASSERT(blktype>=0 && blktype<NSIZES);
		checksubpage(pr);

		if (ptraddr >= prpage && ptraddr < prpage + PAGE_SIZE) {
			break;
		}
	}

	if (pr==NULL) {
		/* Not on any of our pages - not a subpage allocation */
		spinlock_release(&kmalloc_spinlock);
		return -1;
	}

	offset = ptraddr - prpage;

	/* Check for proper positioning and alignment */
	if (offset >= PAGE_SIZE || offset % sizes[blktype] != 0) {
		panic("kfree: subpage free of invalid addr %p\n", ptr);
	}

	/*
	 * Clear the block to 0xdeadbeef to make it easier to detect
	 * uses of dangling pointers.
	 */
	fill_deadbeef(ptr, sizes[blktype]);

	/*
	 * We probably ought to check for free twice by seeing if the block
	 * is already on the free list. But that's expensive, so we don't.
	 */

	/* Push the block onto the head of the page's freelist. */
	fla = prpage + offset;
	fl = (struct freelist *)fla;
	if (pr->freelist_offset == INVALID_OFFSET) {
		fl->next = NULL;
	} else {
		fl->next = (struct freelist *)(prpage + pr->freelist_offset);
	}
	pr->freelist_offset = offset;
	pr->nfree++;

	KASSERT(pr->nfree <= PAGE_SIZE / sizes[blktype]);
	if (pr->nfree == PAGE_SIZE / sizes[blktype]) {
		/* Whole page is free. */
		remove_lists(pr, blktype);
		freepageref(pr);
		/* Call free_kpages without kmalloc_spinlock. */
		spinlock_release(&kmalloc_spinlock);
		free_kpages(prpage);
	}
	else {
		spinlock_release(&kmalloc_spinlock);
	}

#ifdef SLOWER /* Don't get the lock unless checksubpages does something. */
	spinlock_acquire(&kmalloc_spinlock);
	checksubpages();
	spinlock_release(&kmalloc_spinlock);
#endif

	return 0;
}
00597 
00598 //
00599 ////////////////////////////////////////////////////////////
00600 
00601 void *
00602 kmalloc(size_t sz)
00603 {
00604         if (sz>=LARGEST_SUBPAGE_SIZE) {
00605                 unsigned long npages;
00606                 vaddr_t address;
00607 
00608                 /* Round up to a whole number of pages. */
00609                 npages = (sz + PAGE_SIZE - 1)/PAGE_SIZE;
00610                 address = alloc_kpages(npages);
00611                 if (address==0) {
00612                         return NULL;
00613                 }
00614 
00615                 return (void *)address;
00616         }
00617 
00618         return subpage_kmalloc(sz);
00619 }
00620 
00621 void
00622 kfree(void *ptr)
00623 {
00624         /*
00625          * Try subpage first; if that fails, assume it's a big allocation.
00626          */
00627         if (ptr == NULL) {
00628                 return;
00629         } else if (subpage_kfree(ptr)) {
00630                 KASSERT((vaddr_t)ptr%PAGE_SIZE==0);
00631                 free_kpages((vaddr_t)ptr);
00632         }
00633 }
00634 
 All Data Structures