root/kern/vm/kmalloc.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. fill_deadbeef
  2. allocpageref
  3. freepageref
  4. checksubpage
  5. checksubpages
  6. dumpsubpage
  7. kheap_printstats
  8. remove_lists
  9. blocktype
  10. subpage_kmalloc
  11. subpage_kfree
  12. kmalloc
  13. kfree

   1 /*
   2  * Copyright (c) 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009
   3  *      The President and Fellows of Harvard College.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  * 1. Redistributions of source code must retain the above copyright
   9  *    notice, this list of conditions and the following disclaimer.
  10  * 2. Redistributions in binary form must reproduce the above copyright
  11  *    notice, this list of conditions and the following disclaimer in the
  12  *    documentation and/or other materials provided with the distribution.
  13  * 3. Neither the name of the University nor the names of its contributors
  14  *    may be used to endorse or promote products derived from this software
  15  *    without specific prior written permission.
  16  *
  17  * THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY AND CONTRIBUTORS ``AS IS'' AND
  18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE UNIVERSITY OR CONTRIBUTORS BE LIABLE
  21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  27  * SUCH DAMAGE.
  28  */
  29 
  30 #include <types.h>
  31 #include <lib.h>
  32 #include <spinlock.h>
  33 #include <vm.h>
  34 
  35 /*
  36  * Kernel malloc.
  37  */
  38 
  39 
  40 static
  41 void
  42 fill_deadbeef(void *vptr, size_t len)
  43 {
  44         uint32_t *ptr = vptr;
  45         size_t i;
  46 
  47         for (i=0; i<len/sizeof(uint32_t); i++) {
  48                 ptr[i] = 0xdeadbeef;
  49         }
  50 }
  51 
  52 ////////////////////////////////////////////////////////////
  53 //
  54 // Pool-based subpage allocator.
  55 //
  56 // It works like this:
  57 //
  58 //    We allocate one page at a time and fill it with objects of size k,
  59 //    for various k. Each page has its own freelist, maintained by a
  60 //    linked list in the first word of each object. Each page also has a
  61 //    freecount, so we know when the page is completely free and can 
  62 //    release it.
  63 //
  64 //    No assumptions are made about the sizes k; they need not be
  65 //    powers of two. Note, however, that malloc must always return
  66 //    pointers aligned to the maximum alignment requirements of the
  67 //    platform; thus block sizes must at least be multiples of 4,
  68 //    preferably 8. They must also be at least sizeof(struct
  69 //    freelist). It is only worth defining an additional block size if
  70 //    more blocks would fit on a page than with the existing block
  71 //    sizes, and large numbers of items of the new size are allocated.
  72 //
  73 //    The free counts and addresses of the pages are maintained in
  74 //    another list.  Maintaining this table is a nuisance, because it
  75 //    cannot recursively use the subpage allocator. (We could probably
  76 //    make that work, but it would be painful.)
  77 //
  78 
  79 #undef  SLOW    /* consistency checks */
  80 #undef SLOWER   /* lots of consistency checks */
  81 
  82 ////////////////////////////////////////
  83 
#if PAGE_SIZE == 4096

/*
 * The block sizes the subpage allocator hands out, smallest first.
 * Each must be a multiple of the platform's maximum alignment and at
 * least sizeof(struct freelist); see the comment block above.
 */
#define NSIZES 8
static const size_t sizes[NSIZES] = { 16, 32, 64, 128, 256, 512, 1024, 2048 };

#define SMALLEST_SUBPAGE_SIZE 16
#define LARGEST_SUBPAGE_SIZE 2048

#elif PAGE_SIZE == 8192
#error "No support for 8k pages (yet?)"
#else
#error "Odd page size"
#endif
  97 
  98 ////////////////////////////////////////
  99 
/*
 * Freelist node. Lives in the first word of each free block, so the
 * per-page freelist costs no extra storage.
 */
struct freelist {
	struct freelist *next;
};

/*
 * Per-page accounting record for the subpage allocator. The page
 * address and the block-type index are packed into a single vaddr_t:
 * the page address occupies the PAGE_FRAME bits and the (small)
 * block-type index fits in the low bits.
 */
struct pageref {
	struct pageref *next_samesize;	/* next page with the same block size */
	struct pageref *next_all;	/* next page on the list of all pages */
	vaddr_t pageaddr_and_blocktype;	/* page address | block-type index */
	uint16_t freelist_offset;	/* offset of first free block, or INVALID_OFFSET */
	uint16_t nfree;			/* number of free blocks on the page */
};

/* freelist_offset value meaning "no free blocks on this page" */
#define INVALID_OFFSET   (0xffff)

/* Unpack / pack the page address and block type. */
#define PR_PAGEADDR(pr)  ((pr)->pageaddr_and_blocktype & PAGE_FRAME)
#define PR_BLOCKTYPE(pr) ((pr)->pageaddr_and_blocktype & ~PAGE_FRAME)
#define MKPAB(pa, blk)   (((pa)&PAGE_FRAME) | ((blk) & ~PAGE_FRAME))
 117 
 118 ////////////////////////////////////////
 119 
 120 /*
 121  * This is cheesy. 
 122  *
 123  * The problem is not that it's wasteful - we can only allocate whole
 124  * pages of pageref structures at a time anyway. The problem is that
 125  * we really ought to be able to have more than one of these pages.
 126  *
 127  * However, for the time being, one page worth of pagerefs gives us
 128  * 256 pagerefs; this lets us manage 256 * 4k = 1M of kernel heap.
 129  * That would be twice as much memory as we get for *everything*.
 130  * Thus, we will cheat and not allow any mechanism for having a second
 131  * page of pageref structs.
 132  *
 133  * Then, since the size is fixed at one page, we'll simplify a bit
 134  * further by allocating the page in the kernel BSS instead of calling
 135  * alloc_kpages to get it.
 136  */
 137 
/* One page worth of pageref structures; see the comment above. */
#define NPAGEREFS (PAGE_SIZE / sizeof(struct pageref))
static struct pageref pagerefs[NPAGEREFS];

/* Bitmap of in-use pagerefs, 32 entries per word. */
#define INUSE_WORDS (NPAGEREFS/32)
static uint32_t pagerefs_inuse[INUSE_WORDS];
 143 
 144 static
 145 struct pageref *
 146 allocpageref(void)
 147 {
 148         unsigned i,j;
 149         uint32_t k;
 150 
 151         for (i=0; i<INUSE_WORDS; i++) {
 152                 if (pagerefs_inuse[i]==0xffffffff) {
 153                         /* full */
 154                         continue;
 155                 }
 156                 for (k=1,j=0; k!=0; k<<=1,j++) {
 157                         if ((pagerefs_inuse[i] & k)==0) {
 158                                 pagerefs_inuse[i] |= k;
 159                                 return &pagerefs[i*32 + j];
 160                         }
 161                 }
 162                 KASSERT(0);
 163         }
 164 
 165         /* ran out */
 166         return NULL;
 167 }
 168 
 169 static
 170 void
 171 freepageref(struct pageref *p)
 172 {
 173         size_t i, j;
 174         uint32_t k;
 175 
 176         j = p-pagerefs;
 177         KASSERT(j < NPAGEREFS);  /* note: j is unsigned, don't test < 0 */
 178         i = j/32;
 179         k = ((uint32_t)1) << (j%32);
 180         KASSERT((pagerefs_inuse[i] & k) != 0);
 181         pagerefs_inuse[i] &= ~k;
 182 }
 183 
 184 ////////////////////////////////////////
 185 
/*
 * List heads: one per-size list of pages for each entry in sizes[],
 * plus one list of all pages owned by the subpage allocator.
 */
static struct pageref *sizebases[NSIZES];
static struct pageref *allbase;

////////////////////////////////////////

/*
 * Use one spinlock for the whole thing. Making parts of the kmalloc
 * logic per-cpu is worthwhile for scalability; however, for the time
 * being at least we won't, because it adds a lot of complexity and in
 * OS/161 performance and scalability aren't super-critical.
 */

static struct spinlock kmalloc_spinlock = SPINLOCK_INITIALIZER;
 199 
 200 ////////////////////////////////////////
 201 
 202 /* SLOWER implies SLOW */
 203 #ifdef SLOWER
 204 #ifndef SLOW
 205 #define SLOW
 206 #endif
 207 #endif
 208 
#ifdef SLOW
/*
 * Consistency-check one pageref: walk its freelist and verify that
 * every entry lies within the page, is aligned to the page's block
 * size, and that the number of entries matches the recorded free
 * count. Must be called with kmalloc_spinlock held.
 */
static
void
checksubpage(struct pageref *pr)
{
	vaddr_t prpage, fla;
	struct freelist *fl;
	int blktype;
	int nfree=0;

	KASSERT(spinlock_do_i_hold(&kmalloc_spinlock));

	if (pr->freelist_offset == INVALID_OFFSET) {
		/* Empty freelist: the free count must agree. */
		KASSERT(pr->nfree==0);
		return;
	}

	prpage = PR_PAGEADDR(pr);
	blktype = PR_BLOCKTYPE(pr);

	/* The freelist head must be inside the page and block-aligned. */
	KASSERT(pr->freelist_offset < PAGE_SIZE);
	KASSERT(pr->freelist_offset % sizes[blktype] == 0);

	fla = prpage + pr->freelist_offset;
	fl = (struct freelist *)fla;

	for (; fl != NULL; fl = fl->next) {
		fla = (vaddr_t)fl;
		KASSERT(fla >= prpage && fla < prpage + PAGE_SIZE);
		KASSERT((fla-prpage) % sizes[blktype] == 0);
		/* Kernel heap addresses are expected in KSEG0 (MIPS-specific). */
		KASSERT(fla >= MIPS_KSEG0);
		KASSERT(fla < MIPS_KSEG1);
		nfree++;
	}
	KASSERT(nfree==pr->nfree);
}
#else
/* With checking disabled, evaluate the argument and do nothing. */
#define checksubpage(pr) ((void)(pr))
#endif
 248 
#ifdef SLOWER
/*
 * Consistency-check the whole subpage allocator: every page on any
 * per-size list and on the all-pages list is checked, and both
 * traversals must visit the same number of pages. Must be called
 * with kmalloc_spinlock held.
 */
static
void
checksubpages(void)
{
	struct pageref *pr;
	int i;
	unsigned sc=0, ac=0;	/* page counts: size lists vs. all list */

	KASSERT(spinlock_do_i_hold(&kmalloc_spinlock));

	for (i=0; i<NSIZES; i++) {
		for (pr = sizebases[i]; pr != NULL; pr = pr->next_samesize) {
			checksubpage(pr);
			/* also guards against cycles in the list */
			KASSERT(sc < NPAGEREFS);
			sc++;
		}
	}

	for (pr = allbase; pr != NULL; pr = pr->next_all) {
		checksubpage(pr);
		KASSERT(ac < NPAGEREFS);
		ac++;
	}

	/* Every page must appear on exactly one size list and on the all list. */
	KASSERT(sc==ac);
}
#else
#define checksubpages() 
#endif
 279 
 280 ////////////////////////////////////////
 281 
/*
 * Print a human-readable map of one subpage-allocator page: its
 * address, block size, free count, and one character per block
 * ('.' = free, '*' = allocated). Must be called with
 * kmalloc_spinlock held.
 */
static
void
dumpsubpage(struct pageref *pr)
{
	vaddr_t prpage, fla;
	struct freelist *fl;
	int blktype;
	unsigned i, n, index;
	/* One bit per block, sized for the smallest block size. */
	uint32_t freemap[PAGE_SIZE / (SMALLEST_SUBPAGE_SIZE*32)];

	checksubpage(pr);
	KASSERT(spinlock_do_i_hold(&kmalloc_spinlock));

	/* clear freemap[] */
	for (i=0; i<sizeof(freemap)/sizeof(freemap[0]); i++) {
		freemap[i] = 0;
	}

	prpage = PR_PAGEADDR(pr);
	blktype = PR_BLOCKTYPE(pr);

	/* compute how many bits we need in freemap and assert we fit */
	n = PAGE_SIZE / sizes[blktype];
	KASSERT(n <= 32*sizeof(freemap)/sizeof(freemap[0]));

	/* Walk the freelist, setting one bit for each free block. */
	if (pr->freelist_offset != INVALID_OFFSET) {
		fla = prpage + pr->freelist_offset;
		fl = (struct freelist *)fla;

		for (; fl != NULL; fl = fl->next) {
			fla = (vaddr_t)fl;
			index = (fla-prpage) / sizes[blktype];
			KASSERT(index<n);
			freemap[index/32] |= (1<<(index%32));
		}
	}

	kprintf("at 0x%08lx: size %-4lu  %u/%u free\n", 
		(unsigned long)prpage, (unsigned long) sizes[blktype],
		(unsigned) pr->nfree, n);
	kprintf("   ");
	for (i=0; i<n; i++) {
		int val = (freemap[i/32] & (1<<(i%32)))!=0;
		kprintf("%c", val ? '.' : '*');
		/* wrap the block map every 64 characters */
		if (i%64==63 && i<n-1) {
			kprintf("\n   ");
		}
	}
	kprintf("\n");
}
 332 
 333 void
 334 kheap_printstats(void)
 335 {
 336         struct pageref *pr;
 337 
 338         /* print the whole thing with interrupts off */
 339         spinlock_acquire(&kmalloc_spinlock);
 340 
 341         kprintf("Subpage allocator status:\n");
 342 
 343         for (pr = allbase; pr != NULL; pr = pr->next_all) {
 344                 dumpsubpage(pr);
 345         }
 346 
 347         spinlock_release(&kmalloc_spinlock);
 348 }
 349 
 350 ////////////////////////////////////////
 351 
 352 static
 353 void
 354 remove_lists(struct pageref *pr, int blktype)
 355 {
 356         struct pageref **guy;
 357 
 358         KASSERT(blktype>=0 && blktype<NSIZES);
 359 
 360         for (guy = &sizebases[blktype]; *guy; guy = &(*guy)->next_samesize) {
 361                 checksubpage(*guy);
 362                 if (*guy == pr) {
 363                         *guy = pr->next_samesize;
 364                         break;
 365                 }
 366         }
 367 
 368         for (guy = &allbase; *guy; guy = &(*guy)->next_all) {
 369                 checksubpage(*guy);
 370                 if (*guy == pr) {
 371                         *guy = pr->next_all;
 372                         break;
 373                 }
 374         }
 375 }
 376 
 377 static
 378 inline
 379 int blocktype(size_t sz)
 380 {
 381         unsigned i;
 382         for (i=0; i<NSIZES; i++) {
 383                 if (sz <= sizes[i]) {
 384                         return i;
 385                 }
 386         }
 387 
 388         panic("Subpage allocator cannot handle allocation of size %lu\n", 
 389               (unsigned long)sz);
 390 
 391         // keep compiler happy
 392         return 0;
 393 }
 394 
/*
 * Allocate a block of size sz (< LARGEST_SUBPAGE_SIZE) from the
 * subpage allocator. The request is rounded up to the next block
 * size in sizes[]. First tries the existing pages of that block
 * type; if none has a free block, gets a fresh page from
 * alloc_kpages, builds its freelist, and allocates from it.
 * Returns NULL if no page or no pageref slot is available.
 */
static
void *
subpage_kmalloc(size_t sz)
{
	unsigned blktype;	// index into sizes[] that we're using
	struct pageref *pr;	// pageref for page we're allocating from
	vaddr_t prpage;		// PR_PAGEADDR(pr)
	vaddr_t fla;		// free list entry address
	struct freelist *volatile fl;	// free list entry
	void *retptr;		// our result

	volatile int i;


	blktype = blocktype(sz);
	sz = sizes[blktype];

	spinlock_acquire(&kmalloc_spinlock);

	checksubpages();

	for (pr = sizebases[blktype]; pr != NULL; pr = pr->next_samesize) {

		/* check for corruption */
		KASSERT(PR_BLOCKTYPE(pr) == blktype);
		checksubpage(pr);

		if (pr->nfree > 0) {

		doalloc: /* comes here after getting a whole fresh page */

			/* Pop the first entry off the page's freelist. */
			KASSERT(pr->freelist_offset < PAGE_SIZE);
			prpage = PR_PAGEADDR(pr);
			fla = prpage + pr->freelist_offset;
			fl = (struct freelist *)fla;

			retptr = fl;
			fl = fl->next;
			pr->nfree--;

			if (fl != NULL) {
				/* More free blocks: record the new head. */
				KASSERT(pr->nfree > 0);
				fla = (vaddr_t)fl;
				KASSERT(fla - prpage < PAGE_SIZE);
				pr->freelist_offset = fla - prpage;
			}
			else {
				/* That was the last free block. */
				KASSERT(pr->nfree == 0);
				pr->freelist_offset = INVALID_OFFSET;
			}

			checksubpages();

			spinlock_release(&kmalloc_spinlock);
			return retptr;
		}
	}

	/*
	 * No page of the right size available.
	 * Make a new one.
	 *
	 * We release the spinlock while calling alloc_kpages. This
	 * avoids deadlock if alloc_kpages needs to come back here.
	 * Note that this means things can change behind our back...
	 */

	spinlock_release(&kmalloc_spinlock);
	prpage = alloc_kpages(1);
	if (prpage==0) {
		/* Out of memory. */
		kprintf("kmalloc: Subpage allocator couldn't get a page\n"); 
		return NULL;
	}
	spinlock_acquire(&kmalloc_spinlock);

	pr = allocpageref();
	if (pr==NULL) {
		/* Couldn't allocate accounting space for the new page. */
		spinlock_release(&kmalloc_spinlock);
		free_kpages(prpage);
		kprintf("kmalloc: Subpage allocator couldn't get pageref\n"); 
		return NULL;
	}

	pr->pageaddr_and_blocktype = MKPAB(prpage, blktype);
	pr->nfree = PAGE_SIZE / sizes[blktype];

	/*
	 * Note: fl is volatile because the MIPS toolchain we were
	 * using in spring 2001 attempted to optimize this loop and
	 * blew it. Making fl volatile inhibits the optimization.
	 */

	/* Build the page's freelist, chaining the blocks back to front. */
	fla = prpage;
	fl = (struct freelist *)fla;
	fl->next = NULL;
	for (i=1; i<pr->nfree; i++) {
		fl = (struct freelist *)(fla + i*sizes[blktype]);
		fl->next = (struct freelist *)(fla + (i-1)*sizes[blktype]);
		KASSERT(fl != fl->next);
	}
	/* The last block chained is the freelist head. */
	fla = (vaddr_t) fl;
	pr->freelist_offset = fla - prpage;
	KASSERT(pr->freelist_offset == (pr->nfree-1)*sizes[blktype]);

	/* Put the new page on both bookkeeping lists. */
	pr->next_samesize = sizebases[blktype];
	sizebases[blktype] = pr;

	pr->next_all = allbase;
	allbase = pr;

	/* This is kind of cheesy, but avoids duplicating the alloc code. */
	goto doalloc;
}
 510 
/*
 * Free a block previously handed out by subpage_kmalloc. Returns 0
 * on success, or -1 if ptr does not lie on any page we own (the
 * caller then treats it as a whole-page allocation). Panics if ptr
 * lies on one of our pages but is not block-aligned. When the last
 * block of a page is freed, the whole page is returned to
 * free_kpages.
 */
static
int
subpage_kfree(void *ptr)
{
	int blktype;		// index into sizes[] that we're using
	vaddr_t ptraddr;	// same as ptr
	struct pageref *pr;	// pageref for page we're freeing in
	vaddr_t prpage;		// PR_PAGEADDR(pr)
	vaddr_t fla;		// free list entry address
	struct freelist *fl;	// free list entry
	vaddr_t offset;		// offset into page

	ptraddr = (vaddr_t)ptr;

	spinlock_acquire(&kmalloc_spinlock);

	checksubpages();

	/* Find the page, if any, that ptr falls on. */
	for (pr = allbase; pr; pr = pr->next_all) {
		prpage = PR_PAGEADDR(pr);
		blktype = PR_BLOCKTYPE(pr);

		/* check for corruption */
		KASSERT(blktype>=0 && blktype<NSIZES);
		checksubpage(pr);

		if (ptraddr >= prpage && ptraddr < prpage + PAGE_SIZE) {
			break;
		}
	}

	if (pr==NULL) {
		/* Not on any of our pages - not a subpage allocation */
		spinlock_release(&kmalloc_spinlock);
		return -1;
	}

	offset = ptraddr - prpage;

	/* Check for proper positioning and alignment */
	if (offset >= PAGE_SIZE || offset % sizes[blktype] != 0) {
		panic("kfree: subpage free of invalid addr %p\n", ptr);
	}

	/*
	 * Clear the block to 0xdeadbeef to make it easier to detect
	 * uses of dangling pointers.
	 */
	fill_deadbeef(ptr, sizes[blktype]);

	/*
	 * We probably ought to check for free twice by seeing if the block
	 * is already on the free list. But that's expensive, so we don't.
	 */

	/* Push the block onto the front of the page's freelist. */
	fla = prpage + offset;
	fl = (struct freelist *)fla;
	if (pr->freelist_offset == INVALID_OFFSET) {
		fl->next = NULL;
	} else {
		fl->next = (struct freelist *)(prpage + pr->freelist_offset);
	}
	pr->freelist_offset = offset;
	pr->nfree++;

	KASSERT(pr->nfree <= PAGE_SIZE / sizes[blktype]);
	if (pr->nfree == PAGE_SIZE / sizes[blktype]) {
		/* Whole page is free. */
		remove_lists(pr, blktype);
		freepageref(pr);
		/* Call free_kpages without kmalloc_spinlock. */
		spinlock_release(&kmalloc_spinlock);
		free_kpages(prpage);
	}
	else {
		spinlock_release(&kmalloc_spinlock);
	}

#ifdef SLOWER /* Don't get the lock unless checksubpages does something. */
	spinlock_acquire(&kmalloc_spinlock);
	checksubpages();
	spinlock_release(&kmalloc_spinlock);
#endif

	return 0;
}
 597 
 598 //
 599 ////////////////////////////////////////////////////////////
 600 
 601 void *
 602 kmalloc(size_t sz)
 603 {
 604         if (sz>=LARGEST_SUBPAGE_SIZE) {
 605                 unsigned long npages;
 606                 vaddr_t address;
 607 
 608                 /* Round up to a whole number of pages. */
 609                 npages = (sz + PAGE_SIZE - 1)/PAGE_SIZE;
 610                 address = alloc_kpages(npages);
 611                 if (address==0) {
 612                         return NULL;
 613                 }
 614 
 615                 return (void *)address;
 616         }
 617 
 618         return subpage_kmalloc(sz);
 619 }
 620 
 621 void
 622 kfree(void *ptr)
 623 {
 624         /*
 625          * Try subpage first; if that fails, assume it's a big allocation.
 626          */
 627         if (ptr == NULL) {
 628                 return;
 629         } else if (subpage_kfree(ptr)) {
 630                 KASSERT((vaddr_t)ptr%PAGE_SIZE==0);
 631                 free_kpages((vaddr_t)ptr);
 632         }
 633 }
 634 

/* [<][>][^][v][top][bottom][index][help] */