1 /**************************************************************************
3 * Copyright 2013-2014 RAD Game Tools and Valve Software
4 * Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 **************************************************************************/
27 // stb_malloc.h 0.01 -- public domain multi-heap malloc -- Sean Barrett, 2012
29 // WARNING -- IN THIS VERSION THE STBM_TC STUFF IS UNTESTED AND
30 // CROSS-THREAD FREES AREN'T CODED -- IGGY DOESN'T USE THOSE FEATURES
31 // AND THAT'S ALL I'VE CODED/TESTED
33 // A malloc "implementation" that you need to do a bit of configuring
34 // on to actually make a malloc.
38 // Define the preprocessor macros STB_MALLOC_IMPLEMENTATION
39 // in *one* .c/.cpp file before #including this file, e.g.:
41 // #define STB_MALLOC_IMPLEMENTATION
42 // #include "stb_malloc.h"
44 // Also optionally define the symbols listed in the section
45 // "Optional definitions" below.
49 // - TLSF for mid-sized allocations for O(1) allocation & freeing
50 // - large allocations go to user-defined "system allocator"
51 // - small allocations (<= pointer-sized) slab-allocated (no coalescing)
52 // - no constraints on system allocator (e.g. page-aligned NOT required)
53 // - multiple separate heaps which don't interfragment
54 // - can free heap all-at-once without freeing individual allocations
55 // - can free without specifying which heap it came from
57 // - must create one thread cache per thread per heap that you want cached
58 // - you must create/manage any thread-local variables yourself
59 // - almost every allocation offers a 16-bit user-defined "header" field
60 // - overhead of 8 bytes in 32-bit, 16 bytes in 64-bit
61 // - half that for allocations of 4 bytes in 32-bit, 8 bytes in 64-bit
62 // - but they get no 16-bit user field
63 // - plus overhead for allocations that aren't multiples of alignment
64 // - all configuration at run-time
65 // - choose per-heap minimum alignment at heap creation
66 // - provide mutex handles or not for thread-safety / non-safety
67 // - enable file/line recording on the fly (mixed in one heap)
68 // - enable trailing guard bytes on the fly (mixed in one heap)
69 // - enable variable-sized user-defined debug data on the fly (mixed)
70 // - all on-the-fly-enables incur significant extra size in malloc header
71 // - compile time configuration of platform features
72 // - definition of a mutex handle and operations to lock/unlock it
73 // - definition of a compare-and-swap for faster cross-thread frees
75 // Four possible models:
76 // - create single-thread-only heaps without mutexes for fast performance
77 // - create mutexed heap with per-thread thread cache for "global" malloc
78 // that shares memory between threads
79 // - create per-thread single-thread-only heap with cross-thread mutex for
80 // fast "global" malloc without sharing memory (no false sharing)
81 // - create M heaps for N threads with per-thread thread caches and
82 // primary mutexes and cross-thread-mutexes
84 // Optional definitions:
87 // The type of a handle to a mutex. It must be comparable against
88 // 0 to mean "is it null". These should be as-efficient-as-possible
89 // process-private mutexes, not cross-process mutexes. If not
90 // defined, allocation heaps are not thread-safe.
93 // "Lock" a mutex handle. A single thread of stb_malloc will
94 // never lock more than one mutex at a time, nor try to lock
95 // the same mutex twice (they are not "reentrant"). Must be
96 // defined if STBM_MUTEX_HANDLE is defined.
99 // Release a previously acquired mutex, as above.
101 // STBM_COMPARE_AND_SWAP32
102 // Define a function that performs an atomic compare-and-swap on
103 // a 32-bit value. (To be specified, since it's unimplemented so far.)
104 // Only used if STBM_MUTEX_HANDLE is defined and a cross-thread
105 // mutex is defined for a given heap.
110 // If your compiler is C99, you do not need to define these.
111 // Otherwise, stb_malloc will try default (32-bit) assignments
112 // for them and validate them at compile time. If they are
113 // incorrect, you will get compile errors, and will need to
114 // define them yourself. STBM_POINTER_SIZE should be 32 or 64.
116 // STBM_LOG2_32BIT(x)
117 // Compute floor of log2 of a 32-bit number, or -1 if 0, i.e.
118 // "31-count_leading_zeroes32(x)". This function is important to
119 // high-performance of the allocator; it is used for computing block
120 // "sizeclasses" and for TLSF O(1) scans. If you do not define it,
121 // stb_malloc uses a small binary tree and a small table lookup.
124 // If you don't define this, stb_malloc uses assert.h and assert().
125 // This is the only C standard library function used by stb_malloc.
128 // You can define this to 'memset' or your own memset replacement;
129 // if not, stb_malloc uses a naive (possibly less efficient)
133 // You can define this to 'memcpy' or your own memcpy replacement;
134 // if not, stb_malloc uses a naive (probably inefficient)
138 // This function (which takes a char*) is called when there is an
139 // extraordinary failure: a corrupt header is detected, guard bytes
140 // are overwritten, or the API is used incorrectly. It is NOT called
141 // if an allocation fails, which is considered a normal behavior.
142 // If not defined, defaults to STBM_ASSERT(0).
145 // Defines an attribute applied to any callback functions, useful
146 // for special linkage conventions.
149 //     If defined, suppresses the stbm_heap typedef for compilers that
150 // don't like to see duplicate identical typedefs.
153 // If defined, declares all functions as static, so they can only
154 // be accessed from the file that creates the implementation.
157 // If defined, enables internal automatic calls to iggy_heap_check
158 // to validate the heap after every operation (slow).
161 // On The Design Of stb_malloc.h
163 // I've written many allocators (google for 'satoria smalloc' for the 1st).
164 // stb_malloc.h is my most refined, the collision of seven central ideas:
166 // - feature: multiple heaps, with heap-independent freeing
167 // - design: overhead mostly proportional to used memory
168 // - design: reuse-oriented API/implementation (i.e. stb_-library-style)
169 // - design: all malloc options available without compile-time switches
170 // - algorithm: TLSF (two-level segregated fit) for O(1) allocation
171 // - algorithm: boundary-tag coalescing for O(1) frees
172 // - algorithm: thread-caching inspired by TCMalloc
174 // Implementation information appears after the header section.
176 #include "vogl_core.h"
177 #include "vogl_threading.h"
178 #include "stb_malloc.h"
180 #define STBM_ASSERT VOGL_ASSERT
181 #define STBM_FAIL VOGL_FAIL
182 #define STBM_LOG2_32BIT(x) (31 - static_cast<int>(vogl::math::count_leading_zero_bits(x)))
183 #define STBM_MEMSET memset
184 #define STBM_MEMCPY memcpy
186 #if defined(_WIN64) || defined(__MINGW64__) || defined(_LP64) || defined(__LP64__)
187 #define STBM_POINTER_SIZE 64
189 #define STBM_POINTER_SIZE 32
191 #define STBM_UINTPTR uintptr_t
195 // Between needing to minimize overhead and be O(1), we cannot
196 // afford to construct indices over all of the memory space. Also
197 // we cannot guarantee that system allocations are contiguous.
198 // The former means we must store data immediately before the
199 // user pointer; the latter means we don't bother trying to coalesce
200 // across system allocation boundaries.
202 // Allocations that are pointer-sized or smaller are satisfied from
203 // a "homogeneous heap" (slab) that does no coalescing.
205 // Allocations >= 1MB are allocated from the system allocator (with
206 // additional padding to allow our header). The threshold is
207 // configurable and actually defaults to 256KB, but 1MB is the
208 // largest allowed, due to limits of encoding sizes into header.
210 // Allocations outside the ranges above are allocated out of chunks
211 // allocated from the system allocator, using TLSF to locate sufficiently
212 // large regions, using boundary tags to free them.
214 // Specially-aligned blocks waste up to alignment-worth of space
215 // using special markup, rather than creating a larger block and freeing
216 // pieces on either side of it.
218 // Debug blocks have special markup to allow storing __FILE__ and __LINE__
219 // or other user data without recompilation.
221 // Thread-cached blocks are left in the 'allocated' state--they are
222 // essentially a layer over the regular heaps--and they just construct
223 // a singly-linked list in the block's user storage space (which is
224 // always at least pointer-sized).
229 // The allocator is split into five separate layered abstractions.
230 // However, the memory layout is designed accounting for all the layers
231 // at once, so that we can determine from a pointer what state it's
232 // in from any of the layers.
234 // == Layer 1: Raw storage manipulation
236 // stbm__small_* - handles allocations <= sizeof(void*)
237 // stbm__large_* - handles allocations >= 1024*1024
238 // stbm__medium_* - handles other allocations
240 // This layer defines three independent allocators. All three allocators
241 // store their state independently in an stbm_heap structure, and all
242 // use the system allocator defined in the stbm_heap to create underlying
243 // storage. stbm__medium and stbm__large both guarantee allocations meet
244 // the minimum_alignment defined in stbm_heap.
246 // == Layer 2: Heap allocator
250 // Based on the size of the allocation, dispatches to one of the above
251 // allocators. Handles cross-thread frees.
253 // == Layer 3: Thread cache
257 // If defined, allocates from the thread cache, and fills the thread cache
258 // using layer 2. If undefined, calls layer 2.
260 // == Layer 4: Abstract allocations
264 // Handles aligned and debug allocations by making larger allocations
265 // to layer 3, and takes responsibility for setting the 16-bit userdata
270 // Implements the top-level API functions by dispatching to stbm__alloc
271 // functions based on the API function and the internal debug settings
275 // ======================================================================
277 // Memory consists of discontinuous blocks called "system allocations".
278 // System allocations come in two flavors:
280 // LARGE-ALLOC MEDIUM-ALLOC
281 // +---------------+ +----------------+
282 // | system alloc | | system alloc |
283 // | header | | header |
284 // +---------------+ stbm__large +----------------+
285 // | pad-to-align | user pointer | pad-to-align | stbm__medium
286 // |+-------------+| | |+--------------+| user pointer
287 // || large alloc || | || medium alloc || |
288 // || header || | || header || |
289 // |+=============+| | |+==============+| |
290 // || allocated ||<--+ || allocated ||<--+
291 // || storage || || storage ||
293 // || || || || stbm__medium
294 // || || |+--------------+| user pointer
295 // || || || medium alloc || |
296 // || || || header || |
297 // || || |+==============+| |
298 // || || || allocated ||<--+
299 // || || || storage ||
300 // || || |+--------------+|
301 // || || || medium alloc ||
302 // || || || header ||
303 // || || |+--------------+|
304 // || || ||+------------+||
305 // || || |||small chunk ||| stbm__small
306 // || || ||| header ||| user pointer
307 // || || ||+------------+|| |||
308 // || || |||small header||| |||
309 // || || ||+============+|| |||
310 // || || |||small alloc |||<--+||
311 // || || ||+------------+|| ||
312 // || || |||small header||| ||
313 // || || ||+============+|| ||
314 // || || |||small alloc |||<---+|
315 // || || ||+------------+|| |
316 // || || |||small header||| |
317 // || || ||+============+|| |
318 // || || |||small alloc |||<----+
319 // || || ||+------------+||
320 // |+-------------+| ||| ... |||
321 // +---------------+ ||+------------+||
322 // |+--------------+|
323 // || medium alloc ||
324 // Boundaries indicated as || header ||
325 // "=====" are boundaries that |+--------------+|
326 // free and get functions look ||+------------+||
327 // across to read a pointer-sized |||aligned base|||
328 // "prefix" field which contains ||| header ||| stbm__medium
329 // tag bits which explain what ||+------------+|| user pointer
330 // type of pointer/block a user |||pad-to-align||| with explicit
331 // pointer corresponds to. ||+------------+|| alignment
332 // |||align alloc ||| |
334 // ||+============+|| |
335 // ||| aligned |||<---+
336 // Debug blocks are wrapped ||| allocated |||
337 // similarly to the aligned ||| storage |||
338 // block shown on the right. ||| |||
340 // ||+------------+||
341 // |+--------------+| stbm__medium
342 // || medium alloc || user pointer
344 // |+==============+| |
345 // || allocated ||<--+
348 // |+--------------+|
350 // || alloc header ||
351 // |+--------------+|
352 // +----------------+
355 // |due to alignment|
356 // +----------------+
360 // All allocations have one of the following two structures:
362 // low-level header data
363 // uintptr header "prefix"
364 // ==> pointer returned by malloc
369 // low-level header data
370 // uintptr header "prefix"
371 // ==> high-level extra data
373 // high-level header bits
374 // ==> pointer returned by malloc
376 // optional guard bytes
378 // Where the items with ==> are guaranteed to be aligned
379 // to the heap minimum alignment (except for small blocks).
381 // The most general case header bits in the "prefix" field
384 // [xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx only in 64-bit]
385 // uuuuuuuu uuuuuuuu ssssssss sewbpttt
387 // u = user data for medium/large allocated
388 // s = size class for medium allocated
389 // e = extra bit for medium allocated size
391 // ttt = medium (110) small (000) large (100) aligned (001) debug (011)
392 // b = block is allocated (boundary tag for medium allocated & free blocks)
393 // p = previous allocated (boundary tag for medium allocated & free blocks)
394 // w = block is a low-level block containing a high-level "wrapped" block:
395 // internal is either debug, aligned, or a chunk of small blocks
397 // In debug-type blocks, bp are repurposed as sub-type.
399 // The tag bit assignments are chosen to allow accelerating joint
400 // tests ("medium or large" and "debug or aligned").
402 //#define STBM_SELFTEST
407 #define STBM_UNITTEST
410 #if !defined(STBM_DEBUGCHECK) && defined(STBM_UNITTEST)
411 #define STBM_DEBUGCHECK
414 typedef unsigned char stbm__uint8;
416 #if __STDC_VERSION__ >= 199901L
420 typedef uint32_t stbm__uint32;
424 typedef uintptr_t stbm__uintptr;
427 #ifndef STBM_POINTER_SIZE
428 #if UINTPTR_MAX > 0xffffffff
429 #define STBM_POINTER_SIZE 64
431 #define STBM_POINTER_SIZE 32
437 typedef unsigned int stbm__uint32;
441 typedef size_t stbm__uintptr;
444 #ifndef STBM_POINTER_SIZE
445 #define STBM_POINTER_SIZE 32
450 typedef STBM_UINTPTR stbm__uintptr;
454 typedef STBM_UINT32 stbm__uint32;
457 typedef int stbm__check_ptrsize[sizeof(void *) == STBM_POINTER_SIZE / 8 ? 1 : -1];
458 typedef int stbm__check_uintptr[sizeof(stbm__uintptr) == sizeof(void *) ? 1 : -1];
459 typedef int stbm__check_uint32[sizeof(stbm__uint32) == 4 ? 1 : -1];
461 #define STBM__SMALLEST_ALLOCATION (sizeof(void *)) // 4 or 8
462 #define STBM__SMALLEST_BLOCKSIZE (2 * sizeof(void *)) // 8 or 16
463 #define STBM__SMALL_ALIGNMENT (2 * sizeof(void *)) // 8 or 16
464 #define STBM__MEDIUM_SMALLEST_BLOCK (4 * sizeof(void *))
466 #if STBM_POINTER_SIZE == 64
467 #define STBM__SMALLEST_BLOCKSIZE_LOG2 4
469 #define STBM__SMALLEST_BLOCKSIZE_LOG2 3
472 typedef int stbm__check_smallest_log2[(1 << STBM__SMALLEST_BLOCKSIZE_LOG2) == STBM__SMALLEST_BLOCKSIZE ? 1 : -1];
476 #define STBM_ASSERT(x) assert(x)
480 // if this function is called, it represents a bug in the client code
481 // (failing to obey the heap semantics properly in stbm_free)
482 static void STBM_FAIL(const char *message)
488 #if defined(STBM_MUTEX_HANDLE) && !defined(STBM__NO_MUTEX_HANDLE)
490 #define STBM__LOCKING(x) x
493 #if defined(STBM__MUTEX) && (!defined(STBM_MUTEX_ACQUIRE) || !defined(STBM_MUTEX_RELEASE))
494 #error "Must define STBM_MUTEX_ACQUIRE and STBM_MUTEX_RELEASE if defining STBM_MUTEX_HANDLE"
497 #ifndef STBM__LOCKING
498 #define STBM__LOCKING(x)
502 #define STBM__ACQUIRE(lock) \
504 STBM_MUTEX_ACQUIRE(lock); \
506 #define STBM__RELEASE(lock) \
508 STBM_MUTEX_RELEASE(lock); \
511 #define STBM__ACQUIRE(lock)
512 #define STBM__RELEASE(lock)
515 #ifdef STBM_DEBUGCHECK
516 static void stbm__heap_check_locked(stbm_heap *heap);
517 #define STBM__CHECK_LOCKED(heap) stbm__heap_check_locked(heap)
519 #define STBM__CHECK_LOCKED(heap)
522 //////////////////////////////////////////////////////////////////////////////
// Returns nonzero when 'num' has at most one bit set -- i.e. it is an
// exact power of two, or zero. (Zero passes the test; the alignment
// values callers feed in are expected to be nonzero.)
int stbm__is_pow2(size_t num)
{
    size_t cleared_low_bit = num & (num - 1);
    return cleared_low_bit == 0;
}
// Round 'num' up to the next multiple of 'pow2', which must be a power
// of two (asserted). Standard mask-and-add trick: adding pow2-1 then
// clearing the low bits never overshoots past the next multiple.
static size_t stbm__round_up_to_multiple_of_power_of_two(size_t num, size_t pow2)
{
    size_t low_bits = pow2 - 1;
    STBM_ASSERT(stbm__is_pow2(pow2));
    return (num + low_bits) & ~low_bits;
}
538 #ifdef STBM_LOG2_32BIT
539 #define stbm__log2_floor(n) STBM_LOG2_32BIT(n)
541 static int stbm__log2_floor(stbm__uint32 n)
543 static signed char log2_4[16] = { -1, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3 };
551 return t + log2_4[n];
555 // return the index of a zero bit, or <0 if all bits are set
556 static int stbm__find_any_zero(stbm__uint32 n)
558 // @OPTIMIZE: we can replace this with an intrinsic that finds
559 // any zero in any direction
561 // address of left-most zero bit is address of left-most 1 bit of ~n
562 return stbm__log2_floor(~n);
565 // find the leftmost set bit which has index >= minbitpos counting from left
566 // for TLSF this would be clearer counting from the right, but counting from
567 // the left is guaranteed to be efficient
568 static int stbm__find_leftmost_one_bit_after_skipping(stbm__uint32 value, stbm__uint32 minbitpos)
570 stbm__uint32 mask = 0xffffffff >> minbitpos;
571 return 31 - stbm__log2_floor(value & mask);
574 // align an address to a specific alignment & offset
575 static void *stbm__align_address(void *ptr, size_t align, size_t align_off)
577 stbm__uintptr cur_off;
578 stbm__uintptr align_mask = (stbm__uintptr)align - 1;
583 stbm__uintptr address;
588 cur_off = (v.address & align_mask);
589 v.address += (align_off - cur_off) & align_mask;
590 STBM_ASSERT((v.address & align_mask) == align_off);
596 static void STBM_MEMSET(void *mem, unsigned char value, size_t num_bytes)
598 unsigned char *s = (unsigned char *)mem;
600 for (i = 0; i < num_bytes; ++i)
605 //////////////////////////////////////////////////////////////////////////////
607 // Size-class computation
609 // For simplicity, we use the same 13*32 size classes for both TLSF and for
610 // the thread cache, although they actually mean different things.
612 // For TLSF, we need to split the size class into two
613 // variables, the "index" and the "slot". For the thread
614 // cache, we store them in a single variable.
616 // The size classes are defined by the following logic:
619 // size = 0 + slot * 8
621 // size = (128 << index) + slot * (4 << index)
624 #define STBM__NUM_INDICES 13
625 #define STBM__NUM_SLOTS 32
627 stbm__uint32 stbm__size_classes[STBM__NUM_INDICES][STBM__NUM_SLOTS] =
629 { 0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248, },
630 { 256, 264, 272, 280, 288, 296, 304, 312, 320, 328, 336, 344, 352, 360, 368, 376, 384, 392, 400, 408, 416, 424, 432, 440, 448, 456, 464, 472, 480, 488, 496, 504, },
631 { 512, 528, 544, 560, 576, 592, 608, 624, 640, 656, 672, 688, 704, 720, 736, 752, 768, 784, 800, 816, 832, 848, 864, 880, 896, 912, 928, 944, 960, 976, 992, 1008, },
632 { 1024, 1056, 1088, 1120, 1152, 1184, 1216, 1248, 1280, 1312, 1344, 1376, 1408, 1440, 1472, 1504, 1536, 1568, 1600, 1632, 1664, 1696, 1728, 1760, 1792, 1824, 1856, 1888, 1920, 1952, 1984, 2016, },
633 { 2048, 2112, 2176, 2240, 2304, 2368, 2432, 2496, 2560, 2624, 2688, 2752, 2816, 2880, 2944, 3008, 3072, 3136, 3200, 3264, 3328, 3392, 3456, 3520, 3584, 3648, 3712, 3776, 3840, 3904, 3968, 4032, },
634 { 4096, 4224, 4352, 4480, 4608, 4736, 4864, 4992, 5120, 5248, 5376, 5504, 5632, 5760, 5888, 6016, 6144, 6272, 6400, 6528, 6656, 6784, 6912, 7040, 7168, 7296, 7424, 7552, 7680, 7808, 7936, 8064, },
635 { 8192, 8448, 8704, 8960, 9216, 9472, 9728, 9984, 10240, 10496, 10752, 11008, 11264, 11520, 11776, 12032, 12288, 12544, 12800, 13056, 13312, 13568, 13824, 14080, 14336, 14592, 14848, 15104, 15360, 15616, 15872, 16128, },
636 { 16384, 16896, 17408, 17920, 18432, 18944, 19456, 19968, 20480, 20992, 21504, 22016, 22528, 23040, 23552, 24064, 24576, 25088, 25600, 26112, 26624, 27136, 27648, 28160, 28672, 29184, 29696, 30208, 30720, 31232, 31744, 32256, },
637 { 32768, 33792, 34816, 35840, 36864, 37888, 38912, 39936, 40960, 41984, 43008, 44032, 45056, 46080, 47104, 48128, 49152, 50176, 51200, 52224, 53248, 54272, 55296, 56320, 57344, 58368, 59392, 60416, 61440, 62464, 63488, 64512, },
638 { 65536, 67584, 69632, 71680, 73728, 75776, 77824, 79872, 81920, 83968, 86016, 88064, 90112, 92160, 94208, 96256, 98304, 100352, 102400, 104448, 106496, 108544, 110592, 112640, 114688, 116736, 118784, 120832, 122880, 124928, 126976, 129024, },
639 { 131072, 135168, 139264, 143360, 147456, 151552, 155648, 159744, 163840, 167936, 172032, 176128, 180224, 184320, 188416, 192512, 196608, 200704, 204800, 208896, 212992, 217088, 221184, 225280, 229376, 233472, 237568, 241664, 245760, 249856, 253952, 258048, },
640 { 262144, 270336, 278528, 286720, 294912, 303104, 311296, 319488, 327680, 335872, 344064, 352256, 360448, 368640, 376832, 385024, 393216, 401408, 409600, 417792, 425984, 434176, 442368, 450560, 458752, 466944, 475136, 483328, 491520, 499712, 507904, 516096, },
641 { 524288, 540672, 557056, 573440, 589824, 606208, 622592, 638976, 655360, 671744, 688128, 704512, 720896, 737280, 753664, 770048, 786432, 802816, 819200, 835584, 851968, 868352, 884736, 901120, 917504, 933888, 950272, 966656, 983040, 999424, 1015808, 1032192, },
643 #define STBM__SIZECLASS_MAX 1032192
645 #define stbm__size_for_index_slot_calc(i, s) ((stbm__uint32)(((i) == 0 ? (s) * 8 : (128 << (i)) + (s) * (4 << (i)))))
647 #define stbm__size_for_index_slot(i, s) stbm__size_classes[i][s] // @OPTIMIZE: better to use table or _calc?
649 #define stbm__size_for_index_slot(i, s) stbm__size_for_index_slot_calc(i, s)
651 #define stbm__size_for_tc_index(i) stbm__size_classes[0][i]
658 // compute the size class that a given size block can satisfy
659 static stbm__index_slot stbm__index_slot_for_free_block(size_t size)
665 is.slot = (stbm__uint32)(size >> 3);
669 is.index = stbm__log2_floor((stbm__uint32)size) - 7;
670 is.slot = (stbm__uint32)((size - (128 << is.index)) >> (is.index + 2));
672 STBM_ASSERT(is.index < STBM__NUM_INDICES && is.slot < STBM__NUM_SLOTS);
673 STBM_ASSERT(size >= stbm__size_for_index_slot(is.index, is.slot));
677 // compute the size class that is required to satisfy a given request size
678 // computing the correct index requires rounding up in a complex way; maybe
679 // it can be done (but I suspect it requires a divide), so for now instead
680 // of doing it closed-form we'll just compute the round-down one and
681 // "increment" it if needed
682 static stbm__index_slot stbm__index_slot_for_request(size_t size)
684 stbm__index_slot is = stbm__index_slot_for_free_block(size);
685 if (size > stbm__size_for_index_slot(is.index, is.slot))
687 if (++is.slot == STBM__NUM_SLOTS)
693 STBM_ASSERT(is.index < STBM__NUM_INDICES && is.slot < STBM__NUM_SLOTS);
694 STBM_ASSERT(size <= stbm__size_for_index_slot(is.index, is.slot));
698 static int stbm__tc_index_for_free_block(size_t size)
700 stbm__index_slot is = stbm__index_slot_for_free_block(size);
701 return is.index * STBM__NUM_SLOTS + is.slot;
704 static int stbm__tc_index_for_request(size_t size)
706 stbm__index_slot is = stbm__index_slot_for_request(size);
707 return is.index * STBM__NUM_SLOTS + is.slot;
711 static void stbm__test_sizes(void)
714 for (i = 0; i < 13; ++i)
716 for (j = 0; j < 32; ++j)
720 stbm__uint32 size = stbm__size_for_index_slot(i, j);
722 STBM_ASSERT(stbm__size_classes[i][j] == size);
723 STBM_ASSERT(stbm__size_for_index_slot_calc(i, j) == size);
725 // if the user requests a size exactly at a size class, it should return that sizeclass
726 is = stbm__index_slot_for_request(size);
727 STBM_ASSERT(is.index == i && is.slot == j);
729 // if the user requests a size just smaller than the size class, it should return the sizeclass
730 is = stbm__index_slot_for_request(size - 1);
731 STBM_ASSERT(is.index == i && is.slot == j);
733 // if the user requests a size just larger than the size class, it should return the next sizeclass
734 if (i < 12 || j < 31)
736 is = stbm__index_slot_for_request(size + 1);
737 STBM_ASSERT(is.index == i + 1 || is.slot == j + 1);
740 // if a free block is exactly at the size class, it should return that size class
741 is = stbm__index_slot_for_free_block(size);
742 STBM_ASSERT(is.index == i && is.slot == j);
744 // if a free block is just larger than the size class, it should return that size class
745 is = stbm__index_slot_for_free_block(size + 1);
746 STBM_ASSERT(is.index == i && is.slot == j);
748 // if a free block is just smaller than the size class, it should return the previous size class
749 is = stbm__index_slot_for_free_block(size - 1);
750 STBM_ASSERT(is.index + 1 == i || is.slot + 1 == j);
757 //////////////////////////////////////////////////////////////////////////////
759 // Define the block data structures
762 #define STBM__SYSTEM_ALLOC_LARGE 0xface0ff5
763 #define STBM__SYSTEM_ALLOC_MEDIUM 0xfacefac1
765 typedef struct stbm__system_allocation
768 struct stbm__system_allocation *prev;
769 struct stbm__system_allocation *next;
770 void *begin_pointer; // used for both large and medium
771 void *end_pointer; // only used for iterating medium blocks
772 stbm__uintptr size; // RG 12/02/13: this was used only for medium blocks, now it should be set on large blocks too
773 } stbm__system_allocation;
775 typedef struct stbm__small_chunk
777 void *wrapped_user_pointer; // required for heap walking
780 struct stbm__small_chunk *next;
781 struct stbm__small_chunk *prev;
783 struct stbm__small_freeblock *first_free;
784 char *unused_storage;
785 stbm__uint32 num_alloc;
791 void *wrapped_user_pointer;
792 } stbm__aligned_base;
796 void *underlying_allocation;
797 stbm__uintptr prefix;
799 } stbm__aligned_allocated;
804 void *wrapped_user_pointer;
812 stbm__debug_base1 debug1;
814 unsigned short num_guard_bytes;
815 stbm__uint8 guard_value;
817 stbm__uint32 extra_debug_bytes;
820 typedef struct stbm__debug_allocated
824 stbm__debug_base1 *debug1;
825 stbm__debug_base2 *debug2;
826 } underlying_allocation;
827 stbm__uintptr user_size_in_bytes;
828 stbm__uintptr prefix;
830 } stbm__debug_allocated;
832 // stbm__lock allows the user to either specify locks
833 // as structures (e.g. CRITICAL_SECTION) or as mutex
835 typedef STBM_MUTEX_HANDLE stbm__lock;
836 #define STBM__LOCKFIELD(s) (s)
840 // linked list sentinels
841 stbm__small_chunk full;
842 stbm__small_chunk nonfull;
845 stbm__small_chunk *chunkcache;
846 } stbm__heap_smalldata;
850 struct stbm__medium_freeblock *lists[STBM__NUM_INDICES][STBM__NUM_SLOTS]; // 1456 bytes on 32-bit
851 stbm__system_allocation *cached_system_alloc;
852 stbm__uintptr cached_system_alloc_size;
853 stbm__uint32 bitmap[STBM__NUM_INDICES];
854 stbm__uint32 toplevel_bitmap;
855 stbm__uint32 minimum_freeblock_size; // larger of minimum_alignment and STBM__SMALLEST_ALLOCATION
856 } stbm__heap_mediumdata;
858 #define STBM__FREE_STACK_SIZE 16
860 // NB: changes to this struct must be propagated to stbm_heap_size() macro
863 // if data is freed "to" other heaps, they store it here and
864 // let this heap resolve it, to avoid excess contention on the
865 // heap's core data structures
869 void *crossthread_free_stack[STBM__FREE_STACK_SIZE];
870 // pick STBM__FREE_STACK_SIZE so there's probably a cacheline boundary here
871 stbm__uintptr crossthread_free_pointer_stack_pos;
872 char ct_stack_padding[64 - sizeof(stbm__uintptr)];
873 // pad to a cacheline boundary
876 stbm__lock crossthread_mutex;
877 void *crossthread_free_list;
878 char ct_list_padding[64 - sizeof(void *) - sizeof(stbm__lock)];
881 stbm__lock allocation_mutex;
883 // everything below here can be accessed unguarded, assuming
884 // two threads don't try to access a single heap at the same time
887 size_t smallchunk_size; // typically 4KB
888 size_t cur_medium_chunk_size;
889 size_t max_medium_chunk_size;
891 stbm__uint32 num_guard_bytes;
892 stbm__uint32 minimum_alignment;
893 stbm__uint32 large_alloc_threshhold;
895 stbm__uint8 align_all_blocks_to_minimum;
896 stbm__uint8 force_file_line;
897 stbm__uint8 force_debug; // force debug blocks always
898 stbm__uint8 guard_value;
900 stbm__uint8 memset_on_alloc;
901 stbm__uint8 memset_on_free;
902 stbm__uint8 memset_alloc_value;
903 stbm__uint8 memset_free_value;
905 stbm__uint8 full_stats;
906 stbm__uint8 cache_medium_chunks;
907 stbm__uint8 config_padding2;
908 stbm__uint8 config_padding3;
911 stbm__system_allocation system_sentinel;
912 stbm_system_alloc *system_alloc;
913 stbm_system_free *system_free;
917 stbm__uint32 cur_outstanding_allocations_count;
918 stbm__uint32 max_outstanding_allocations_count;
919 stbm__uintptr cur_outstanding_allocations_bytes;
920 stbm__uintptr max_outstanding_allocations_bytes;
921 stbm__uintptr total_allocations_count;
922 stbm__uintptr total_allocations_bytes;
925 stbm__heap_smalldata small;
928 stbm__heap_mediumdata medium;
930 typedef int stbm__heap_size_validate[sizeof(stbm_heap) + 60 <= STBM_HEAP_SIZEOF ? 1 : -1];
932 #define STBM__MIN(a, b) ((a) < (b) ? (a) : (b))
933 #define STBM__MAX(a, b) ((a) > (b) ? (a) : (b))
935 static void stbm__stats_init(stbm_heap *heap)
937 heap->cur_outstanding_allocations_bytes = 0;
938 heap->cur_outstanding_allocations_count = 0;
939 heap->max_outstanding_allocations_bytes = 0;
940 heap->max_outstanding_allocations_count = 0;
941 heap->total_allocations_bytes = 0;
942 heap->total_allocations_count = 0;
945 static void stbm__stats_update_alloc(stbm_heap *heap, size_t bytes)
947 heap->cur_outstanding_allocations_count += 1;
948 heap->cur_outstanding_allocations_bytes += bytes;
949 if (heap->full_stats)
951 heap->max_outstanding_allocations_count = STBM__MAX(heap->max_outstanding_allocations_count, heap->cur_outstanding_allocations_count);
952 heap->max_outstanding_allocations_bytes = STBM__MAX(heap->max_outstanding_allocations_bytes, heap->cur_outstanding_allocations_bytes);
953 heap->total_allocations_count += 1;
954 heap->total_allocations_bytes += bytes;
958 static void stbm__stats_update_free(stbm_heap *heap, size_t bytes)
960 STBM_ASSERT(heap->cur_outstanding_allocations_count >= 1);
961 STBM_ASSERT(heap->cur_outstanding_allocations_bytes >= bytes);
962 heap->cur_outstanding_allocations_count -= 1;
963 heap->cur_outstanding_allocations_bytes -= bytes;
966 //////////////////////////////////////////////////////////////////////////////
// Decode the prefix bits
//
// The bottom 3 bits of every allocation's prefix word encode which
// allocator owns the block. Values 2, 5 and 7 are never assigned, which
// lets STBM__PREFIX_VALID screen for corrupted headers.
#define STBM__TYPE_small 0 // small gets 0 so 8-aligned heap pointer works
#define STBM__TYPE_aligned 1
#define STBM__TYPE_debug 3
#define STBM__TYPE_large 4
#define STBM__TYPE_medium 6
#define STBM__TYPE_MASK 7
#define STBM__TYPE(x) ((x) & STBM__TYPE_MASK)
// bit 2 is set only for the large (4) and medium (6) types
#define STBM__TYPE_med_large(x) (STBM__TYPE(x) & 4)
// bit 0 is set only for the aligned (1) and debug (3) wrapper types
#define STBM__TYPE_align_debug(x) ((x) & 1)
// only if allocated (if allocated, must be true)
#define STBM__PREFIX_VALID(x) (STBM__TYPE(x) != 2 && STBM__TYPE(x) != 5 && STBM__TYPE(x) != 7)
985 //////////////////////////////////////////////////////////////////////////////
987 // stbm__heap_* -- dispatch layer
989 // This layer dispatches to the three different allocators, and
990 // also manages cross-heap frees.
// Forward declarations for the three size-class allocators; the dispatch
// layer below routes each request/free to one of these based on size
// (alloc) or on the prefix type bits (free).
static void *stbm__small_alloc(stbm_heap *heap, size_t size);
static void stbm__small_free(stbm_heap *heap, void *ptr);
static void *stbm__medium_alloc(stbm_heap *heap, size_t size);
static void stbm__medium_free(stbm_heap *heap, void *ptr);
static void *stbm__large_alloc(stbm_heap *heap, size_t size);
static void stbm__large_free(stbm_heap *heap, void *ptr);
1000 static void *stbm__heap_alloc(stbm_heap *heap, size_t size)
1002 if (size <= STBM__SMALLEST_ALLOCATION)
1003 return stbm__small_alloc(heap, size);
1004 if (size < heap->large_alloc_threshhold)
1005 return stbm__medium_alloc(heap, size);
1007 return stbm__large_alloc(heap, size);
1010 static void stbm__heap_crossthread_free(stbm_heap *src_heap, void *ptr)
1014 // use CAS to push it onto the crossthread stack
1016 STBM_FAIL("crossheap frees not implemented");
// Free 'ptr' (which belongs to 'src_heap') from code that currently has
// the right to lock 'safe_heap'. Chooses between the regular free path
// and the cross-thread free path; fails hard if neither is possible.
static void stbm__heap_free(stbm_heap *safe_heap, stbm_heap *src_heap, void *ptr)
stbm__uintptr prefix;
// - src_heap == safe_heap
// - use regular heap free path
// - src_heap != safe_heap && src_heap->crossthread_handle
// - use cross-thread free path
// - src_heap != safe_heap && src_heap->allocation_handle
// - use regular heap free path
// - src_heap != safe_heap && neither handle defined
if (src_heap != safe_heap)
if (src_heap->crossthread_mutex)
stbm__heap_crossthread_free(src_heap, ptr);
// without a mutex we cannot safely touch the foreign heap at all
if (!src_heap->allocation_mutex)
STBM_FAIL("Freed block from wrong thread and no mutexes available");
// read the prefix word stored immediately before the user pointer
prefix = ((stbm__uintptr *)ptr)[-1];
// dispatch on the allocator type encoded in the low 3 bits of the prefix
if (STBM__TYPE(prefix) == STBM__TYPE_medium)
stbm__medium_free(src_heap, ptr);
else if (STBM__TYPE(prefix) == STBM__TYPE_small)
stbm__small_free(src_heap, ptr);
else if (STBM__TYPE(prefix) == STBM__TYPE_large)
stbm__large_free(src_heap, ptr);
// any other type tag means the header was corrupted (or double-freed)
STBM_FAIL("Corrupt header found in stbm_free");
// only if STBM__TYPE_debug
// Bits 3..4 of a debug block's prefix select the debug record variant.
#define STBM__DEBUG_1 (0 << 3)
#define STBM__DEBUG_2 (1 << 3)
#define STBM__DEBUG_MASK (3 << 3) // room for future expansion
#define STBM__DEBUG(x) ((x) & STBM__DEBUG_MASK)
// only if STBM__TYPE_debug
// Byte 1 (bits 8..15) of a debug prefix holds a fixed magic value so
// stale/corrupt debug headers can be detected.
#define STBM__DEBUG_MAGIC (0x55 << 8)
#define STBM__DEBUG_SETMAGIC(x) (((x) & ~0xff00) | STBM__DEBUG_MAGIC)
#define STBM__DEBUG_TESTMAGIC(x) (((x) & 0xff00) == STBM__DEBUG_MAGIC)
// only if allocated and not small
// Bits 16..31 carry the 16-bit user-supplied tag for the allocation.
#define STBM__USERDATA_BITS(x) (((x) & 0xffffffff) >> 16) // mask is a no-op on 32-bit
#define STBM__USERDATA_SETBITS(x, u) (((x) & 0xffff) | ((u) << 16))
1073 //////////////////////////////////////////////////////////////////////////////
1075 // stbm__small_* -- very tiny allocations, non-coalescing
1077 // 1-4 bytes in 32-bit
1078 // 1-8 bytes in 64-bit
1085 stbm__uintptr prefix;
1086 stbm__small_chunk *chunk;
1088 } stbm__small_allocated;
1090 typedef struct stbm__small_freeblock
1092 stbm__uintptr prefix;
1093 struct stbm__small_freeblock *next_free;
1094 } stbm__small_freeblock;
1096 static void stbm__small_init(stbm_heap *heap)
1098 heap->small.full.next = heap->small.full.prev = &heap->small.full;
1099 heap->small.nonfull.next = heap->small.nonfull.prev = &heap->small.nonfull;
1100 heap->small.chunkcache = 0;
1103 static void stbm__smallchunk_link(stbm__small_chunk *sentinel, stbm__small_chunk *chunk)
1105 chunk->next = sentinel->next;
1106 chunk->prev = sentinel;
1107 sentinel->next->prev = chunk;
1108 sentinel->next = chunk;
// Remove 'chunk' from the doubly-linked list headed by 'sentinel'.
// NOTE(review): 'sentinel' is presumably only used for debug validation,
// since unlinking needs just the neighbors -- confirm against full file.
static void stbm__smallchunk_unlink(stbm__small_chunk *sentinel, stbm__small_chunk *chunk)
stbm__small_chunk *next = chunk->next;
stbm__small_chunk *prev = chunk->prev;
// Magic stored in a free small block's prefix so allocation can detect
// free-list corruption.
#define STBM__SMALL_FREE_BLOCK_MAGIC 1
// Free a small block: push it onto its chunk's free list, move the chunk
// between the full/nonfull lists as needed, and release the chunk back
// to the medium allocator once it is entirely empty (keeping one cached).
static void stbm__small_free(stbm_heap *heap, void *ptr)
stbm__small_allocated *b = (stbm__small_allocated *)ptr - 1;
stbm__small_chunk *c = b->u.chunk, *chunk_to_free = NULL;
stbm__small_freeblock *f = (stbm__small_freeblock *)b;
STBM_ASSERT(c->num_alloc > 0);
STBM__ACQUIRE(heap->allocation_mutex);
// push the block onto the chunk's free list and stamp it as free
f->next_free = c->first_free;
f->prefix = STBM__SMALL_FREE_BLOCK_MAGIC;
// chunk was full, now has one free slot: move it to the nonfull list
stbm__smallchunk_unlink(&heap->small.full, c);
stbm__smallchunk_link(&heap->small.nonfull, c);
// chunk is empty, free it
STBM_ASSERT(!c->full);
stbm__smallchunk_unlink(&heap->small.nonfull, c);
if (!heap->small.chunkcache)
heap->small.chunkcache = c; // cache one smallblock chunk for hysteresis
stbm__stats_update_free(heap, STBM__SMALLEST_ALLOCATION);
STBM__RELEASE(heap->allocation_mutex);
// avoid nesting mutexes
// (the chunk itself lives in medium-allocator storage, so its release
// happens outside the lock via chunk_to_free)
stbm__medium_free(heap, chunk_to_free);
// Allocate one small block (all small allocations occupy a fixed-size
// slot of STBM__SMALLEST_ALLOCATION user bytes). Takes the first chunk
// on the nonfull list, creating a new chunk -- from the one-item cache
// or from the medium allocator -- when none is available.
static void *stbm__small_alloc(stbm_heap *heap, size_t size)
stbm__small_chunk *c;
stbm__small_allocated *b;
STBM__ACQUIRE(heap->allocation_mutex);
// find first non-full chunk of small blocks
c = heap->small.nonfull.next;
// if non-full list is empty, allocate a new small chunk
if (c == &heap->small.nonfull)
if (heap->small.chunkcache)
// if we cached an entirely-empty smallchunk, use that
c = heap->small.chunkcache;
heap->small.chunkcache = NULL;
// avoid nesting mutexes
STBM__RELEASE(heap->allocation_mutex);
c = (stbm__small_chunk *)stbm__medium_alloc(heap, heap->smallchunk_size);
STBM__ACQUIRE(heap->allocation_mutex);
c->wrapped_user_pointer = NULL; // flags which type this is
c->first_free = NULL;
c->next = c->prev = 0;
// carve block storage from the top of the chunk downward; align so the
// user pointer (after the stbm__small_allocated header) is aligned
c->unused_storage = (char *)stbm__align_address((char *)c + heap->smallchunk_size - STBM__SMALL_ALIGNMENT, STBM__SMALL_ALIGNMENT, STBM__SMALL_ALIGNMENT - sizeof(stbm__small_allocated));
STBM_ASSERT(c->unused_storage + STBM__SMALLEST_ALLOCATION <= (char *)c + heap->smallchunk_size);
stbm__smallchunk_link(&heap->small.nonfull, c);
STBM_ASSERT(!c->full); // must be on the nonfull list
// allocate from free list or from unsubdivided storage
// free-list path: pop the head and validate its free-block magic
stbm__small_freeblock *f = c->first_free;
c->first_free = f->next_free;
b = (stbm__small_allocated *)f;
if (b->u.prefix != STBM__SMALL_FREE_BLOCK_MAGIC)
STBM_FAIL("Corrupt data structure in stbm_alloc");
// unsubdivided-storage path: take the next slot from the top
STBM_ASSERT(c->unused_storage);
b = (stbm__small_allocated *)c->unused_storage;
STBM_ASSERT((char *)b + size <= (char *)c + heap->smallchunk_size);
STBM_ASSERT((char *)b >= (char *)(c + 1));
c->unused_storage -= STBM__SMALLEST_BLOCKSIZE;
STBM_ASSERT(STBM__SMALLEST_BLOCKSIZE == sizeof(stbm__small_freeblock));
// once the cursor retreats into the chunk header, storage is exhausted
if (c->unused_storage < (char *)(c + 1))
c->unused_storage = NULL;
// check if the chunk became full
if (c->first_free == NULL && c->unused_storage == NULL)
stbm__smallchunk_unlink(&heap->small.nonfull, c);
stbm__smallchunk_link(&heap->small.full, c);
stbm__stats_update_alloc(heap, STBM__SMALLEST_ALLOCATION);
STBM__RELEASE(heap->allocation_mutex);
1241 //////////////////////////////////////////////////////////////////////////////
1243 // stbm__system_* tracks system allocations for fast full-heap free
1246 static void stbm__system_init(stbm_heap *heap)
1248 heap->system_sentinel.next = &heap->system_sentinel;
1249 heap->system_sentinel.prev = &heap->system_sentinel;
1252 static void stbm__system_alloc_link(stbm_heap *heap, stbm__system_allocation *alloc, stbm__uint32 magic)
1254 alloc->next = heap->system_sentinel.next;
1255 alloc->prev = &heap->system_sentinel;
1256 heap->system_sentinel.next->prev = alloc;
1257 heap->system_sentinel.next = alloc;
1259 alloc->magic = magic;
// Stop tracking a system allocation: splice it out of the heap's list
// and verify that its stored magic matches the expected allocator type.
static void stbm__system_alloc_unlink(stbm_heap *heap, stbm__system_allocation *alloc, stbm__uint32 magic)
stbm__system_allocation *next = alloc->next;
stbm__system_allocation *prev = alloc->prev;
// a wrong magic means the block was never linked here (or was corrupted)
if (alloc->magic != magic)
STBM_FAIL("Corrupt block in stbm_free");
1275 //////////////////////////////////////////////////////////////////////////////
1277 // stbm__large_* large allocations -- every large allocation
1278 // is a wrapped system allocation
1283 stbm__system_allocation *system_allocation;
1284 stbm__uintptr user_size_in_bytes;
1286 stbm__uintptr prefix;
1288 } stbm__large_allocated;
1290 static void stbm__large_init(stbm_heap *heap)
1292 (void)(sizeof(heap));
// Allocate a large block: one dedicated system allocation per request,
// with the tracking record and large-block header placed in front of an
// alignment-adjusted user pointer.
static void *stbm__large_alloc(stbm_heap *heap, size_t size)
stbm__system_allocation *s;
stbm__large_allocated *a;
// request enough extra room for both headers plus worst-case alignment
size_t overhead = sizeof(stbm__system_allocation) + sizeof(stbm__large_allocated);
size_t actual = overhead + (heap->minimum_alignment - 1) + size;
// system_alloc may round 'actual' up and reports the final size back
s = (stbm__system_allocation *)heap->system_alloc(heap->user_context, actual, &actual);
// first aligned address past both headers
user_ptr = stbm__align_address((char *)s + overhead, heap->minimum_alignment, 0);
// everything from user_ptr to the end of the system block belongs to the user
user_size = ((char *)s + actual) - ((char *)user_ptr);
STBM_ASSERT(user_size >= size);
STBM_ASSERT((char *)user_ptr + size <= (char *)s + actual);
STBM__ACQUIRE(heap->allocation_mutex);
stbm__system_alloc_link(heap, s, STBM__SYSTEM_ALLOC_LARGE);
stbm__stats_update_alloc(heap, user_size);
STBM__RELEASE(heap->allocation_mutex);
s->begin_pointer = user_ptr;
// fill in the large-block header immediately before the user pointer
a = ((stbm__large_allocated *)user_ptr) - 1;
a->system_allocation = s;
a->prefix = STBM__TYPE_large;
a->user_size_in_bytes = user_size;
1335 static void stbm__large_free(stbm_heap *heap, void *ptr)
1337 stbm__large_allocated *a = ((stbm__large_allocated *)ptr) - 1;
1338 if (a->system_allocation->magic != STBM__SYSTEM_ALLOC_LARGE)
1339 STBM_FAIL("Corrupt block in stbm_free");
1341 STBM__ACQUIRE(heap->allocation_mutex);
1342 stbm__system_alloc_unlink(heap, a->system_allocation, STBM__SYSTEM_ALLOC_LARGE);
1343 stbm__stats_update_free(heap, a->user_size_in_bytes);
1344 STBM__RELEASE(heap->allocation_mutex);
1346 heap->system_free(heap->user_context, a->system_allocation, a->system_allocation->size);
1349 //////////////////////////////////////////////////////////////////////////////
1351 // stbm__medium_* medium allocations with TLSF and boundary-tag coalescing
// only if STBM__TYPE_medium or if on normal-size free list
// bit 3: whether the physically-previous block is allocated
#define STBM__PREVIOUS_free (0 << 3)
#define STBM__PREVIOUS_alloc (1 << 3)
#define STBM__PREVIOUS_MASK (1 << 3)
// only if STBM__TYPE_medium or if on normal-size free list
// bit 4: whether this block itself is allocated
#define STBM__BLOCK_free (0 << 4)
#define STBM__BLOCK_alloc (1 << 4)
#define STBM__BLOCK_MASK (1 << 4)
// bit 5: whether the block wraps another structure (e.g. a small chunk)
#define STBM__WRAPPED_false (0 << 5)
#define STBM__WRAPPED_true (1 << 5)
#define STBM__WRAPPED_MASK (1 << 5)
#define STBM__IS_WRAPPED(x) ((x) & STBM__WRAPPED_MASK)
// only if allocated and STBM__TYPE_medium
// bits 6..15: packed (index, slot, extra) describing the block's size class
#define STBM__SIZE_DATA(x) (((x) & 0xffff) >> 6)
#define STBM__SIZE_SETDATA(x, s) (((x) & 0x3f) | ((s) << 6)) // doesn't preserve userdata
// decode the packed sizedata: top bits = TLSF index, next 5 = slot,
// bit 0 = an extra half-granule (STBM__EXTRA_SIZE) added to the size
#define STBM__SIZEDATA_index(x) ((x) >> 6)
#define STBM__SIZEDATA_slot(x) (((x) >> 1) & 31)
#define STBM__SIZEDATA_extra(x) ((x) & 1)
#define STBM__EXTRA_SIZE (STBM__MEDIUM_SMALLEST_BLOCK >> 1)
// only if normal-sized and free -- this limits us to just under 2MB as the largest allowed free block
// free blocks store their byte size (8-byte granular) and sizeclass directly
#define STBM__MED_FREE_BYTES(x) (((x) >> 2) & 0x1FFFF8)
#define STBM__MED_FREE_SIZECLASS(x) ((x) >> 23)
#define STBM__MED_FREE_SETSIZE(x, bytes, sizeclass) (((x) & 0x1f) | ((size_t)(bytes) << 2) | ((size_t)(sizeclass) << 23))
// marker added to a chunk's length so the first block in a chunk can be
// recognized: a "previous size" >= this value is the chunk pre-header
#define STBM__MEDIUM_CHUNK_PRE_HEADER 0x80000000
// the header for a medium chunk
typedef struct stbm__medium_chunk
{
stbm__system_allocation *system_alloc; // lets us recover the original chunk pointer
stbm__uintptr chunk_length_marker; // tag to allow us to detect that a block is the first block
} stbm__medium_chunk;
// Header of an in-use medium block; the prefix word sits immediately
// before the user data. NOTE(review): the field(s) preceding 'prefix'
// (the owning-heap pointer) are declared in lines not shown here.
stbm__uintptr prefix;
} stbm__medium_allocated;
// A free medium block overlays the allocated layout: prev_free/next_free
// reuse the heap-pointer slot and the start of the old user data.
typedef struct stbm__medium_freeblock
{
struct stbm__medium_freeblock *prev_free; // aliases with 'heap' in allocated block
stbm__uintptr prefix; // aliases with 'prefix' in allocated block
struct stbm__medium_freeblock *next_free; // aliases with first bytes of 'user data' in allocated block
//stbm__uintptr size_in_bytes_trailing; // aliases with last bytes of 'user data' in allocated block
} stbm__medium_freeblock;
// set bitmap bits starting from the left: index 0 maps to the top bit.
// The constant must be unsigned -- (1 << 31) on a 32-bit int shifts into
// the sign bit, which is undefined behavior in C (C11 6.5.7p4).
#define STBM__TLSF_BITMAP_BIT(x) (1u << (31 - (x)))
1411 static void stbm__medium_init(stbm_heap *heap)
1413 STBM_MEMSET(&heap->medium, 0, sizeof(heap->medium));
1414 heap->medium.minimum_freeblock_size = STBM__MAX(heap->minimum_alignment, STBM__MEDIUM_SMALLEST_BLOCK);
1415 heap->medium.cached_system_alloc = NULL;
1416 heap->medium.cached_system_alloc_size = 0;
// allocate a new chunk from the system allocator, then set up the header
// and footer so it looks like it's surrounded by allocated blocks (so it
// can't coalesce against them). this is called without any locks taken.
// Returns the usable byte length of the single free block spanning the
// chunk (also stored via *ptr_block in lines not shown here).
static size_t stbm__medium_init_chunk(stbm_heap *heap, stbm__medium_freeblock **ptr_block, size_t freesize, stbm__system_allocation *s)
stbm__medium_chunk *c;
stbm__medium_allocated *tail;
stbm__medium_freeblock *block;
// now we need to create phantom blocks before and after the free region,
// and all of them need to be heap->minimum_alignment aligned
// first we want to create a block immediately after the system allocation region
// find the correctly-aligned user address
start_size = sizeof(*s) + sizeof(stbm__medium_chunk) + sizeof(stbm__medium_allocated);
ptr = stbm__align_address((char *)s + start_size, heap->minimum_alignment, 0);
s->begin_pointer = ptr;
s->size = (size_t)freesize;
// back up to the header that corresponds to this user address
ptr = ((stbm__medium_allocated *)ptr) - 1;
// and then switch to a free block at the same address
block = (stbm__medium_freeblock *)ptr;
// and get the medium chunk header
c = ((stbm__medium_chunk *)ptr) - 1;
// setup the medium chunk header
c->system_alloc = s;
block->prefix = STBM__BLOCK_free | STBM__PREVIOUS_free;
// we don't need to set up the size for block because it's about to be allocated from,
// which will reset it
// now we want to set up a phantom block at the end of the region, called 'tail'
// (a) aligned such that the corresponding user data would be on heap minimum alignment
// (b) be positioned so that it doesn't extend past the end of the allocated block
// find the end of the chunk
ptr = (char *)s + freesize;
// retreat by minimum alignment
ptr = (char *)ptr - heap->minimum_alignment;
// find the first aligned point AFTER that point, which must be before the end of the block
ptr = stbm__align_address(ptr, heap->minimum_alignment, 0);
STBM_ASSERT((char *)ptr < (char *)s + freesize);
s->end_pointer = ptr;
tail = ((stbm__medium_allocated *)ptr) - 1;
STBM_ASSERT((char *)(tail + 1) < (char *)s + freesize);
// tail is permanently "allocated" so the real blocks never coalesce past it
tail->prefix = STBM__BLOCK_alloc | STBM__PREVIOUS_free;
// we don't need to set up the size for tail because nobody will ever look at it
// change 'freesize' to be the size of block
freesize = (char *)(tail) - (char *)block;
STBM_ASSERT((freesize & (heap->minimum_alignment - 1)) == 0);
// compute proper chunk_length_marker that allows us to recognize a fully-free block
c->chunk_length_marker = STBM__MEDIUM_CHUNK_PRE_HEADER + freesize;
// this is called inside the heap lock, but releases it
// Return a fully-free medium chunk to the system, or park it in the
// one-item chunk cache when STBM_MEDIUM_CACHE_one is configured.
static void stbm__medium_free_chunk(stbm_heap *heap, stbm__system_allocation *s)
stbm__system_alloc_unlink(heap, s, STBM__SYSTEM_ALLOC_MEDIUM);
if (heap->cache_medium_chunks == STBM_MEDIUM_CACHE_one)
if (heap->medium.cached_system_alloc == NULL)
// cache slot is empty: keep this chunk instead of freeing it
heap->medium.cached_system_alloc = s;
heap->medium.cached_system_alloc_size = s->size;
// early-out path: lock released with the chunk cached
STBM__RELEASE(heap->allocation_mutex);
// fall-through path: release the lock, then hand the chunk back to the
// user's system_free callback outside the lock
STBM__RELEASE(heap->allocation_mutex);
heap->system_free(heap->user_context, s, s->size);
// link 'block' to the correct free list and set up its header / footer
static void stbm__medium_link(stbm_heap *heap, struct stbm__medium_freeblock *block, size_t size)
stbm__index_slot is;
stbm__medium_freeblock *list;
STBM_ASSERT(size <= (2 << 20) - 8); // necessary for packing into MED_FREE
// oversized blocks all go in the final (largest) index/slot bucket
if (size >= STBM__SIZECLASS_MAX)
is.index = STBM__NUM_INDICES - 1;
is.slot = STBM__NUM_SLOTS - 1;
is = stbm__index_slot_for_free_block(size);
// stamp the free block's size and sizeclass into its prefix word
block->prefix = STBM__MED_FREE_SETSIZE(block->prefix, size, is.index * STBM__NUM_SLOTS + is.slot);
STBM_ASSERT(size == STBM__MED_FREE_BYTES(block->prefix));
STBM_ASSERT(STBM__MED_FREE_SIZECLASS(block->prefix) == (unsigned)is.index * STBM__NUM_SLOTS + is.slot);
// push onto the front of the bucket's doubly-linked free list
list = heap->medium.lists[is.index][is.slot];
heap->medium.lists[is.index][is.slot] = block;
block->next_free = list;
block->prev_free = NULL;
list->prev_free = block;
// list was empty, so set the non-empty bit -- @OPTIMIZE is it faster to always
// set both, or should we check whether the toplevel_bitmap needs setting?
heap->medium.toplevel_bitmap |= (1 << (31 - is.index));
heap->medium.bitmap[is.index] |= (1 << (31 - is.slot));
// set the size at the end of the block as well for O(1) coalescing
((stbm__uintptr *)((char *)block + size))[-1] = size;
// unlink 'block' from its current free list
static void stbm__medium_unlink(stbm_heap *heap, struct stbm__medium_freeblock *block)
// recover the bucket from the sizeclass stored in the free block's prefix
int sizeclass = (int)STBM__MED_FREE_SIZECLASS(block->prefix);
int index = sizeclass >> 5;
int slot = sizeclass & 31;
STBM_ASSERT(heap->medium.lists[index][slot] != NULL);
if (heap->medium.lists[index][slot] != block)
// the block is not the first one on the list
stbm__medium_freeblock *next = block->next_free;
stbm__medium_freeblock *prev = block->prev_free;
STBM_ASSERT(prev != NULL);
prev->next_free = next;
next->prev_free = prev;
// if the block is the first one on the list, we have to do the
// linked-list logic differently, but also it may be the ONLY one
//on the list, so unlinking it may make the list empty, in which
// case we need to update the bitmaps
stbm__medium_freeblock *next = block->next_free;
heap->medium.lists[index][slot] = next;
next->prev_free = NULL;
// the list is empty, so update the bitmap
heap->medium.bitmap[index] &= ~(1 << (31 - slot));
if (heap->medium.bitmap[index] == 0)
heap->medium.toplevel_bitmap &= ~(1 << (31 - index));
// Free a medium block: recover its size from the packed sizedata in the
// prefix, coalesce with free physical neighbors (boundary tags), free
// the whole chunk if it became empty, otherwise relink the merged block.
static void stbm__medium_free(stbm_heap *heap, void *ptr)
stbm__uintptr previous_tag;
stbm__medium_allocated *alloc = ((stbm__medium_allocated *)ptr) - 1;
stbm__medium_freeblock *block = (stbm__medium_freeblock *)alloc;
stbm__medium_freeblock *follow;
stbm__uintptr sizedata = STBM__SIZE_DATA(alloc->prefix);
stbm__index_slot is;
// reconstruct the block's physical size from (index, slot, extra)
is.index = (int)STBM__SIZEDATA_index(sizedata);
is.slot = (int)STBM__SIZEDATA_slot(sizedata);
size = stbm__size_for_index_slot(is.index, is.slot);
if (STBM__SIZEDATA_extra(sizedata))
size += STBM__EXTRA_SIZE;
STBM_ASSERT((size & (heap->minimum_alignment - 1)) == 0);
STBM__ACQUIRE(heap->allocation_mutex);
STBM__CHECK_LOCKED(heap);
stbm__stats_update_free(heap, size - sizeof(*alloc));
// merge with following block
follow = (stbm__medium_freeblock *)((char *)block + size);
STBM_ASSERT((follow->prefix & STBM__PREVIOUS_MASK) == STBM__PREVIOUS_alloc);
if ((follow->prefix & STBM__BLOCK_MASK) == STBM__BLOCK_free)
size_t blocksize = STBM__MED_FREE_BYTES(follow->prefix);
stbm__medium_unlink(heap, follow);
STBM__CHECK_LOCKED(heap);
STBM_ASSERT((size & (heap->minimum_alignment - 1)) == 0);
// merge with previous block
if ((block->prefix & STBM__PREVIOUS_MASK) == STBM__PREVIOUS_free)
// a free previous block stores its length in the word just before us
size_t prev_length = ((stbm__uintptr *)block)[-1];
// it's possible this is actually the first block in the medium chunk,
// which is indicated by the bogus size of the previous block
if (prev_length < STBM__MEDIUM_CHUNK_PRE_HEADER)
stbm__medium_freeblock *prev = (stbm__medium_freeblock *)((char *)block - prev_length);
STBM_ASSERT((prev->prefix & STBM__BLOCK_MASK) == STBM__BLOCK_free);
STBM_ASSERT(STBM__MED_FREE_BYTES(prev->prefix) == prev_length);
stbm__medium_unlink(heap, prev);
size = prev_length + size;
STBM__CHECK_LOCKED(heap);
previous_tag = STBM__PREVIOUS_alloc;
if (heap->cache_medium_chunks != STBM_MEDIUM_CACHE_all)
// if we're the first block in the chunk, we might have
// now freed ALL the data in the chunk, in which case we should
// go ahead and free the chunk entirely
if (size == prev_length - STBM__MEDIUM_CHUNK_PRE_HEADER)
// if this happens, we want to recover the chunk header
// and free it, but @TODO with a one-item cache to reduce churn
stbm__medium_free_chunk(heap, (((stbm__medium_chunk *)block) - 1)->system_alloc);
// the above call releases the heap lock
previous_tag = STBM__PREVIOUS_free;
previous_tag = STBM__PREVIOUS_alloc;
STBM_ASSERT((size & (heap->minimum_alignment - 1)) == 0);
// mark this (possibly merged) block free and clear the following
// block's "previous is allocated" bit
block->prefix = STBM__BLOCK_free | previous_tag;
((stbm__medium_allocated *)((char *)block + size))->prefix &= ~STBM__PREVIOUS_MASK;
#if STBM__PREVIOUS_free != 0
#error "above computation is wrong"
stbm__medium_link(heap, block, size);
STBM__CHECK_LOCKED(heap);
STBM__RELEASE(heap->allocation_mutex);
// Allocate a medium block via two-level segregated fit (TLSF): scan the
// bitmaps for the smallest suitable free block, grow the heap with a new
// system chunk if none exists, split off any large remainder, and stamp
// the allocated block's prefix.
static void *stbm__medium_alloc(stbm_heap *heap, size_t size)
stbm__medium_freeblock *block;
stbm__medium_allocated *alloc;
size_t freesize, expected_size;
stbm__index_slot is;
// switch from user-requested size to physically-needed size
size += sizeof(stbm__medium_allocated);
// force the size to be a multiple of the minimum alignment
size = (size + heap->minimum_alignment - 1) & ~(heap->minimum_alignment - 1);
is = stbm__index_slot_for_request(size);
STBM_ASSERT(is.index < STBM__NUM_INDICES);
STBM_ASSERT(stbm__size_for_index_slot(is.index, is.slot) >= size);
STBM_ASSERT((stbm__size_for_index_slot(is.index, is.slot) & (STBM__EXTRA_SIZE - 1)) == 0);
STBM__ACQUIRE(heap->allocation_mutex);
STBM__CHECK_LOCKED(heap);
// check whether there's a free block in that slot. note that
// our bitmaps are indexed with smaller sizes to the left so that
// the bitscan can be from the left so it's efficient on all
slot = stbm__find_leftmost_one_bit_after_skipping(heap->medium.bitmap[is.index], is.slot);
// if we got a valid slot, then we can use that block
block = heap->medium.lists[index][slot];
freesize = STBM__MED_FREE_BYTES(block->prefix);
STBM_ASSERT(freesize >= size && freesize >= stbm__size_for_index_slot(is.index, is.slot));
stbm__medium_unlink(heap, block);
// otherwise, we have to find a larger index with a non-empty slot
index = stbm__find_leftmost_one_bit_after_skipping(heap->medium.toplevel_bitmap, is.index + 1);
// if we got a valid index, find the smallest non-empty slot
slot = stbm__find_leftmost_one_bit_after_skipping(heap->medium.bitmap[index], 0);
STBM_ASSERT(slot < 32);
block = heap->medium.lists[index][slot];
freesize = STBM__MED_FREE_BYTES(block->prefix);
STBM_ASSERT(freesize >= size && freesize >= stbm__size_for_index_slot(is.index, is.slot));
stbm__medium_unlink(heap, block);
// if no sufficiently large blocks exist, allocate a new chunk
size_t size_with_overhead;
stbm__system_allocation *s;
freesize = heap->cur_medium_chunk_size;
// we'll compute expected_size twice on this path because we compute
// it redundantly below, but it's better than computing it earlier
expected_size = stbm__size_for_index_slot(is.index, is.slot);
size_with_overhead = expected_size + heap->minimum_alignment + 256; // inexact extra padding for chunk headers etc
// reuse the one-item chunk cache when the cached chunk is big enough
if (heap->medium.cached_system_alloc_size >= STBM__MAX(freesize, size_with_overhead))
s = (stbm__system_allocation *)heap->medium.cached_system_alloc;
freesize = heap->medium.cached_system_alloc_size;
STBM_ASSERT(s != NULL);
heap->medium.cached_system_alloc = NULL;
heap->medium.cached_system_alloc_size = 0;
freesize = stbm__medium_init_chunk(heap, &block, freesize, s);
// make sure the new chunk is big enough for this request
if (size_with_overhead > freesize)
freesize = size_with_overhead;
// double the amount we allocate next time
heap->cur_medium_chunk_size = STBM__MIN(heap->cur_medium_chunk_size * 2, heap->max_medium_chunk_size);
// a cached chunk exists but was too small: discard it
if (heap->medium.cached_system_alloc)
s = (stbm__system_allocation *)heap->medium.cached_system_alloc;
heap->system_free(heap->user_context, s, s->size);
heap->medium.cached_system_alloc = 0;
heap->medium.cached_system_alloc_size = 0;
// drop the lock around the (possibly slow) system allocation
STBM__RELEASE(heap->allocation_mutex);
s = (stbm__system_allocation *)heap->system_alloc(heap->user_context, freesize, &freesize);
STBM_ASSERT(freesize >= size && freesize >= size_with_overhead);
freesize = stbm__medium_init_chunk(heap, &block, freesize, s);
STBM__ACQUIRE(heap->allocation_mutex);
stbm__system_alloc_link(heap, s, STBM__SYSTEM_ALLOC_MEDIUM);
expected_size = stbm__size_for_index_slot(is.index, is.slot);
STBM_ASSERT(freesize >= size && freesize >= expected_size);
// now we have a free block 'block' which is not on any lists and whose
// size is 'freesize'
alloc = (stbm__medium_allocated *)block;
// check if we need to split the block -- we only split if the
// extra piece is larger than STBM__SMALLEST_ALLOCATION and
// larger than the minimum alignment
if (freesize >= expected_size + heap->medium.minimum_freeblock_size)
stbm__medium_freeblock *split;
// move past allocation plus header to find location of user block after split
ptr = stbm__align_address((char *)alloc + expected_size + sizeof(stbm__medium_allocated), heap->minimum_alignment, 0);
// move back to find header corresponding to that user pointer
ptr = (((stbm__medium_allocated *)ptr) - 1);
split = (stbm__medium_freeblock *)ptr;
STBM_ASSERT((char *)split + heap->medium.minimum_freeblock_size <= (char *)block + freesize);
tail_size = ((char *)block + freesize) - (char *)split;
freesize = (char *)split - (char *)block;
STBM_ASSERT(freesize >= expected_size && freesize < expected_size + heap->medium.minimum_freeblock_size);
// the remainder becomes a free block linked back into the TLSF lists
split->prefix = STBM__BLOCK_free | STBM__PREVIOUS_free;
stbm__medium_link(heap, split, tail_size);
// at this point, alloc points to the block, we just need to set up its header
// the previous block cannot be free, or we would have coalesced it
if ((alloc->prefix & STBM__PREVIOUS_MASK) != STBM__PREVIOUS_alloc)
// unless this block is the first block in the chunk
STBM_ASSERT(((stbm__uintptr *)alloc)[-1] >= STBM__MEDIUM_CHUNK_PRE_HEADER);
// check that we can encode the size correctly
STBM_ASSERT(freesize == expected_size || freesize == expected_size + STBM__EXTRA_SIZE);
stbm__stats_update_alloc(heap, freesize - sizeof(*alloc));
// stamp the prefix: type, allocated bit, preserved previous-block bit,
// and the packed (index, slot, extra) size data
alloc->prefix = STBM__TYPE_medium | STBM__BLOCK_alloc | (alloc->prefix & STBM__PREVIOUS_MASK);
alloc->prefix = STBM__SIZE_SETDATA(alloc->prefix, (is.index << 6) + (is.slot << 1) + (freesize == expected_size + STBM__EXTRA_SIZE ? 1 : 0));
// and we need to set the following block to know that its previous block is allocated
((stbm__medium_allocated *)((char *)block + freesize))->prefix |= STBM__PREVIOUS_alloc;
#if STBM__PREVIOUS_alloc != STBM__PREVIOUS_MASK
#error "the above assignment is wrong"
STBM__CHECK_LOCKED(heap);
STBM__RELEASE(heap->allocation_mutex);
// Consistency-check one TLSF free list: walks every block on
// lists[index][slot] and validates its flags, stored sizeclass, list
// links, and boundary tags. Each failed check reports via 'message'.
static void stbm__medium_check_list(stbm_heap *heap, int index, int slot)
const char *message = "check";
stbm__medium_freeblock *cur = heap->medium.lists[index][slot];
// the per-slot bitmap bit must agree with whether the list is empty
STBM_ASSERT((cur == NULL) == ((heap->medium.bitmap[index] & (1 << (31 - slot))) == 0));
stbm__index_slot is;
stbm__uintptr prefix = cur->prefix;
// this block must be free
if ((prefix & STBM__BLOCK_MASK) != STBM__BLOCK_free)
// the previous block cannot be free, unless it is the first block
if (((stbm__uintptr *)cur)[-1] < STBM__MEDIUM_CHUNK_PRE_HEADER)
if ((prefix & STBM__PREVIOUS_MASK) == STBM__PREVIOUS_free)
// the sizeclass in the block must match the passed-in sizeclass
if (STBM__MED_FREE_SIZECLASS(prefix) != (unsigned)(index * STBM__NUM_SLOTS + slot))
// the linked list must be linked properly
if (cur->next_free && cur->next_free->prev_free != cur)
// it should have the right stored sizeclass
if ((int)(STBM__MED_FREE_SIZECLASS(prefix) >> 5) != index || (int)(STBM__MED_FREE_SIZECLASS(prefix) & 31) != slot)
// if it's not the largest size
if (index != STBM__NUM_INDICES - 1 || slot != STBM__NUM_SLOTS - 1)
// then the size it is should generate the same index/slot pair
is = stbm__index_slot_for_free_block(STBM__MED_FREE_BYTES(prefix));
if (is.index != index || is.slot != slot)
// the next block should have its prev free set
if (((((stbm__medium_freeblock *)((char *)cur + STBM__MED_FREE_BYTES(prefix)))->prefix) & STBM__PREVIOUS_MASK) != STBM__PREVIOUS_free)
cur = cur->next_free;
// Consistency-check the whole medium allocator: verify the top-level
// bitmap agrees with each per-index bitmap, then check every free list.
static void stbm__medium_check_all(stbm_heap *heap)
for (i = 0; i < STBM__NUM_INDICES; ++i)
// toplevel bit i must be set exactly when bitmap[i] is non-zero
if (((heap->medium.toplevel_bitmap & (1 << (31 - i))) == 0) != (heap->medium.bitmap[i] == 0))
for (s = 0; s < STBM__NUM_SLOTS; ++s)
stbm__medium_check_list(heap, i, s);
1905 //////////////////////////////////////////////////////////////////////////////
1907 // allocation request structure
1909 // allocation requests are reformatted into this structure,
1910 // and then each layer only uses the parts of it that are relevant
1919 size_t extra_debug_size;
1921 stbm__uint32 alignment;
1922 stbm__uint32 align_offset;
1923 unsigned short userdata;
1926 //////////////////////////////////////////////////////////////////////////////
1928 // stbm__tc_* -- thread-caching layer
1930 // This layer caches allocations.
1932 // It keeps 32 free lists, each storing one unique allocation size class.
1933 // If we allocate more sizes than that, it'll start discarding cached
1934 // allocations, abandoning the least-recently-allocated-from list.
1936 // The heuristics for dealing with the cache becoming too large come
// from TCMalloc. The doubling-number-of-cached-allocations attempts to
1938 // keep excess memory usage from being excessive (e.g. if you're allocating
1939 // strings, you may not get duplicates at all) and to keep things more O(1)
1940 // in the worst case where we cycle through 33 different allocation sizes
1941 // (although since there's a fixed number of lists and at most 16 allocs
1942 // per list, technically it's always O(1) anyway).
#define STBM__NUM_TC_INDICES 257 // #256 is 32KB, which matches TCMalloc
#define STBM__MAX_TC_ALLOC_SIZE stbm__size_for_index_slot_calc((STBM__NUM_TC_INDICES >> 5), STBM__NUM_TC_INDICES & 31)
// A cached free block: its first word is reused as the free-list link.
typedef struct stbm__cached_block
struct stbm__cached_block *next;
} stbm__cached_block;
// One per-size-class free list within a thread cache; the counters are
// 8-bit because at most a handful of blocks are cached per class.
1953 typedef struct stbm__cache
1955 stbm__cached_block *head; // first free block
1956 stbm__uint8 lowwater; // smallest number of free blocks since last scavenge
1957 stbm__uint8 num; // number of free blocks
1958 stbm__uint8 alloc; // number of blocks allocated on last refill
1959 stbm__uint8 padding;
1962 #define STBM__NUM_THREADCACHE 32
1963 #define STBM__NUM_TC_SIZECLASS (STBM__NUM_SLOTS *STBM__NUM_INDICES)
1967 // caches for a subset of sizeclasses
1968 stbm__cache cache[STBM__NUM_THREADCACHE]; // 256 bytes in 32-bit
1970 // further parallel cache[] data, separated for different access patterns
1971 unsigned short last_used[STBM__NUM_THREADCACHE]; // 64 bytes
1972 unsigned short sizeclass[STBM__NUM_THREADCACHE]; // 64 bytes
1975 stbm__uint32 bitmap; // which caches are in use
1976 stbm__uint32 lru_counter; // a counter that updates every time we allocate from a list
1977 stbm__uint32 cached_bytes; // total bytes currently held in all free lists
1978 stbm__uint32 cached_bytes_scavenge_threshhold; // scavenge once cached_bytes reaches this
1979 signed char slot_for_sizeclass[STBM__NUM_TC_INDICES]; // 257 bytes; -1 = no slot assigned
1980 }; // ~500/1000 bytes 32/64-bit
// Compile-time assertion: stbm_tc must fit in the caller-provided
// STBM_TC_SIZEOF storage (array size goes negative on failure).
1981 typedef int stbm__tc_size_validate[sizeof(stbm_tc) <= STBM_TC_SIZEOF ? 1 : -1];
1983 static void stbm__tc_update_lowwater_mark(stbm__cache *sizecache)
1985 if (sizecache->num < sizecache->lowwater)
1986 sizecache->lowwater = sizecache->num;
// Return 'count' blocks from the tc_slot free list back to the owning heap,
// updating the list head, counters, and cached-byte accounting.
1989 static void stbm__tc_free_cached_blocks(stbm_tc *tc, stbm_heap *safe_heap, int tc_slot, int count)
1991 stbm__cached_block *cur;
1992 stbm__cache *sizecache = &tc->cache[tc_slot];
1995 STBM_ASSERT(count >= 0);
1997 // @OPTIMIZE: free a whole list in one operation (reduces locking in heap)
1998 cur = sizecache->head;
1999 for (i = 0; i < count; ++i)
// capture 'next' before freeing: the block's storage IS the list node
2001 stbm__cached_block *next = cur->next;
2002 stbm__heap_free(safe_heap, tc->heap, cur);
// 'cur' now points at the first surviving block (or NULL)
2005 sizecache->head = cur;
2007 sizecache->num -= (stbm__uint8)count;
2008 stbm__tc_update_lowwater_mark(sizecache);
2010 tc->cached_bytes -= (stbm__uint32)(count * stbm__size_for_tc_index(tc->sizeclass[tc_slot]));
2013 // Because we only keep 32 free lists for 257 size classes,
2014 // we need to be able to discard an existing free list to
2015 // use for a different size class. To do that, we keep
2016 // track of how recently used each list is.
2017 static void stbm__tc_lru_tick(stbm_tc *tc)
2019 if (++tc->lru_counter >= 0x10000)
2022 // every 32K allocations, we'll rewrite all 32 last_used values--this way
2023 // we avoid having to update LRU linked lists on every alloc
2024 for (i = 0; i < STBM__NUM_THREADCACHE; ++i)
2025 // remap last_used values from 0..ffff to 0..7fff
2026 tc->last_used[i] >>= 1;
2027 tc->lru_counter = 0x8000; // now lru counter is larger than any existing value
// Evict the least-recently-used cache slot: free all its cached blocks
// and detach it from its size class so the caller can reassign it.
2031 static int stbm__tc_free_lru_slot(stbm_tc *tc, stbm_heap *safe_heap)
2034 // find the LRU slot
2035 int lru = 0x10000, lru_slot = 0, i;
2036 for (i = 0; i < STBM__NUM_THREADCACHE; ++i)
2038 if (tc->last_used[i] < lru)
2040 lru = tc->last_used[i];
2045 // free the cached blocks in the slot
2046 if (tc->cache[lru_slot].num)
2047 stbm__tc_free_cached_blocks(tc, safe_heap, lru_slot, tc->cache[lru_slot].num);
// unmap the size class that was using this slot
2049 sizeclass = tc->sizeclass[lru_slot];
2050 STBM_ASSERT(tc->slot_for_sizeclass[sizeclass] == lru_slot);
2051 tc->slot_for_sizeclass[sizeclass] = -1;
// Claim a cache slot for 'tc_index': take a free slot if the bitmap has
// one, otherwise evict the LRU slot; then reset the slot's bookkeeping.
2056 static int stbm__tc_allocate_slot(stbm_tc *tc, int tc_index, stbm_heap *safe_heap)
2058 // once we're in the steady state, all bitmap bits are set, so make that the fast path
2059 int tc_slot = (~tc->bitmap ? stbm__find_any_zero(tc->bitmap) : -1);
2060 if (tc_slot < 0 || tc_slot >= STBM__NUM_THREADCACHE)
2061 tc_slot = stbm__tc_free_lru_slot(tc, safe_heap);
2063 tc->bitmap |= 1 << tc_slot;
2064 STBM_ASSERT(tc->bitmap & (1 << tc_slot));
2065 STBM_ASSERT(tc->cache[tc_slot].num == 0);
// slot starts empty; refill happens lazily on the next cache miss
2066 tc->cache[tc_slot].head = 0;
2067 tc->cache[tc_slot].num = 0;
2068 tc->cache[tc_slot].alloc = 0;
2069 tc->cache[tc_slot].lowwater = 0;
2070 tc->slot_for_sizeclass[tc_index] = (signed char)tc_slot;
// Refill an empty cache slot by allocating a batch of same-size blocks
// from the heap; batch size doubles each refill (TCMalloc-style) with caps.
2074 static void stbm__tc_alloc_cached_blocks(stbm_tc *tc, int tc_slot, int sizeclass)
2076 size_t size = stbm__size_for_tc_index(sizeclass);
2077 stbm__cached_block *head;
2078 stbm__uint8 num_alloc_uc;
2080 if (!tc->cache[tc_slot].alloc)
2081 // if we've never allocated any of this size before, allocate 2
2085 // allocate twice as many as before, up to a max of 16 or 64 KB
2086 num_alloc = 2 * tc->cache[tc_slot].alloc;
// cap reached: stop doubling and reuse the previous batch size
2087 if (num_alloc >= 16 || size * num_alloc >= 65536)
2088 num_alloc = tc->cache[tc_slot].alloc;
2091 // @OPTIMIZE: provide a function to allocate multiple so we don't
2092 // have to take the lock multiple times (and possibly can even
2093 // allocate one big block and subdivide it)
2094 head = tc->cache[tc_slot].head;
2095 for (i = 0; i < num_alloc; ++i)
2097 stbm__cached_block *p = (stbm__cached_block *)stbm__heap_alloc(tc->heap, size);
// newly allocated blocks are pushed onto the slot's free list
2106 tc->cache[tc_slot].head = head;
2108 num_alloc_uc = (stbm__uint8)num_alloc;
2109 tc->cache[tc_slot].num += num_alloc_uc;
2110 tc->cache[tc_slot].alloc = num_alloc_uc;
2111 tc->cache[tc_slot].lowwater = num_alloc_uc;
// Allocation front-end: serve from the thread cache when the request is
// small enough and the tc belongs to the target heap; otherwise fall
// through to the (locking) heap allocator.
2114 static void *stbm__tc_alloc(stbm__request *request)
2116 stbm_tc *tc = request->tc;
2117 if (tc && tc->heap == request->heap && request->size <= STBM__MAX_TC_ALLOC_SIZE)
2119 stbm__cached_block *p;
2120 stbm__cache *sizecache;
2122 int tc_index = stbm__tc_index_for_request(request->size);
2123 int tc_slot = tc->slot_for_sizeclass[tc_index];
// no slot yet for this size class: claim one (evicting the LRU if needed)
2126 tc_slot = stbm__tc_allocate_slot(tc, tc_index, tc->heap); // can't fail
2128 // update our last_used now so tc_slot is most-recently-used
2129 tc->last_used[tc_slot] = (unsigned short)tc->lru_counter;
2131 sizecache = &tc->cache[tc_slot];
2132 if (sizecache->num == 0)
2134 stbm__tc_alloc_cached_blocks(tc, tc_slot, tc_index);
2135 if (sizecache->num == 0)
2136 return 0; // if allocation fails, we should bail
2140 // update how much data we have; tc_index is dead past here
2141 tc->cached_bytes -= stbm__size_for_tc_index(tc_index);
2143 // update our lru timer; tc is dead past here
2144 stbm__tc_lru_tick(tc);
2146 // do accounting on free list
2148 stbm__tc_update_lowwater_mark(sizecache);
2150 // take head of free list
2151 p = sizecache->head;
2152 sizecache->head = p->next;
// slow path: request not cacheable, go straight to the heap
2156 return stbm__heap_alloc(request->heap, request->size);
2159 // TCmalloc-style scavenging logic -- free half the
2160 // unused objects in each list
2161 static void stbm__tc_scavenge(stbm_tc *tc, stbm_heap *safe_heap)
2164 for (i = 0; i < STBM__NUM_THREADCACHE; ++i)
2166 if (tc->sizeclass[i] && tc->cache[i].num)
// free half the low-water mark (rounded up) -- blocks that sat unused
// since the last scavenge are unlikely to be needed soon
2168 stbm__uint32 count = (tc->cache[i].lowwater + 1) >> 1;
2171 if (count > tc->cache[i].num)
2172 count = tc->cache[i].num;
2173 stbm__tc_free_cached_blocks(tc, safe_heap, i, count);
// Free front-end: push eligible blocks onto the thread cache's free list
// (scavenging if the cache grows too large); otherwise free to the heap.
2179 static void stbm__tc_free(stbm_tc *tc, stbm_heap *safe_heap, void *ptr)
2181 // @OPTIMIZE: merge these two computations, which branch on the same cases
2182 stbm_heap *src_heap = stbm_get_heap(ptr);
2183 size_t size = stbm_get_allocation_size(ptr);
2185 if (tc && tc->heap == src_heap && size <= STBM__MAX_TC_ALLOC_SIZE)
2187 stbm__cache *sizecache;
2188 stbm__cached_block *p = (stbm__cached_block *)ptr;
2189 int tc_index = stbm__tc_index_for_free_block(size);
2190 int tc_slot = tc->slot_for_sizeclass[tc_index];
// no slot assigned to this size class: claim one before caching the block
2192 tc_slot = stbm__tc_allocate_slot(tc, tc_index, safe_heap);
2193 sizecache = &tc->cache[tc_slot];
2194 p->next = sizecache->head;
2195 sizecache->head = p;
2197 tc->cached_bytes += stbm__size_for_tc_index(tc_index);
// cache grew past the threshold: give half the idle blocks back to the heap
2198 if (tc->cached_bytes >= tc->cached_bytes_scavenge_threshhold)
2199 stbm__tc_scavenge(tc, safe_heap)
2202 stbm__heap_free(safe_heap, src_heap, ptr);
2205 //////////////////////////////////////////////////////////////////////////////
2209 // Allocates and frees debug & aligned blocks, and manages debug modes.
2210 // Also takes responsibility for setting the 16-bit userdata field.
2213 #ifdef STBM_DEBUGCHECK
2214 #define stbm__heap_check_internal(x) stbm_heap_check(x)
2216 #define stbm__heap_check_internal(x)
// Normal (non-aligned, non-debug) allocation. Requests with userdata
// cannot use the "small" block format (no room for the tag), so they
// are bumped just past the small-allocation threshold.
2219 static void *stbm__alloc_normal(stbm__request *request)
2222 size_t size = request->size;
2223 if (size <= STBM__SMALLEST_ALLOCATION)
2225 if (!request->userdata)
2226 return stbm__tc_alloc(request);
// force a medium block so the header has a userdata field
2227 request->size = STBM__SMALLEST_ALLOCATION + 1;
2229 ptr = stbm__tc_alloc(request);
2232 stbm__uintptr *prefix = (stbm__uintptr *)ptr - 1;
2233 STBM_ASSERT(STBM__TYPE(*prefix) != STBM__TYPE_small);
2234 *prefix = STBM__USERDATA_SETBITS(*prefix, request->userdata);
2235 stbm__heap_check_internal(request->heap);
2240 // allocate a block with non-default alignment
// Over-allocates so that (user pointer = base + header_bytes, rounded up)
// satisfies the requested alignment/offset; returns the aligned user
// pointer and reports the raw allocation through *base_ptr.
2241 static void *stbm__alloc_aligned_internal(stbm__request *request, size_t header_bytes, size_t footer_bytes, void **base_ptr)
2245 size_t align, align_off;
2248 // canonicalize the requested alignment
2249 if (request->alignment == 0)
2251 align = request->heap->minimum_alignment;
2256 align = request->alignment;
2257 align_off = request->align_offset;
2260 // suppose the underlying alloc is aligned to 'align', then compute
2261 // the number of bytes we need to guarantee userdata is correctly aligned
2262 extra_bytes = stbm__round_up_to_multiple_of_power_of_two(header_bytes - align_off, align) + align_off;
2264 // now, provide enough bytes to allow for the underlying alloc not being
2265 // sufficiently aligned
2266 if (align > request->heap->minimum_alignment)
2268 // e.g. if they request 32 and default is 8, we might need 24 bytes to force alignment
2269 extra_bytes += align - request->heap->minimum_alignment;
2272 // now, work out the full allocation size
2273 request->size = extra_bytes + request->size + footer_bytes;
2274 base = stbm__tc_alloc(request);
2280 // set the 'wrapped' flag so the heap walk can detect wrapped objects
2281 ((stbm__uintptr *)base)[-1] |= STBM__WRAPPED_true;
2283 return stbm__align_address((char *)base + header_bytes, align, align_off);
// Aligned allocation: wraps the underlying block with an
// stbm__aligned_allocated header (just before the user pointer) and an
// stbm__aligned_base record (at the start of the raw allocation).
2286 static void *stbm__alloc_align(stbm__request *request)
2289 size_t old_size = request->size;
2290 void *user_ptr = stbm__alloc_aligned_internal(request, sizeof(stbm__aligned_allocated) + sizeof(stbm__aligned_base), 0, &base);
2291 stbm__aligned_allocated *aa = ((stbm__aligned_allocated *)user_ptr) - 1;
2292 stbm__aligned_base *ab = ((stbm__aligned_base *)base);
2293 (void)(sizeof(old_size)); // avoid warning
2295 if (user_ptr == NULL)
// link wrapper header back to the raw allocation so free can find it
2298 aa->underlying_allocation = base;
2299 aa->prefix = STBM__USERDATA_SETBITS(STBM__TYPE_aligned, request->userdata);
2300 ab->wrapped_user_pointer = user_ptr;
2302 // verify that both blocks of memory (aa & user) fit inside allocated block
2303 STBM_ASSERT((char *)user_ptr + old_size <= (char *)base + request->size);
2304 STBM_ASSERT((char *)aa >= (char *)base);
2306 stbm__heap_check_internal(request->heap);
// Verify the trailing guard bytes of a debug2 allocation are untouched;
// 'message' is reported on a corrupt guard region. Non-debug2 blocks
// have no guard bytes and are skipped.
2311 static void stbm__alloc_check_guard(stbm__debug_allocated *da, const char *message)
2316 if (STBM__DEBUG(da->prefix) != STBM__DEBUG_2)
2319 num_bytes = da->underlying_allocation.debug2->num_guard_bytes;
// the count is stored shifted left 4 bits; any bits outside that
// encoding indicate header corruption
2323 if (num_bytes & 0xf00f)
2325 STBM_FAIL("Invalid guard byte count in stbm_free");
2329 unsigned char c = da->underlying_allocation.debug2->guard_value;
2330 unsigned char *guard_area = (unsigned char *)(da + 1) + da->user_size_in_bytes;
2333 for (i = 0; i < num_bytes; ++i)
2334 if (guard_area[i] != c)
2341 static void stbm__alloc_link_debug_allocation(stbm__request *request, stbm__debug_allocated *da)
2343 stbm__debug_base1 *db = da->underlying_allocation.debug1;
2344 db->wrapped_user_pointer = da + 1;
2345 da->underlying_allocation.debug1->file = request->file;
2346 da->underlying_allocation.debug1->line = request->line;
// Debug allocation, variant 1: header records file/line only (no extra
// debug payload and no trailing guard bytes).
2349 static void *stbm__alloc_debug1(stbm__request *request)
2352 size_t user_size = request->size;
2353 size_t header_bytes = sizeof(stbm__debug_base1) + sizeof(stbm__debug_allocated);
2354 void *user_ptr = stbm__alloc_aligned_internal(request, header_bytes, 0, &base);
2355 stbm__debug_allocated *da = ((stbm__debug_allocated *)user_ptr) - 1;
2357 if (user_ptr == NULL)
2360 // verify that both blocks of memory (aa & user) fit inside allocated block
2361 STBM_ASSERT((char *)user_ptr + user_size <= (char *)base + request->size);
2362 STBM_ASSERT((char *)da >= (char *)base);
2364 da->underlying_allocation.debug1 = ((stbm__debug_base1 *)base);
2365 da->user_size_in_bytes = user_size;
// tag the header type and set the magic so free can validate it
2366 da->prefix = STBM__USERDATA_SETBITS(STBM__TYPE_debug | STBM__DEBUG_1, request->userdata);
2367 da->prefix = STBM__DEBUG_SETMAGIC(da->prefix);
2369 stbm__alloc_link_debug_allocation(request, da);
2370 stbm__heap_check_internal(request->heap);
// Debug allocation, variant 2: like debug1 but with an optional extra
// debug payload after the base header and guard bytes after the user data.
2374 static void *stbm__alloc_debug2(stbm__request *request)
2377 size_t excess, user_size = request->size;
2378 size_t extra_debug = stbm__round_up_to_multiple_of_power_of_two(request->extra_debug_size, sizeof(void *));
2379 int header_bytes = (int)(sizeof(stbm__debug_base2) + sizeof(stbm__debug_allocated) + extra_debug);
2380 void *user_ptr = stbm__alloc_aligned_internal(request, header_bytes,
2381 request->heap->num_guard_bytes, &base);
2382 stbm__debug_base2 *db2 = ((stbm__debug_base2 *)base);
2383 stbm__debug_allocated *da = ((stbm__debug_allocated *)user_ptr) - 1;
2385 if (user_ptr == NULL)
2388 // verify that all blocks of memory (aa & user) fit inside allocated block and don't overlap
2389 STBM_ASSERT((char *)user_ptr + user_size + request->heap->num_guard_bytes <= (char *)base + request->size);
2390 STBM_ASSERT((char *)(db2 + 1) + request->extra_debug_size <= (char *)da);
2392 da->underlying_allocation.debug2 = db2;
2393 da->user_size_in_bytes = user_size;
2394 da->prefix = STBM__USERDATA_SETBITS(STBM__TYPE_debug | STBM__DEBUG_2, request->userdata);
2395 da->prefix = STBM__DEBUG_SETMAGIC(da->prefix);
2397 db2->extra_debug_bytes = (stbm__uint32)request->extra_debug_size;
2398 db2->padding = 0x57;
// everything past user_size is slack; fill it with guard bytes
2399 excess = stbm_get_allocation_size(user_ptr) - user_size;
2401 STBM_ASSERT(excess >= request->heap->num_guard_bytes);
2404 // we only have room for a 16-bit length, so we limit it to 255 guard bytes, and we encode that specially
2405 int num_guard_bytes = (excess > 255) ? 255 : (int)excess;
2406 db2->num_guard_bytes = (unsigned short)(num_guard_bytes << 4);
2407 db2->guard_value = request->heap->guard_value;
2408 STBM_MEMSET((char *)user_ptr + user_size, db2->guard_value, (size_t)num_guard_bytes);
2411 db2->num_guard_bytes = 0;
2413 stbm__alloc_link_debug_allocation(request, da);
2414 stbm__heap_check_internal(request->heap);
2418 static void *stbm__alloc_debug(stbm__request *request)
2420 if (request->heap->num_guard_bytes)
2422 request->extra_debug_size = 0;
2423 return stbm__alloc_debug2(request);
2426 return stbm__alloc_debug1(request);
2429 static void stbm__alloc_free_aligned(stbm_tc *tc, stbm_heap *safe_heap, void *ptr)
2431 stbm__aligned_allocated *aa = ((stbm__aligned_allocated *)ptr) - 1;
2432 stbm__tc_free(tc, safe_heap, aa->underlying_allocation);
2435 static void stbm__alloc_free_debug(stbm_tc *tc, stbm_heap *safe_heap, void *ptr)
2437 stbm__debug_allocated *da = ((stbm__debug_allocated *)ptr) - 1;
2438 if (!STBM__DEBUG_TESTMAGIC(da->prefix))
2439 STBM_FAIL("Invalid block header in stbm_free");
2440 stbm__alloc_check_guard(da, "Invalid trailing guard bytes in stbm_free");
2441 stbm__tc_free(tc, safe_heap, da->underlying_allocation.debug1);
2444 //////////////////////////////////////////////////////////////////////////////
// Public allocation entry point (no file/line capture, default alignment).
2449 STBM__API void *stbm_alloc(stbm_tc *tc, stbm_heap *heap, size_t size, unsigned short userdata16)
2455 r.userdata = userdata16;
// heap-wide debug mode forces every allocation through the debug path
2457 if (heap->force_debug)
2462 return stbm__alloc_debug(&r);
2465 // force small blocks to be larger if they need larger alignment
2466 if (size <= STBM__SMALLEST_ALLOCATION)
2467 if (heap->align_all_blocks_to_minimum)
2468 if (heap->minimum_alignment > STBM__SMALL_ALIGNMENT)
2469 r.size = STBM__SMALLEST_ALLOCATION + 1;
2471 return stbm__alloc_normal(&r);
// Public allocation entry point capturing the caller's file/line
// (used via macros); file/line is only stored on the debug path.
2474 STBM__API void *stbm_alloc_fileline(stbm_tc *tc, stbm_heap *heap, size_t size, unsigned short userdata16, char *file, int line)
2480 r.userdata = userdata16;
2482 if (heap->force_file_line || heap->force_debug)
2487 return stbm__alloc_debug(&r);
2490 // force small blocks to be larger if they need larger alignment
2491 if (size <= STBM__SMALLEST_ALLOCATION)
2492 if (heap->align_all_blocks_to_minimum)
2493 if (heap->minimum_alignment > STBM__SMALL_ALIGNMENT)
2494 r.size = STBM__SMALLEST_ALLOCATION + 1;
2496 return stbm__alloc_normal(&r);
// Public aligned-allocation entry point; falls back to the normal path
// when the request is already satisfied by the heap's default alignment.
2499 STBM__API void *stbm_alloc_align(stbm_tc *tc, stbm_heap *heap, size_t size, unsigned short userdata16, size_t alignment, size_t align_offset)
2505 r.userdata = userdata16;
2506 r.alignment = (stbm__uint32)alignment;
2507 r.align_offset = (stbm__uint32)align_offset;
2509 if (heap->force_debug)
2513 return stbm__alloc_debug(&r);
2516 // check if it's non-aligned
2517 if (align_offset == 0 && alignment <= heap->minimum_alignment)
2518 if (size > STBM__SMALLEST_ALLOCATION || !heap->align_all_blocks_to_minimum)
2519 return stbm__alloc_normal(&r);
2521 return stbm__alloc_align(&r);
// Aligned allocation with file/line capture; mirrors stbm_alloc_align
// but honors the heap's force_file_line setting as well.
2524 STBM__API void *stbm_alloc_align_fileline(stbm_tc *tc, stbm_heap *heap, size_t size, unsigned short userdata16, size_t alignment, size_t align_offset, char *file, int line)
2530 r.userdata = userdata16;
2531 r.alignment = (stbm__uint32)alignment;
2532 r.align_offset = (stbm__uint32)align_offset;
2534 if (heap->force_file_line || heap->force_debug)
2538 return stbm__alloc_debug(&r);
2541 // check if it's non-aligned
2542 if (align_offset == 0 && alignment <= heap->minimum_alignment)
2543 if (size > STBM__SMALLEST_ALLOCATION || !heap->align_all_blocks_to_minimum)
2544 return stbm__alloc_normal(&r);
2546 return stbm__alloc_align(&r);
// Explicit debug allocation: always uses a debug header; debug2 when an
// extra payload or trailing guard bytes are requested, debug1 otherwise.
2549 STBM__API void *stbm_alloc_debug(stbm_tc *tc, stbm_heap *heap, size_t size, unsigned short userdata16, size_t alignment, size_t align_offset, char *file, int line, size_t extra_debug_size)
2559 r.userdata = userdata16;
2560 r.alignment = (stbm__uint32)alignment;
2561 r.align_offset = (stbm__uint32)align_offset;
2562 r.extra_debug_size = extra_debug_size;
2563 if (extra_debug_size || heap->num_guard_bytes)
2564 return stbm__alloc_debug2(&r);
2566 return stbm__alloc_debug1(&r);
// Public free: dispatch on the header type stored in the word just
// before the user pointer (plain, aligned wrapper, or debug wrapper).
2569 STBM__API void stbm_free(stbm_tc *tc, stbm_heap *safe_heap, void *ptr)
2573 stbm__uintptr prefix = ((stbm__uintptr *)ptr)[-1];
// fast path: neither aligned nor debug -- free the block directly
2575 if (!STBM__TYPE_align_debug(prefix))
2576 stbm__tc_free(tc, safe_heap, ptr);
2577 else if (STBM__TYPE(prefix) == STBM__TYPE_aligned)
2578 stbm__alloc_free_aligned(tc, safe_heap, ptr);
2579 else if (STBM__TYPE(prefix) == STBM__TYPE_debug)
2580 stbm__alloc_free_debug(tc, safe_heap, ptr);
2582 STBM_FAIL("Invalid block header in stbm_free");
2587 stbm__heap_check_internal(safe_heap);
// Public realloc: alloc/free degenerate cases, keep the block when the
// new size fits without wasting more than half, else alloc+copy+free.
2591 STBM__API void *stbm_realloc(stbm_tc *tc, stbm_heap *heap, void *oldptr, size_t newsize, unsigned short userdata16)
2595 return stbm_alloc(tc, heap, newsize, userdata16);
2597 else if (newsize == 0)
2599 stbm_free(tc, heap, oldptr);
2606 size_t oldsize = stbm_get_allocation_size(oldptr);
// shrink-in-place: reuse the block if it is no more than 2x the request
2607 if (newsize <= oldsize && oldsize <= newsize * 2)
2610 newptr = stbm_alloc(tc, heap, newsize, userdata16);
2614 copysize = STBM__MIN(oldsize, newsize);
2617 STBM_MEMCPY(newptr, oldptr, copysize);
// fallback byte copy when no STBM_MEMCPY is configured
2621 for (i = 0; i < copysize; ++i)
2622 ((char *)newptr)[i] = ((char *)oldptr)[i];
2626 stbm_free(tc, heap, oldptr);
// Return the 16-bit userdata tag stored in an allocation's header;
// small blocks have no room for a tag.
2631 unsigned short stbm_get_userdata(void *ptr)
2633 stbm__uintptr prefix = ((stbm__uintptr *)ptr)[-1];
2635 if (STBM__TYPE(prefix) == STBM__TYPE_small)
2638 return STBM__USERDATA_BITS(prefix);
// Return the usable size of an allocation, dispatching on header type.
2641 STBM__API size_t stbm_get_allocation_size(void *ptr)
2643 stbm__uintptr prefix = ((stbm__uintptr *)ptr)[-1];
2645 if (STBM__TYPE(prefix) == STBM__TYPE_medium)
2647 // this case is slightly complicated by the fact that a
2648 // TLSF allocation might return slightly more data,
2649 // and the extra isn't enough to make it to the next
2650 // sizeclass, and also isn't enough to split off a
2651 // separate block--leaving us to have to explicitly store
2652 // the extra. most allocators explicitly store the actual
2653 // size in the header so this isn't a problem, but we
2654 // want to leave room for userdata, so we only store
2655 // the sizeclass. (with a max of 1MB, 8-byte aligned,
2656 // we would need 17 bits to store it exactly.)
2657 static size_t extra[2] = { 0, STBM__EXTRA_SIZE };
2658 stbm__uintptr size = STBM__SIZE_DATA(prefix);
2659 return stbm__size_for_index_slot(STBM__SIZEDATA_index(size),
2660 STBM__SIZEDATA_slot(size)) +
2661 extra[STBM__SIZEDATA_extra(size)] - sizeof(stbm__medium_allocated);
// small blocks are all one fixed size
2664 if (STBM__TYPE(prefix) == STBM__TYPE_small)
2665 return STBM__SMALLEST_BLOCKSIZE - sizeof(stbm__small_allocated);
// large and debug blocks store their user size explicitly
2667 if (STBM__TYPE(prefix) == STBM__TYPE_large)
2668 return ((stbm__large_allocated *)ptr)[-1].user_size_in_bytes;
2670 if (STBM__TYPE(prefix) == STBM__TYPE_debug)
2671 return ((stbm__debug_allocated *)ptr)[-1].user_size_in_bytes;
2673 if (STBM__TYPE(prefix) == STBM__TYPE_aligned)
2675 // we have to compute this because we didn't store it explicitly
2678 // find the size of the underlying allocation:
2679 void *base = ((stbm__aligned_allocated *)ptr)[-1].underlying_allocation;
2680 size_t size = stbm_get_allocation_size(base);
2682 // the end of the underlying allocation is also the end of this allocation,
2683 // so the length of this allocation is computed as end - begin:
2684 return ((char *)base + size) - (char *)ptr;
2687 STBM_ASSERT(!STBM__PREFIX_VALID(prefix));
2688 STBM_FAIL("Corrupt malloc header in stbm_get_allocation_size");
// Return the heap that owns an allocation, dispatching on header type;
// aligned/debug wrappers recurse on their underlying allocation.
2692 stbm_heap *stbm_get_heap(void *ptr)
2694 stbm__uintptr prefix = ((stbm__uintptr *)ptr)[-1];
2696 // the heap pointer is stored in the same spot for med&large, and we can test their type in one test:
2697 STBM_ASSERT(&((stbm__medium_allocated *)ptr)[-1].heap == &((stbm__large_allocated *)ptr)[-1].heap);
2698 if (STBM__TYPE_med_large(prefix))
2699 return ((stbm__medium_allocated *)ptr)[-1].heap;
// small blocks reach their heap through the owning chunk
2701 if (STBM__TYPE(prefix) == STBM__TYPE_small)
2702 return ((stbm__small_allocated *)ptr)[-1].u.chunk->heap;
2704 if (STBM__TYPE(prefix) == STBM__TYPE_aligned)
2705 return stbm_get_heap(((stbm__aligned_allocated *)ptr)[-1].underlying_allocation);
2707 if (STBM__TYPE(prefix) == STBM__TYPE_debug)
2708 return stbm_get_heap(((stbm__debug_allocated *)ptr)[-1].underlying_allocation.debug1);
2710 STBM_ASSERT(!STBM__PREFIX_VALID(prefix));
2711 STBM_FAIL("Corrupt malloc header in stbm_get_heap");
// Retrieve the extra debug payload of a debug2 allocation: *data points
// at the payload (or 0 when there is none / the block isn't debug2).
2715 STBM__API size_t stbm_get_debug_data(void *ptr, void **data)
2719 stbm__uintptr prefix = ((stbm__uintptr *)ptr)[-1];
2720 if (STBM__TYPE(prefix) == STBM__TYPE_debug)
2722 if (STBM__DEBUG(prefix) == STBM__DEBUG_2)
2725 stbm__debug_allocated *da = ((stbm__debug_allocated *)ptr) - 1;
2726 stbm__debug_base2 *db2 = da->underlying_allocation.debug2;
2727 n = (int)db2->extra_debug_bytes;
// the payload lives immediately after the debug2 base header
2728 *data = n ? (db2 + 1) : 0;
// Retrieve the file/line captured for a debug allocation; returns the
// line number (non-debug blocks have no file/line to report).
2737 STBM__API int stbm_get_fileline(void *ptr, char **file)
2741 stbm__uintptr prefix = ((stbm__uintptr *)ptr)[-1];
2742 if (STBM__TYPE(prefix) == STBM__TYPE_debug)
2744 stbm__debug_allocated *da = ((stbm__debug_allocated *)ptr) - 1;
2745 stbm__debug_base1 *db1 = da->underlying_allocation.debug1;
2747 return (int)db1->line;
// Initialize a thread cache in caller-provided storage; fails (returns
// before initializing) when the storage is too small for stbm_tc.
2754 STBM__API stbm_tc *stbm_tc_init(void *storage, size_t storage_bytes, stbm_heap *heap)
2759 if (storage_bytes < sizeof(stbm_tc))
2762 tc = (stbm_tc *)storage;
2765 tc->lru_counter = 0;
2766 tc->cached_bytes = 0;
2767 tc->cached_bytes_scavenge_threshhold = 1024 * 1024; // 1MB
// no size class owns a cache slot yet
2768 for (i = 0; i < STBM__NUM_TC_INDICES; ++i)
2769 tc->slot_for_sizeclass[i] = -1;
// Tear down a heap: return every system allocation (walked via the
// sentinel-linked list) and any cached medium chunk to the system.
2775 STBM__API void stbm_heap_free(stbm_heap *heap)
2777 stbm__system_allocation *cur = heap->system_sentinel.next;
2778 while (cur != &heap->system_sentinel)
2780 stbm__system_allocation *next = cur->next;
2781 STBM_ASSERT(cur->magic == STBM__SYSTEM_ALLOC_LARGE || cur->magic == STBM__SYSTEM_ALLOC_MEDIUM);
2782 heap->system_free(heap->user_context, cur, cur->size);
// a medium chunk may be cached outside the sentinel list; release it too
2785 if (heap->medium.cached_system_alloc)
2786 heap->system_free(heap->user_context, heap->medium.cached_system_alloc, heap->medium.cached_system_alloc->size);
// Consistency check; caller must already hold the heap's allocation mutex.
2789 static void stbm__heap_check_locked(stbm_heap *heap)
2791 stbm__medium_check_all(heap);
// Public consistency check: validates free lists under the lock, then
// walks every allocation (iterate with null callback just validates).
2794 STBM__API void stbm_heap_check(stbm_heap *heap)
2796 STBM__ACQUIRE(heap->allocation_mutex);
2797 stbm__heap_check_locked(heap);
2798 STBM__RELEASE(heap->allocation_mutex);
// note: iterate takes the lock itself, so it runs after the release
2799 stbm_debug_iterate(heap, 0, 0);
// Return the opaque user context supplied at heap creation.
2802 STBM__API void *stbm_get_heap_user_context(stbm_heap *heap)
2804 return heap->user_context;
// Create a heap in caller-provided storage: copy the configuration,
// apply defaults, and initialize every subsystem.
2807 STBM__API stbm_heap *stbm_heap_init(void *storage, size_t storage_bytes, stbm_heap_config *hc)
2809 stbm_heap *heap = (stbm_heap *)storage;
2810 if (sizeof(*heap) > storage_bytes)
// cross-thread frees use a lock-free stack when CAS is available,
// otherwise a mutex-protected list
2814 #ifdef STBM_ATOMIC_COMPARE_AND_SWAP32
2815 STBM_MEMSET(heap->crossthread_free_stack, 0, sizeof(heap->crossthread_free_stack));
2816 heap->crossthread_free_pointer_stack_pos = 0;
2819 heap->crossthread_mutex = hc->crossthread_free_mutex;
2820 heap->crossthread_free_list = NULL;
2823 heap->allocation_mutex = hc->allocation_mutex;
// debug features are off by default; enabled via stbm_heapconfig_* calls
2825 heap->num_guard_bytes = 0;
2826 heap->guard_value = 0;
2827 heap->force_file_line = 0;
2828 heap->force_debug = 0;
2829 if (hc->minimum_alignment > STBM__SMALL_ALIGNMENT)
2831 STBM_ASSERT(stbm__is_pow2(hc->minimum_alignment));
2832 heap->minimum_alignment = (stbm__uint32)hc->minimum_alignment;
2836 heap->minimum_alignment = STBM__SMALL_ALIGNMENT;
2838 heap->align_all_blocks_to_minimum = (stbm__uint8)hc->align_all_blocks_to_minimum;
// tuning defaults; adjustable via the stbm_heapconfig_* setters below
2839 heap->full_stats = 0;
2840 heap->large_alloc_threshhold = 262144;
2841 heap->smallchunk_size = 4040;
2842 heap->cur_medium_chunk_size = 256 * (1 << 10);
2843 heap->max_medium_chunk_size = 2048 * (1 << 10);
2844 heap->cache_medium_chunks = STBM_MEDIUM_CACHE_none;
2846 heap->system_alloc = hc->system_alloc;
2847 heap->system_free = hc->system_free;
2848 heap->user_context = hc->user_context;
2850 stbm__medium_init(heap); // minimum_alignment must already be set when we call this
2851 stbm__small_init(heap);
2852 stbm__large_init(heap);
2853 stbm__system_init(heap);
2854 stbm__stats_init(heap);
2856 // @TODO create a medium block out of the left-over storage
2861 STBM__API void stbm_heapconfig_set_trailing_guard_bytes(stbm_heap *heap, size_t num_bytes, unsigned char value)
2863 heap->num_guard_bytes = (stbm__uint32)num_bytes;
2864 heap->guard_value = value;
2865 heap->force_debug = 1;
2868 STBM__API void stbm_heapconfig_capture_file_line(stbm_heap *heap, int enable_capture)
2870 heap->force_file_line = enable_capture ? 1 : 0;
// Enable detailed per-heap statistics gathering.
2873 STBM__API void stbm_heapconfig_gather_full_stats(stbm_heap *heap)
2875 heap->full_stats = 1;
2878 STBM__API void stbm_heapconfig_set_largealloc_threshhold(stbm_heap *heap, size_t threshhold)
2880 heap->large_alloc_threshhold = (stbm__uint32)STBM__MIN(threshhold, STBM__SIZECLASS_MAX);
2883 STBM__API void stbm_heapconfig_set_medium_chunk_size(stbm_heap *heap, size_t cur_size, size_t max_size)
2885 heap->cur_medium_chunk_size = cur_size;
2886 heap->max_medium_chunk_size = STBM__MIN(max_size, 2 << 20);
// Set the size of the chunks subdivided into small allocations.
2889 STBM__API void stbm_heapconfig_set_small_chunk_size(stbm_heap *heap, size_t size)
2891 heap->smallchunk_size = size;
// Control whether a freed medium chunk is cached instead of returned to
// the system (takes an STBM_MEDIUM_CACHE_* value).
2894 STBM__API void stbm_heapconfig_cache_medium_chunks(stbm_heap *heap, int do_cache)
2896 heap->cache_medium_chunks = (stbm__uint8)do_cache;
2899 // perform a callback on a pointer which may be a wrapped around an aligned/debug alloc
// Unwraps aligned/debug blocks to the user pointer, fills an
// stbm_alloc_info record, and invokes the callback for the iterator.
2900 static void stbm__process_user_pointer(void *ptr, stbm_debug_iterate_func *callback, void *user_context, int maybe_wrapped)
2902 stbm__uintptr prefix = ((stbm__uintptr *)ptr)[-1];
2903 stbm_alloc_info info;
2905 if (maybe_wrapped && STBM__IS_WRAPPED(prefix))
2907 stbm__aligned_base *ab = (stbm__aligned_base *)ptr;
2908 if (ab->wrapped_user_pointer == NULL)
2909 STBM_FAIL("Corrupt block in iterate");
2910 ptr = ab->wrapped_user_pointer;
2915 info.size = stbm_get_allocation_size(ptr);
2916 info.userdata = stbm_get_userdata(ptr);
2917 info.is_aligned = (STBM__TYPE(((stbm__uintptr *)ptr)[-1]) == STBM__TYPE_aligned);
2918 info.line = stbm_get_fileline(ptr, &info.file);
2919 info.extra_data_bytes = (int)stbm_get_debug_data(ptr, &info.extra_data);
2922 callback(user_context, &info);
// Walk every live allocation in the heap under the allocation lock,
// invoking 'callback' per allocation. Also validates block headers and
// lengths as it goes (used with a null callback as a heap check).
2925 STBM__API void stbm_debug_iterate(stbm_heap *heap, stbm_debug_iterate_func *callback, void *user_context)
2927 stbm__system_allocation *sa;
2928 STBM__ACQUIRE(heap->allocation_mutex);
2929 sa = heap->system_sentinel.next;
2930 while (sa != &heap->system_sentinel)
2932 // every system allocation is either one large allocation, or one or more
2934 if (sa->magic == STBM__SYSTEM_ALLOC_LARGE)
2936 // process a potentially wrapped allocation
2937 stbm__process_user_pointer(sa->begin_pointer, callback, user_context, 1)
2939 else if (sa->magic != STBM__SYSTEM_ALLOC_MEDIUM)
2941 STBM_FAIL("Corrupt system allocation header");
2945 // medium-block system allocation; walk through all the blocks
2946 void *user = sa->begin_pointer;
2947 while ((char *)user < (char *)sa->end_pointer)
2949 stbm__uintptr prefix = ((stbm__uintptr *)user)[-1];
2950 if ((prefix & STBM__BLOCK_MASK) == STBM__BLOCK_free)
2952 // if the block is free, skip it
2953 user = (char *)user + STBM__MED_FREE_BYTES(prefix);
2957 // if the block is allocated, compute where the next block is
2958 void *next = (char *)user + stbm_get_allocation_size(user) + sizeof(stbm__medium_allocated);
2959 // if it's wrapped, it might be a block used for small allocations,
2960 // so check for the case where it's NOT
2961 if (!STBM__IS_WRAPPED(prefix) || ((stbm__aligned_base *)user)->wrapped_user_pointer != NULL)
2962 stbm__process_user_pointer(user, callback, user_context, 1);
2965 // it's a chunk used for small allocations, so walk all the allocations
2966 stbm__small_chunk *sc = (stbm__small_chunk *)user;
2967 stbm__small_freeblock *pos = (stbm__small_freeblock *)sc->unused_storage + 1;
2968 stbm__uint32 count = 0;
2969 while (count < sc->num_alloc)
2971 if ((char *)pos >= (char *)next)
2972 STBM_FAIL("Small chunk invalid");
// an allocated small block stores its owning chunk pointer in the prefix
2973 else if (pos->prefix == (stbm__uintptr)sc)
2975 // the "wrapped" field of small blocks is invalid, so make sure we don't check it
2976 stbm__process_user_pointer(pos->next_free, callback, user_context, 0);
2979 else if (pos->prefix != STBM__SMALL_FREE_BLOCK_MAGIC)
2981 STBM_FAIL("Small block invalid");
// after the walk, we must land exactly on the end of the system allocation
2989 if (user != sa->end_pointer)
2990 STBM_FAIL("Heap walk lengths didn't validate");
2994 STBM__RELEASE(heap->allocation_mutex);
2999 #ifdef STBM_UNITTEST
3003 #include <sys/types.h>
3004 #include <sys/mman.h>
// Unit-test bookkeeping: counts of system alloc/free calls and a map of
// outstanding mmap'd regions to their sizes (this test code is C++).
3011 static int stbm__num_sys_alloc, stbm__num_sys_free;
3012 static int page_size;
3014 typedef std::map<void *, size_t> alloc_size_hash_t;
3015 static alloc_size_hash_t alloc_size_hash;
3017 extern "C" void *vogl_realloc(const char *pFile_line, void *p, size_t new_size);
// Test system-allocation hook: page-align the request, mmap it, record
// the region's size for later validation, and report the rounded size.
3019 static void *stbm__mysystem_alloc(void *context, size_t size, size_t *outsize)
3022 page_size = sysconf(_SC_PAGE_SIZE);
// round up to a whole number of pages
3024 size = (size + page_size - 1) & (~(page_size - 1));
3026 void *p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_ANON | MAP_SHARED, -1, 0);
3034 alloc_size_hash.insert(std::make_pair(p, size));
3037 ++stbm__num_sys_alloc;
// Test system-free hook: validate the pointer/size pair against the
// recorded mmap regions, then munmap.
3042 static void stbm__mysystem_free(void *context, void *ptr, size_t size)
3046 alloc_size_hash_t::iterator it(alloc_size_hash.find(ptr));
3047 if (it == alloc_size_hash.end())
3049 STBM_FAIL("stbm__mysystem_free: ptr is invalid!\n");
3051 if (it->second != size)
3053 STBM_FAIL("stbm__mysystem_free: size is invalid!\n");
3055 alloc_size_hash.erase(it);
3059 STBM_FAIL("stbm__mysystem_free: size is 0!\n");
3062 int res = munmap(ptr, size);
3065 STBM_FAIL("munmap() failed!\n");
3069 ++stbm__num_sys_free;
// End-to-end stress test of a single stbm heap:
//   1. medium/small allocations with geometrically growing sizes,
//   2. tiny (size 0..4) allocations,
//   3. interleaved small alloc/free patterns,
//   4. large (1-2 MB) blocks, which must map 1:1 onto system allocations,
//   5. a randomized alloc/free/memset soak of `count` iterations that also
//      verifies userdata round-trips and that block contents aren't trashed.
// alignment: minimum_alignment for the heap config (0 = heap default).
// count:     iteration count for the randomized soak phase.
3072 static void stbm__test_heap(int alignment, int count)
3075     int num_outstanding;
3077     stbm_heap_config hc; // = { 0 };
// static so the large scratch arrays don't blow the stack
3079     static void *allocations[20000];
3080     static size_t sizes[20000];
3083     STBM_MEMSET(&hc, 0, sizeof(hc));
3085     hc.system_alloc = stbm__mysystem_alloc;
3086     hc.system_free = stbm__mysystem_free;
3087     hc.user_context = NULL;
3088     hc.minimum_alignment = alignment;
3090     printf("Heap align=%d , count=%d\n", alignment, count);
3092     heap = stbm_heap_init(storage, sizeof(storage), &hc);
3094     // test medium (and small)
3096     for (i = 0; i < 5000; ++i)
3099         allocations[i] = stbm_alloc(0, heap, size, 0);
3100         if (allocations[i] == 0)
3102         STBM_ASSERT(stbm_get_heap(allocations[i]) == heap);
3103         r = stbm_get_allocation_size(allocations[i]);
3104         STBM_ASSERT(r >= size);
// grow size ~6% per iteration, wrapping once it exceeds 96KB (wrap elided)
3106         size = size + 1 + (size >> 4);
3107         if (size > (96 << 10))
// re-check every earlier allocation is still at least its requested size
3109     for (k = 0; k <= i; ++k)
3110         STBM_ASSERT(stbm_get_allocation_size(allocations[k]) >= sizes[k]);
3112     stbm_debug_iterate(heap, 0, 0);
3113     for (i = 0; i < 5000; ++i)
3114         stbm_free(0, heap, allocations[i]);
3116     printf("Made %d system allocations total; %d outstanding\n", stbm__num_sys_alloc, stbm__num_sys_alloc - stbm__num_sys_free);
// same pattern again but with tiny sizes (0..4) to hit the small-block path
3120     for (i = 0; i < 5000; ++i)
3125         allocations[i] = stbm_alloc(0, heap, size, 0);
3126         if (allocations[i] == 0)
3128         STBM_ASSERT(stbm_get_heap(allocations[i]) == heap);
3129         r = stbm_get_allocation_size(allocations[i]);
3130         STBM_ASSERT(r >= size);
3132         size = (size + 1) % 5;
3133         if (size > (96 << 10))
3135     for (k = 0; k <= i; ++k)
3136         STBM_ASSERT(stbm_get_allocation_size(allocations[k]) >= sizes[k]);
3138     stbm_debug_iterate(heap, 0, 0);
3139     for (i = 0; i < 5000; ++i)
3140         stbm_free(0, heap, allocations[i]);
3142     printf("Made %d system allocations total; %d outstanding\n", stbm__num_sys_alloc, stbm__num_sys_alloc - stbm__num_sys_free);
3144     // test interleaving of small
// free even-indexed then odd-indexed blocks to force fragmented free lists
3145     for (i = 0; i < 5000; ++i)
3146         allocations[i] = stbm_alloc(0, heap, 1, 0);
3147     stbm_debug_iterate(heap, 0, 0);
3148     for (i = 0; i < 5000; i += 2)
3149         stbm_free(0, heap, allocations[i]);
3150     for (i = 0; i < 5000; i += 2)
3151         stbm_free(0, heap, allocations[i + 1]);
3153     printf("Made %d system allocations total; %d outstanding\n", stbm__num_sys_alloc, stbm__num_sys_alloc - stbm__num_sys_free);
3155     num_outstanding = stbm__num_sys_alloc - stbm__num_sys_free;
3157     // test large blocks
// 1MB or 2MB each; also exercises the 16-bit userdata field
3158     for (i = 0; i < 500; ++i)
3159         allocations[i] = stbm_alloc(0, heap, (i % 2 + 1) << 20, (unsigned short)i);
3161     printf("Made %d system allocations total; %d outstanding\n", stbm__num_sys_alloc, stbm__num_sys_alloc - stbm__num_sys_free);
// large allocations must be satisfied 1:1 by system allocations
3162     if (stbm__num_sys_alloc - stbm__num_sys_free != num_outstanding + 500)
3163         STBM_FAIL("Large allocations didn't trigger same number of system allocations");
3165     for (i = 0; i < 500; ++i)
3166         stbm_free(0, heap, allocations[i]);
3168     printf("Made %d system allocations total; %d outstanding\n", stbm__num_sys_alloc, stbm__num_sys_alloc - stbm__num_sys_free);
3170     // we allow for some fuzz in case we start caching system allocations to reduce system call overhead
3171     if (stbm__num_sys_alloc - stbm__num_sys_free > num_outstanding + 10)
3172         STBM_FAIL("Freeing large allocations didn't free (almost all) system allocations");
3174     // test that memory isn't trashed, and more complex
3175     // interleaving of free & alloc
// fill each block with a known byte pattern (low byte of its index)
3176     for (i = 0; i < 3000; ++i)
3178         allocations[i] = stbm_alloc(0, heap, size, (unsigned short)i);
3180         STBM_MEMSET(allocations[i], i, sizes[i]);
3181         size = size + 1 + (size >> 4);
3182         if (size > (96 << 10))
3185     stbm_debug_iterate(heap, 0, 0);
// randomized soak: pick a pseudo-random slot, verify its fill pattern and
// userdata survived, free it, then reallocate it with a new size
3187     for (i = 0; i < count; ++i)
3189         j = (unsigned)(j + i + (i >> 5) + (j >> 11)) % 3000;
3192         for (k = 0; k < (int)sizes[j]; ++k)
3193             STBM_ASSERT(((char *)allocations[j])[k] == (char)j);
3194         STBM_ASSERT(stbm_get_userdata(allocations[j]) == (unsigned short)j);
3195         stbm_free(0, heap, allocations[j]);
3198         allocations[j] = stbm_alloc(0, heap, size, (unsigned short)j);
3199         STBM_ASSERT(stbm_get_userdata(allocations[j]) == (unsigned short)j);
3200         STBM_MEMSET(allocations[j], j, sizes[j]);
3201         size = (size * 367 + 1) % 30000;
3203     stbm_debug_iterate(heap, 0, 0);
3206     printf("Made %d system allocations total; %d outstanding\n", stbm__num_sys_alloc, stbm__num_sys_alloc - stbm__num_sys_free);
3208     // interleave small again, should be able to be allocated from free blocks from above
3209     for (i = 0; i < 5000; ++i)
3210         allocations[i] = stbm_alloc(0, heap, 1, 0);
3211     stbm_debug_iterate(heap, 0, 0);
3212     for (i = 0; i < 5000; i += 2)
3213         stbm_free(0, heap, allocations[i]);
3214     for (i = 0; i < 5000; i += 2)
3215         stbm_free(0, heap, allocations[i + 1]);
3217     printf("Made %d system allocations total; %d outstanding\n", stbm__num_sys_alloc, stbm__num_sys_alloc - stbm__num_sys_free);
// final invariant: destroying the heap must return every system allocation
3219     if (stbm__num_sys_alloc == stbm__num_sys_free)
3221     stbm_heap_free(heap);
3222     if (stbm__num_sys_alloc != stbm__num_sys_free)
3223         STBM_FAIL("heap free failed to free everything");
// qsort comparator ordering void* elements by address.
// The branch body for the less-than case is elided from this view; it
// presumably returns -1, so the final line's (p > q) comparison yields the
// standard negative/zero/positive contract -- confirm in the full source.
3226 static int stbm__ptr_compare(const void *p_, const void *q_)
3228     const void **p = (const void **)p_;
3229     const void **q = (const void **)q_;
3231     if ((char *)*p < (char *)*q)
3233     return (char *)*p > (char *)*q;
// Debug helper: prints every block on the heap's medium-size free lists
// (size in bytes and address), one line per free block.
3236 static void stbm__dump_heap_state(stbm_heap *heap)
3239     printf("Free lists:\n");
// one free list per (index, slot) pair; walk them all
3240     for (i = 0; i < STBM__NUM_INDICES * STBM__NUM_SLOTS; ++i)
3242         stbm__medium_freeblock *b = heap->medium.lists[0][i];
3245             printf("  Free: %8d %p\n", STBM__MED_FREE_BYTES(b->prefix), b);
// Tests free-list behavior when blocks are freed in address order rather than
// allocation order: repeatedly allocates ~128-byte-varied blocks, then frees
// them in several interleavings (every other / every third, both in time
// order and -- after sorting the pointers -- in address order), checking that
// system allocations stay balanced and the heap can be torn down cleanly.
//
// BUG FIXES vs. original:
//  - both qsort() calls had nmemb and size swapped
//    (qsort(base, nmemb, size, cmp)), so the "free in address order" phases
//    were sorting 8 bogus 5000-byte elements instead of 5000 pointers.
//  - printf("%d", stbm_get_allocation_size(...)) passed a size_t to %d,
//    which is undefined behavior; cast added.
3251 static void stbm__test_heap_sorted(void)
3255     stbm_heap_config hc; // = { 0 };
3257     static void *allocations[20000];
3260     STBM_MEMSET(&hc, 0, sizeof(hc));
3262     hc.system_alloc = stbm__mysystem_alloc;
3263     hc.system_free = stbm__mysystem_free;
3264     hc.user_context = NULL;
3266     heap = stbm_heap_init(storage, sizeof(storage), &hc);
3267     stbm_heapconfig_set_medium_chunk_size(heap, 16384, 65536);
3269     // first allocate and free a bunch to stabilize the system allocations
3273     for (j = 0; j < 50; ++j)
3275         for (i = 0; i < 3000; ++i)
3276             allocations[i] = stbm_alloc(0, heap, size + (i % 128), (unsigned short)i);
3279         stbm__dump_heap_state(heap);
3280         for (i=0; i < 5; ++i)
// cast: stbm_get_allocation_size returns size_t, which must not go to %d
3281             printf("Allocation: %d @ %p\n", (int)stbm_get_allocation_size(allocations[i]), allocations[i]);
3284         for (i = 0; i < 3000; ++i)
3286             stbm_free(0, heap, allocations[i]);
3287         // stbm__dump_heap_state(heap);
3290     printf("Made %d system allocations total; %d outstanding\n", stbm__num_sys_alloc, stbm__num_sys_alloc - stbm__num_sys_free);
3293     for (j = 0; j < 10; ++j)
3295         // reallocate, and try freeing every other one
3296         for (i = 0; i < 5000; ++i)
3297             allocations[i] = stbm_alloc(0, heap, size + (i % 128), (unsigned short)i);
3298         for (i = 0; i < 5000; i += 2)
3299             stbm_free(0, heap, allocations[i]);
3300         for (i = 1; i < 5000; i += 2)
3301             stbm_free(0, heap, allocations[i]);
3303     printf("Made %d system allocations total; %d outstanding\n", stbm__num_sys_alloc, stbm__num_sys_alloc - stbm__num_sys_free);
3306     for (j = 0; j < 10; ++j)
3308         // reallocate, and try freeing every other one in space, not time
3309         for (i = 0; i < 5000; ++i)
3310             allocations[i] = stbm_alloc(0, heap, size + (i % 128), (unsigned short)i);
// FIX: qsort takes (base, nmemb, size, cmp); sort 5000 pointer elements
3311         qsort(allocations, 5000, sizeof(allocations[0]), stbm__ptr_compare);
3312         for (i = 0; i < 5000; i += 2)
3313             stbm_free(0, heap, allocations[i]);
3314         for (i = 1; i < 5000; i += 2)
3315             stbm_free(0, heap, allocations[i]);
3316         printf("Made %d system allocations total; %d outstanding\n", stbm__num_sys_alloc, stbm__num_sys_alloc - stbm__num_sys_free);
3319     // reallocate, and try freeing every third one in space, not time
3321     for (j = 0; j < 10; ++j)
3323         for (i = 0; i < 5000; ++i)
3324             allocations[i] = stbm_alloc(0, heap, size + (i % 128), (unsigned short)i);
// FIX: same argument-order correction as above
3325         qsort(allocations, 5000, sizeof(allocations[0]), stbm__ptr_compare);
3326         for (i = 0; i < 5000; i += 3)
3327             stbm_free(0, heap, allocations[i]);
3328         for (i = 1; i < 5000; i += 3)
3329             stbm_free(0, heap, allocations[i]);
3330         for (i = 2; i < 5000; i += 3)
3331             stbm_free(0, heap, allocations[i]);
3332         printf("Made %d system allocations total; %d outstanding\n", stbm__num_sys_alloc, stbm__num_sys_alloc - stbm__num_sys_free);
// final invariant: destroying the heap must return every system allocation
3335     if (stbm__num_sys_alloc == stbm__num_sys_free)
3337     stbm_heap_free(heap);
3338     if (stbm__num_sys_alloc != stbm__num_sys_free)
3339         STBM_FAIL("heap free failed to free everything");
// Unit-test driver: runs the sorted-free test once, then the general heap
// stress test across several minimum alignments and iteration counts
// (larger counts with default alignment for a longer soak).
3342 static void stbm__test(void)
3345     stbm__test_heap_sorted();
3346     stbm__test_heap(0, 50000);
3347     stbm__test_heap(16, 20000);
3348     stbm__test_heap(64, 20000);
3349     stbm__test_heap(256, 20000);
3350     stbm__test_heap(32, 20000);
3351     stbm__test_heap(0, 120000);
3352     stbm__test_heap(0, 500000);
3353     stbm__test_heap(0, 2500000);
3357 #ifdef STBM_SELFTEST
3358 int stb_malloc_selftest()