Browse code

Add a new memory allocator (-DDL_MALLOC) which is based on Doug Lea's dl_malloc implementation (http://g.oswego.edu/dl/html/malloc.html). Patch has been provided by Jan Andres <jan.andres@freenet-ag.de>

Notes:
- very fast, esp. on shm_malloc() in comparison to other algorithms
- has been in production use at freenet(.de) for 6+ months
- is picky about buffer overruns (aka detects and crashes on them)
- disabled by default (set -DDL_MALLOC in Makefile.defs)
- closes SER-278

Hendrik Scholz authored on 01/06/2007 09:20:34
Showing 9 changed files
... ...
@@ -316,6 +316,8 @@ endif
316 316
 #		(not true anymore, q_malloc performs approx. the same)
317 317
 # -DF_MALLOC
318 318
 #		an even faster malloc, not recommended for debugging
319
+# -DDL_MALLOC
320
+#		a malloc implementation based on Doug Lea's dl_malloc
319 321
 # -DDBG_MALLOC
320 322
 #		issues additional debugging information if lock/unlock is called
321 323
 # -DFAST_LOCK
... ...
@@ -411,7 +413,8 @@ DEFS+= $(extra_defs) \
411 413
 	 -DUSE_DNS_FAILOVER \
412 414
 	 -DUSE_DST_BLACKLIST \
413 415
 	 -DDBG_QM_MALLOC \
414
-	 #-DF_MALLOC
416
+	 #-DDL_MALLOC \
417
+	 #-DF_MALLOC \
415 418
 	 #-DDBG_F_MALLOC \
416 419
 	 #-DNO_DEBUG \
417 420
 	 #-DEXTRA_DEBUG \
418 421
new file mode 100644
... ...
@@ -0,0 +1,10 @@
1
+#ifndef _DL_CONFIG_H
2
+#define _DL_CONFIG_H
3
+
4
+#define MSPACES 1
5
+#define USE_DL_PREFIX 1
6
+#define MALLOC_ALIGNMENT 16
7
+/* enable FOOTERS for extra consistency checks */
8
+/* #define FOOTERS 1 */
9
+
10
+#endif /* _DL_CONFIG_H */
0 11
new file mode 100644
... ...
@@ -0,0 +1,5080 @@
1
+/*
2
+  This is a version (aka dlmalloc) of malloc/free/realloc written by
3
+  Doug Lea and released to the public domain, as explained at
4
+  http://creativecommons.org/licenses/publicdomain.  Send questions,
5
+  comments, complaints, performance data, etc to dl@cs.oswego.edu
6
+
7
+* Version 2.8.3 Thu Sep 22 11:16:15 2005  Doug Lea  (dl at gee)
8
+
9
+   Note: There may be an updated version of this malloc obtainable at
10
+           ftp://gee.cs.oswego.edu/pub/misc/malloc.c
11
+         Check before installing!
12
+
13
+* Quickstart
14
+
15
+  This library is all in one file to simplify the most common usage:
16
+  ftp it, compile it (-O3), and link it into another program. All of
17
+  the compile-time options default to reasonable values for use on
18
+  most platforms.  You might later want to step through various
19
+  compile-time and dynamic tuning options.
20
+
21
+  For convenience, an include file for code using this malloc is at:
22
+     ftp://gee.cs.oswego.edu/pub/misc/malloc-2.8.3.h
23
+  You don't really need this .h file unless you call functions not
24
+  defined in your system include files.  The .h file contains only the
25
+  excerpts from this file needed for using this malloc on ANSI C/C++
26
+  systems, so long as you haven't changed compile-time options about
27
+  naming and tuning parameters.  If you do, then you can create your
28
+  own malloc.h that does include all settings by cutting at the point
29
+  indicated below. Note that you may already by default be using a C
30
+  library containing a malloc that is based on some version of this
31
+  malloc (for example in linux). You might still want to use the one
32
+  in this file to customize settings or to avoid overheads associated
33
+  with library versions.
34
+
35
+* Vital statistics:
36
+
37
+  Supported pointer/size_t representation:       4 or 8 bytes
38
+       size_t MUST be an unsigned type of the same width as
39
+       pointers. (If you are using an ancient system that declares
40
+       size_t as a signed type, or need it to be a different width
41
+       than pointers, you can use a previous release of this malloc
42
+       (e.g. 2.7.2) supporting these.)
43
+
44
+  Alignment:                                     8 bytes (default)
45
+       This suffices for nearly all current machines and C compilers.
46
+       However, you can define MALLOC_ALIGNMENT to be wider than this
47
+       if necessary (up to 128bytes), at the expense of using more space.
48
+
49
+  Minimum overhead per allocated chunk:   4 or  8 bytes (if 4byte sizes)
50
+                                          8 or 16 bytes (if 8byte sizes)
51
+       Each malloced chunk has a hidden word of overhead holding size
52
+       and status information, and additional cross-check word
53
+       if FOOTERS is defined.
54
+
55
+  Minimum allocated size: 4-byte ptrs:  16 bytes    (including overhead)
56
+                          8-byte ptrs:  32 bytes    (including overhead)
57
+
58
+       Even a request for zero bytes (i.e., malloc(0)) returns a
59
+       pointer to something of the minimum allocatable size.
60
+       The maximum overhead wastage (i.e., number of extra bytes
61
+       allocated than were requested in malloc) is less than or equal
62
+       to the minimum size, except for requests >= mmap_threshold that
63
+       are serviced via mmap(), where the worst case wastage is about
64
+       32 bytes plus the remainder from a system page (the minimal
65
+       mmap unit); typically 4096 or 8192 bytes.
66
+
67
+  Security: static-safe; optionally more or less
68
+       The "security" of malloc refers to the ability of malicious
69
+       code to accentuate the effects of errors (for example, freeing
70
+       space that is not currently malloc'ed or overwriting past the
71
+       ends of chunks) in code that calls malloc.  This malloc
72
+       guarantees not to modify any memory locations below the base of
73
+       heap, i.e., static variables, even in the presence of usage
74
+       errors.  The routines additionally detect most improper frees
75
+       and reallocs.  All this holds as long as the static bookkeeping
76
+       for malloc itself is not corrupted by some other means.  This
77
+       is only one aspect of security -- these checks do not, and
78
+       cannot, detect all possible programming errors.
79
+
80
+       If FOOTERS is defined nonzero, then each allocated chunk
81
+       carries an additional check word to verify that it was malloced
82
+       from its space.  These check words are the same within each
83
+       execution of a program using malloc, but differ across
84
+       executions, so externally crafted fake chunks cannot be
85
+       freed. This improves security by rejecting frees/reallocs that
86
+       could corrupt heap memory, in addition to the checks preventing
87
+       writes to statics that are always on.  This may further improve
88
+       security at the expense of time and space overhead.  (Note that
89
+       FOOTERS may also be worth using with MSPACES.)
90
+
91
+       By default detected errors cause the program to abort (calling
92
+       "abort()"). You can override this to instead proceed past
93
+       errors by defining PROCEED_ON_ERROR.  In this case, a bad free
94
+       has no effect, and a malloc that encounters a bad address
95
+       caused by user overwrites will ignore the bad address by
96
+       dropping pointers and indices to all known memory. This may
97
+       be appropriate for programs that should continue if at all
98
+       possible in the face of programming errors, although they may
99
+       run out of memory because dropped memory is never reclaimed.
100
+
101
+       If you don't like either of these options, you can define
102
+       CORRUPTION_ERROR_ACTION and USAGE_ERROR_ACTION to do anything
103
+  else. And if you are sure that your program using malloc has
104
+       no errors or vulnerabilities, you can define INSECURE to 1,
105
+       which might (or might not) provide a small performance improvement.
106
+
107
+  Thread-safety: NOT thread-safe unless USE_LOCKS defined
108
+       When USE_LOCKS is defined, each public call to malloc, free,
109
+       etc is surrounded with either a pthread mutex or a win32
110
+       spinlock (depending on WIN32). This is not especially fast, and
111
+       can be a major bottleneck.  It is designed only to provide
112
+       minimal protection in concurrent environments, and to provide a
113
+       basis for extensions.  If you are using malloc in a concurrent
114
+       program, consider instead using ptmalloc, which is derived from
115
+       a version of this malloc. (See http://www.malloc.de).
116
+
117
+  System requirements: Any combination of MORECORE and/or MMAP/MUNMAP
118
+       This malloc can use unix sbrk or any emulation (invoked using
119
+       the CALL_MORECORE macro) and/or mmap/munmap or any emulation
120
+       (invoked using CALL_MMAP/CALL_MUNMAP) to get and release system
121
+       memory.  On most unix systems, it tends to work best if both
122
+       MORECORE and MMAP are enabled.  On Win32, it uses emulations
123
+       based on VirtualAlloc. It also uses common C library functions
124
+       like memset.
125
+
126
+  Compliance: I believe it is compliant with the Single Unix Specification
127
+       (See http://www.unix.org). Also SVID/XPG, ANSI C, and probably
128
+       others as well.
129
+
130
+* Overview of algorithms
131
+
132
+  This is not the fastest, most space-conserving, most portable, or
133
+  most tunable malloc ever written. However it is among the fastest
134
+  while also being among the most space-conserving, portable and
135
+  tunable.  Consistent balance across these factors results in a good
136
+  general-purpose allocator for malloc-intensive programs.
137
+
138
+  In most ways, this malloc is a best-fit allocator. Generally, it
139
+  chooses the best-fitting existing chunk for a request, with ties
140
+  broken in approximately least-recently-used order. (This strategy
141
+  normally maintains low fragmentation.) However, for requests less
142
+  than 256bytes, it deviates from best-fit when there is not an
143
+  exactly fitting available chunk by preferring to use space adjacent
144
+  to that used for the previous small request, as well as by breaking
145
+  ties in approximately most-recently-used order. (These enhance
146
+  locality of series of small allocations.)  And for very large requests
147
+  (>= 256Kb by default), it relies on system memory mapping
148
+  facilities, if supported.  (This helps avoid carrying around and
149
+  possibly fragmenting memory used only for large chunks.)
150
+
151
+  All operations (except malloc_stats and mallinfo) have execution
152
+  times that are bounded by a constant factor of the number of bits in
153
+  a size_t, not counting any clearing in calloc or copying in realloc,
154
+  or actions surrounding MORECORE and MMAP that have times
155
+  proportional to the number of non-contiguous regions returned by
156
+  system allocation routines, which is often just 1.
157
+
158
+  The implementation is not very modular and seriously overuses
159
+  macros. Perhaps someday all C compilers will do as good a job
160
+  inlining modular code as can now be done by brute-force expansion,
161
+  but now, enough of them seem not to.
162
+
163
+  Some compilers issue a lot of warnings about code that is
164
+  dead/unreachable only on some platforms, and also about intentional
165
+  uses of negation on unsigned types. All known cases of each can be
166
+  ignored.
167
+
168
+  For a longer but out of date high-level description, see
169
+     http://gee.cs.oswego.edu/dl/html/malloc.html
170
+
171
+* MSPACES
172
+  If MSPACES is defined, then in addition to malloc, free, etc.,
173
+  this file also defines mspace_malloc, mspace_free, etc. These
174
+  are versions of malloc routines that take an "mspace" argument
175
+  obtained using create_mspace, to control all internal bookkeeping.
176
+  If ONLY_MSPACES is defined, only these versions are compiled.
177
+  So if you would like to use this allocator for only some allocations,
178
+  and your system malloc for others, you can compile with
179
+  ONLY_MSPACES and then do something like...
180
+    static mspace mymspace = create_mspace(0,0); // for example
181
+    #define mymalloc(bytes)  mspace_malloc(mymspace, bytes)
182
+
183
+  (Note: If you only need one instance of an mspace, you can instead
184
+  use "USE_DL_PREFIX" to relabel the global malloc.)
185
+
186
+  You can similarly create thread-local allocators by storing
187
+  mspaces as thread-locals. For example:
188
+    static __thread mspace tlms = 0;
189
+    void*  tlmalloc(size_t bytes) {
190
+      if (tlms == 0) tlms = create_mspace(0, 0);
191
+      return mspace_malloc(tlms, bytes);
192
+    }
193
+    void  tlfree(void* mem) { mspace_free(tlms, mem); }
194
+
195
+  Unless FOOTERS is defined, each mspace is completely independent.
196
+  You cannot allocate from one and free to another (although
197
+  conformance is only weakly checked, so usage errors are not always
198
+  caught). If FOOTERS is defined, then each chunk carries around a tag
199
+  indicating its originating mspace, and frees are directed to their
200
+  originating spaces.
201
+
202
+ -------------------------  Compile-time options ---------------------------
203
+
204
+Be careful in setting #define values for numerical constants of type
205
+size_t. On some systems, literal values are not automatically extended
206
+to size_t precision unless they are explicitly casted.
207
+
208
+WIN32                    default: defined if _WIN32 defined
209
+  Defining WIN32 sets up defaults for MS environment and compilers.
210
+  Otherwise defaults are for unix.
211
+
212
+MALLOC_ALIGNMENT         default: (size_t)8
213
+  Controls the minimum alignment for malloc'ed chunks.  It must be a
214
+  power of two and at least 8, even on machines for which smaller
215
+  alignments would suffice. It may be defined as larger than this
216
+  though. Note however that code and data structures are optimized for
217
+  the case of 8-byte alignment.
218
+
219
+MSPACES                  default: 0 (false)
220
+  If true, compile in support for independent allocation spaces.
221
+  This is only supported if HAVE_MMAP is true.
222
+
223
+ONLY_MSPACES             default: 0 (false)
224
+  If true, only compile in mspace versions, not regular versions.
225
+
226
+USE_LOCKS                default: 0 (false)
227
+  Causes each call to each public routine to be surrounded with
228
+  pthread or WIN32 mutex lock/unlock. (If set true, this can be
229
+  overridden on a per-mspace basis for mspace versions.)
230
+
231
+FOOTERS                  default: 0
232
+  If true, provide extra checking and dispatching by placing
233
+  information in the footers of allocated chunks. This adds
234
+  space and time overhead.
235
+
236
+INSECURE                 default: 0
237
+  If true, omit checks for usage errors and heap space overwrites.
238
+
239
+USE_DL_PREFIX            default: NOT defined
240
+  Causes compiler to prefix all public routines with the string 'dl'.
241
+  This can be useful when you only want to use this malloc in one part
242
+  of a program, using your regular system malloc elsewhere.
243
+
244
+ABORT                    default: defined as abort()
245
+  Defines how to abort on failed checks.  On most systems, a failed
246
+  check cannot die with an "assert" or even print an informative
247
+  message, because the underlying print routines in turn call malloc,
248
+  which will fail again.  Generally, the best policy is to simply call
249
+  abort(). It's not very useful to do more than this because many
250
+  errors due to overwriting will show up as address faults (null, odd
251
+  addresses etc) rather than malloc-triggered checks, so will also
252
+  abort.  Also, most compilers know that abort() does not return, so
253
+  can better optimize code conditionally calling it.
254
+
255
+PROCEED_ON_ERROR           default: defined as 0 (false)
256
+  Controls whether detected bad addresses cause them to be bypassed
257
+  rather than aborting. If set, detected bad arguments to free and
258
+  realloc are ignored. And all bookkeeping information is zeroed out
259
+  upon a detected overwrite of freed heap space, thus losing the
260
+  ability to ever return it from malloc again, but enabling the
261
+  application to proceed. If PROCEED_ON_ERROR is defined, the
262
+  static variable malloc_corruption_error_count is compiled in
263
+  and can be examined to see if errors have occurred. This option
264
+  generates slower code than the default abort policy.
265
+
266
+DEBUG                    default: NOT defined
267
+  The DEBUG setting is mainly intended for people trying to modify
268
+  this code or diagnose problems when porting to new platforms.
269
+  However, it may also be able to better isolate user errors than just
270
+  using runtime checks.  The assertions in the check routines spell
271
+  out in more detail the assumptions and invariants underlying the
272
+  algorithms.  The checking is fairly extensive, and will slow down
273
+  execution noticeably. Calling malloc_stats or mallinfo with DEBUG
274
+  set will attempt to check every non-mmapped allocated and free chunk
275
+  in the course of computing the summaries.
276
+
277
+ABORT_ON_ASSERT_FAILURE   default: defined as 1 (true)
278
+  Debugging assertion failures can be nearly impossible if your
279
+  version of the assert macro causes malloc to be called, which will
280
+  lead to a cascade of further failures, blowing the runtime stack.
281
+  ABORT_ON_ASSERT_FAILURE causes assertion failures to call abort(),
282
+  which will usually make debugging easier.
283
+
284
+MALLOC_FAILURE_ACTION     default: sets errno to ENOMEM, or no-op on win32
285
+  The action to take before "return 0" when malloc fails to be able to
286
+  return memory because there is none available.
287
+
288
+HAVE_MORECORE             default: 1 (true) unless win32 or ONLY_MSPACES
289
+  True if this system supports sbrk or an emulation of it.
290
+
291
+MORECORE                  default: sbrk
292
+  The name of the sbrk-style system routine to call to obtain more
293
+  memory.  See below for guidance on writing custom MORECORE
294
+  functions. The type of the argument to sbrk/MORECORE varies across
295
+  systems.  It cannot be size_t, because it supports negative
296
+  arguments, so it is normally the signed type of the same width as
297
+  size_t (sometimes declared as "intptr_t").  It doesn't much matter
298
+  though. Internally, we only call it with arguments less than half
299
+  the max value of a size_t, which should work across all reasonable
300
+  possibilities, although sometimes generating compiler warnings.  See
301
+  near the end of this file for guidelines for creating a custom
302
+  version of MORECORE.
303
+
304
+MORECORE_CONTIGUOUS       default: 1 (true)
305
+  If true, take advantage of fact that consecutive calls to MORECORE
306
+  with positive arguments always return contiguous increasing
307
+  addresses.  This is true of unix sbrk. It does not hurt too much to
308
+  set it true anyway, since malloc copes with non-contiguities.
309
+  Setting it false when definitely non-contiguous saves time
310
+  and possibly wasted space it would take to discover this though.
311
+
312
+MORECORE_CANNOT_TRIM      default: NOT defined
313
+  True if MORECORE cannot release space back to the system when given
314
+  negative arguments. This is generally necessary only if you are
315
+  using a hand-crafted MORECORE function that cannot handle negative
316
+  arguments.
317
+
318
+HAVE_MMAP                 default: 1 (true)
319
+  True if this system supports mmap or an emulation of it.  If so, and
320
+  HAVE_MORECORE is not true, MMAP is used for all system
321
+  allocation. If set and HAVE_MORECORE is true as well, MMAP is
322
+  primarily used to directly allocate very large blocks. It is also
323
+  used as a backup strategy in cases where MORECORE fails to provide
324
+  space from system. Note: A single call to MUNMAP is assumed to be
325
+  able to unmap memory that may have been allocated using multiple calls
326
+  to MMAP, so long as they are adjacent.
327
+
328
+HAVE_MREMAP               default: 1 on linux, else 0
329
+  If true realloc() uses mremap() to re-allocate large blocks and
330
+  extend or shrink allocation spaces.
331
+
332
+MMAP_CLEARS               default: 1 on unix
333
+  True if mmap clears memory so calloc doesn't need to. This is true
334
+  for standard unix mmap using /dev/zero.
335
+
336
+USE_BUILTIN_FFS            default: 0 (i.e., not used)
337
+  Causes malloc to use the builtin ffs() function to compute indices.
338
+  Some compilers may recognize and intrinsify ffs to be faster than the
339
+  supplied C version. Also, the case of x86 using gcc is special-cased
340
+  to an asm instruction, so is already as fast as it can be, and so
341
+  this setting has no effect. (On most x86s, the asm version is only
342
+  slightly faster than the C version.)
343
+
344
+malloc_getpagesize         default: derive from system includes, or 4096.
345
+  The system page size. To the extent possible, this malloc manages
346
+  memory from the system in page-size units.  This may be (and
347
+  usually is) a function rather than a constant. This is ignored
348
+  if WIN32, where page size is determined using getSystemInfo during
349
+  initialization.
350
+
351
+USE_DEV_RANDOM             default: 0 (i.e., not used)
352
+  Causes malloc to use /dev/random to initialize secure magic seed for
353
+  stamping footers. Otherwise, the current time is used.
354
+
355
+NO_MALLINFO                default: 0
356
+  If defined, don't compile "mallinfo". This can be a simple way
357
+  of dealing with mismatches between system declarations and
358
+  those in this file.
359
+
360
+MALLINFO_FIELD_TYPE        default: size_t
361
+  The type of the fields in the mallinfo struct. This was originally
362
+  defined as "int" in SVID etc, but is more usefully defined as
363
+  size_t. The value is used only if  HAVE_USR_INCLUDE_MALLOC_H is not set
364
+
365
+REALLOC_ZERO_BYTES_FREES    default: not defined
366
+  This should be set if a call to realloc with zero bytes should 
367
+  be the same as a call to free. Some people think it should. Otherwise, 
368
+  since this malloc returns a unique pointer for malloc(0), so does 
369
+  realloc(p, 0).
370
+
371
+LACKS_UNISTD_H, LACKS_FCNTL_H, LACKS_SYS_PARAM_H, LACKS_SYS_MMAN_H
372
+LACKS_STRINGS_H, LACKS_STRING_H, LACKS_SYS_TYPES_H,  LACKS_ERRNO_H
373
+LACKS_STDLIB_H                default: NOT defined unless on WIN32
374
+  Define these if your system does not have these header files.
375
+  You might need to manually insert some of the declarations they provide.
376
+
377
+DEFAULT_GRANULARITY        default: page size if MORECORE_CONTIGUOUS,
378
+                                system_info.dwAllocationGranularity in WIN32,
379
+                                otherwise 64K.
380
+      Also settable using mallopt(M_GRANULARITY, x)
381
+  The unit for allocating and deallocating memory from the system.  On
382
+  most systems with contiguous MORECORE, there is no reason to
383
+  make this more than a page. However, systems with MMAP tend to
384
+  either require or encourage larger granularities.  You can increase
385
+  this value to prevent system allocation functions from being called so
386
+  often, especially if they are slow.  The value must be at least one
387
+  page and must be a power of two.  Setting to 0 causes initialization
388
+  to either page size or win32 region size.  (Note: In previous
389
+  versions of malloc, the equivalent of this option was called
390
+  "TOP_PAD")
391
+
392
+DEFAULT_TRIM_THRESHOLD    default: 2MB
393
+      Also settable using mallopt(M_TRIM_THRESHOLD, x)
394
+  The maximum amount of unused top-most memory to keep before
395
+  releasing via malloc_trim in free().  Automatic trimming is mainly
396
+  useful in long-lived programs using contiguous MORECORE.  Because
397
+  trimming via sbrk can be slow on some systems, and can sometimes be
398
+  wasteful (in cases where programs immediately afterward allocate
399
+  more large chunks) the value should be high enough so that your
400
+  overall system performance would improve by releasing this much
401
+  memory.  As a rough guide, you might set to a value close to the
402
+  average size of a process (program) running on your system.
403
+  Releasing this much memory would allow such a process to run in
404
+  memory.  Generally, it is worth tuning trim thresholds when a
405
+  program undergoes phases where several large chunks are allocated
406
+  and released in ways that can reuse each other's storage, perhaps
407
+  mixed with phases where there are no such chunks at all. The trim
408
+  value must be greater than page size to have any useful effect.  To
409
+  disable trimming completely, you can set to MAX_SIZE_T. Note that the trick
410
+  some people use of mallocing a huge space and then freeing it at
411
+  program startup, in an attempt to reserve system memory, doesn't
412
+  have the intended effect under automatic trimming, since that memory
413
+  will immediately be returned to the system.
414
+
415
+DEFAULT_MMAP_THRESHOLD       default: 256K
416
+      Also settable using mallopt(M_MMAP_THRESHOLD, x)
417
+  The request size threshold for using MMAP to directly service a
418
+  request. Requests of at least this size that cannot be allocated
419
+  using already-existing space will be serviced via mmap.  (If enough
420
+  normal freed space already exists it is used instead.)  Using mmap
421
+  segregates relatively large chunks of memory so that they can be
422
+  individually obtained and released from the host system. A request
423
+  serviced through mmap is never reused by any other request (at least
424
+  not directly; the system may just so happen to remap successive
425
+  requests to the same locations).  Segregating space in this way has
426
+  the benefits that: Mmapped space can always be individually released
427
+  back to the system, which helps keep the system level memory demands
428
+  of a long-lived program low.  Also, mapped memory doesn't become
429
+  `locked' between other chunks, as can happen with normally allocated
430
+  chunks, which means that even trimming via malloc_trim would not
431
+  release them.  However, it has the disadvantage that the space
432
+  cannot be reclaimed, consolidated, and then used to service later
433
+  requests, as happens with normal chunks.  The advantages of mmap
434
+  nearly always outweigh disadvantages for "large" chunks, but the
435
+  value of "large" may vary across systems.  The default is an
436
+  empirically derived value that works well in most systems. You can
437
+  disable mmap by setting to MAX_SIZE_T.
438
+
439
+*/
440
+
441
+#include "dl_config.h"
442
+#include "meminfo.h"
443
+
444
+#ifndef WIN32
445
+#ifdef _WIN32
446
+#define WIN32 1
447
+#endif  /* _WIN32 */
448
+#endif  /* WIN32 */
449
+#ifdef WIN32
450
+#define WIN32_LEAN_AND_MEAN
451
+#include <windows.h>
452
+#define HAVE_MMAP 1
453
+#define HAVE_MORECORE 0
454
+#define LACKS_UNISTD_H
455
+#define LACKS_SYS_PARAM_H
456
+#define LACKS_SYS_MMAN_H
457
+#define LACKS_STRING_H
458
+#define LACKS_STRINGS_H
459
+#define LACKS_SYS_TYPES_H
460
+#define LACKS_ERRNO_H
461
+#define MALLOC_FAILURE_ACTION
462
+#define MMAP_CLEARS 0 /* WINCE and some others apparently don't clear */
463
+#endif  /* WIN32 */
464
+
465
+#if defined(DARWIN) || defined(_DARWIN)
466
+/* Mac OSX docs advise not to use sbrk; it seems better to use mmap */
467
+#ifndef HAVE_MORECORE
468
+#define HAVE_MORECORE 0
469
+#define HAVE_MMAP 1
470
+#endif  /* HAVE_MORECORE */
471
+#endif  /* DARWIN */
472
+
473
+#ifndef LACKS_SYS_TYPES_H
474
+#include <sys/types.h>  /* For size_t */
475
+#endif  /* LACKS_SYS_TYPES_H */
476
+
477
+/* The maximum possible size_t value has all bits set */
478
+#define MAX_SIZE_T           (~(size_t)0)
479
+
480
+#ifndef ONLY_MSPACES
481
+#define ONLY_MSPACES 0
482
+#endif  /* ONLY_MSPACES */
483
+#ifndef MSPACES
484
+#if ONLY_MSPACES
485
+#define MSPACES 1
486
+#else   /* ONLY_MSPACES */
487
+#define MSPACES 0
488
+#endif  /* ONLY_MSPACES */
489
+#endif  /* MSPACES */
490
+#ifndef MALLOC_ALIGNMENT
491
+#define MALLOC_ALIGNMENT ((size_t)8U)
492
+#endif  /* MALLOC_ALIGNMENT */
493
+#ifndef FOOTERS
494
+#define FOOTERS 0
495
+#endif  /* FOOTERS */
496
+#ifndef ABORT
497
+#define ABORT  abort()
498
+#endif  /* ABORT */
499
+#ifndef ABORT_ON_ASSERT_FAILURE
500
+#define ABORT_ON_ASSERT_FAILURE 1
501
+#endif  /* ABORT_ON_ASSERT_FAILURE */
502
+#ifndef PROCEED_ON_ERROR
503
+#define PROCEED_ON_ERROR 0
504
+#endif  /* PROCEED_ON_ERROR */
505
+#ifndef USE_LOCKS
506
+#define USE_LOCKS 0
507
+#endif  /* USE_LOCKS */
508
+#ifndef INSECURE
509
+#define INSECURE 0
510
+#endif  /* INSECURE */
511
+#ifndef HAVE_MMAP
512
+#define HAVE_MMAP 1
513
+#endif  /* HAVE_MMAP */
514
+#ifndef MMAP_CLEARS
515
+#define MMAP_CLEARS 1
516
+#endif  /* MMAP_CLEARS */
517
+#ifndef HAVE_MREMAP
518
+#ifdef linux
519
+#define HAVE_MREMAP 1
520
+#else   /* linux */
521
+#define HAVE_MREMAP 0
522
+#endif  /* linux */
523
+#endif  /* HAVE_MREMAP */
524
+#ifndef MALLOC_FAILURE_ACTION
525
+#define MALLOC_FAILURE_ACTION  errno = ENOMEM;
526
+#endif  /* MALLOC_FAILURE_ACTION */
527
+#ifndef HAVE_MORECORE
528
+#if ONLY_MSPACES
529
+#define HAVE_MORECORE 0
530
+#else   /* ONLY_MSPACES */
531
+#define HAVE_MORECORE 1
532
+#endif  /* ONLY_MSPACES */
533
+#endif  /* HAVE_MORECORE */
534
+#if !HAVE_MORECORE
535
+#define MORECORE_CONTIGUOUS 0
536
+#else   /* !HAVE_MORECORE */
537
+#ifndef MORECORE
538
+#define MORECORE sbrk
539
+#endif  /* MORECORE */
540
+#ifndef MORECORE_CONTIGUOUS
541
+#define MORECORE_CONTIGUOUS 1
542
+#endif  /* MORECORE_CONTIGUOUS */
543
+#endif  /* HAVE_MORECORE */
544
+#ifndef DEFAULT_GRANULARITY
545
+#if MORECORE_CONTIGUOUS
546
+#define DEFAULT_GRANULARITY (0)  /* 0 means to compute in init_mparams */
547
+#else   /* MORECORE_CONTIGUOUS */
548
+#define DEFAULT_GRANULARITY ((size_t)64U * (size_t)1024U)
549
+#endif  /* MORECORE_CONTIGUOUS */
550
+#endif  /* DEFAULT_GRANULARITY */
551
+#ifndef DEFAULT_TRIM_THRESHOLD
552
+#ifndef MORECORE_CANNOT_TRIM
553
+#define DEFAULT_TRIM_THRESHOLD ((size_t)2U * (size_t)1024U * (size_t)1024U)
554
+#else   /* MORECORE_CANNOT_TRIM */
555
+#define DEFAULT_TRIM_THRESHOLD MAX_SIZE_T
556
+#endif  /* MORECORE_CANNOT_TRIM */
557
+#endif  /* DEFAULT_TRIM_THRESHOLD */
558
+#ifndef DEFAULT_MMAP_THRESHOLD
559
+#if HAVE_MMAP
560
+#define DEFAULT_MMAP_THRESHOLD ((size_t)256U * (size_t)1024U)
561
+#else   /* HAVE_MMAP */
562
+#define DEFAULT_MMAP_THRESHOLD MAX_SIZE_T
563
+#endif  /* HAVE_MMAP */
564
+#endif  /* DEFAULT_MMAP_THRESHOLD */
565
+#ifndef USE_BUILTIN_FFS
566
+#define USE_BUILTIN_FFS 0
567
+#endif  /* USE_BUILTIN_FFS */
568
+#ifndef USE_DEV_RANDOM
569
+#define USE_DEV_RANDOM 0
570
+#endif  /* USE_DEV_RANDOM */
571
+#ifndef NO_MALLINFO
572
+#define NO_MALLINFO 0
573
+#endif  /* NO_MALLINFO */
574
+#ifndef MALLINFO_FIELD_TYPE
575
+#define MALLINFO_FIELD_TYPE size_t
576
+#endif  /* MALLINFO_FIELD_TYPE */
577
+
578
+/*
579
+  mallopt tuning options.  SVID/XPG defines four standard parameter
580
+  numbers for mallopt, normally defined in malloc.h.  None of these
581
+  are used in this malloc, so setting them has no effect. But this
582
+  malloc does support the following options.
583
+*/
584
+
585
+#define M_TRIM_THRESHOLD     (-1)
586
+#define M_GRANULARITY        (-2)
587
+#define M_MMAP_THRESHOLD     (-3)
588
+
589
+/* ------------------------ Mallinfo declarations ------------------------ */
590
+
591
+#if !NO_MALLINFO
592
+/*
593
+  This version of malloc supports the standard SVID/XPG mallinfo
594
+  routine that returns a struct containing usage properties and
595
+  statistics. It should work on any system that has a
596
+  /usr/include/malloc.h defining struct mallinfo.  The main
597
+  declaration needed is the mallinfo struct that is returned (by-copy)
598
+  by mallinfo().  The mallinfo struct contains a bunch of fields that
599
+  are not even meaningful in this version of malloc.  These fields are
600
+  instead filled by mallinfo() with other numbers that might be of
601
+  interest.
602
+
603
+  HAVE_USR_INCLUDE_MALLOC_H should be set if you have a
604
+  /usr/include/malloc.h file that includes a declaration of struct
605
+  mallinfo.  If so, it is included; else a compliant version is
606
+  declared below.  These must be precisely the same for mallinfo() to
607
+  work.  The original SVID version of this struct, defined on most
608
+  systems with mallinfo, declares all fields as ints. But some others
609
+  define as unsigned long. If your system defines the fields using a
610
+  type of different width than listed here, you MUST #include your
611
+  system version and #define HAVE_USR_INCLUDE_MALLOC_H.
612
+*/
613
+
614
+/* #define HAVE_USR_INCLUDE_MALLOC_H */
615
+
616
+#ifdef HAVE_USR_INCLUDE_MALLOC_H
617
+#include "/usr/include/malloc.h"
618
+#else /* HAVE_USR_INCLUDE_MALLOC_H */
619
+
620
+struct mallinfo {
621
+  MALLINFO_FIELD_TYPE arena;    /* non-mmapped space allocated from system */
622
+  MALLINFO_FIELD_TYPE ordblks;  /* number of free chunks */
623
+  MALLINFO_FIELD_TYPE smblks;   /* always 0 */
624
+  MALLINFO_FIELD_TYPE hblks;    /* always 0 */
625
+  MALLINFO_FIELD_TYPE hblkhd;   /* space in mmapped regions */
626
+  MALLINFO_FIELD_TYPE usmblks;  /* maximum total allocated space */
627
+  MALLINFO_FIELD_TYPE fsmblks;  /* always 0 */
628
+  MALLINFO_FIELD_TYPE uordblks; /* total allocated space */
629
+  MALLINFO_FIELD_TYPE fordblks; /* total free space */
630
+  MALLINFO_FIELD_TYPE keepcost; /* releasable (via malloc_trim) space */
631
+};
632
+
633
+#endif /* HAVE_USR_INCLUDE_MALLOC_H */
634
+#endif /* NO_MALLINFO */
635
+
636
+#ifdef __cplusplus
637
+extern "C" {
638
+#endif /* __cplusplus */
639
+
640
+#if !ONLY_MSPACES
641
+
642
+/* ------------------- Declarations of public routines ------------------- */
643
+
644
+#ifndef USE_DL_PREFIX
645
+#define dlcalloc               calloc
646
+#define dlfree                 free
647
+#define dlmalloc               malloc
648
+#define dlmemalign             memalign
649
+#define dlrealloc              realloc
650
+#define dlvalloc               valloc
651
+#define dlpvalloc              pvalloc
652
+#define dlmallinfo             mallinfo
653
+#define dlmallopt              mallopt
654
+#define dlmalloc_trim          malloc_trim
655
+#define dlmalloc_stats         malloc_stats
656
+#define dlmalloc_usable_size   malloc_usable_size
657
+#define dlmalloc_footprint     malloc_footprint
658
+#define dlmalloc_max_footprint malloc_max_footprint
659
+#define dlindependent_calloc   independent_calloc
660
+#define dlindependent_comalloc independent_comalloc
661
+#endif /* USE_DL_PREFIX */
662
+
663
+
664
+/*
665
+  malloc(size_t n)
666
+  Returns a pointer to a newly allocated chunk of at least n bytes, or
667
+  null if no space is available, in which case errno is set to ENOMEM
668
+  on ANSI C systems.
669
+
670
+  If n is zero, malloc returns a minimum-sized chunk. (The minimum
671
+  size is 16 bytes on most 32bit systems, and 32 bytes on 64bit
672
+  systems.)  Note that size_t is an unsigned type, so calls with
673
+  arguments that would be negative if signed are interpreted as
674
+  requests for huge amounts of space, which will often fail. The
675
+  maximum supported value of n differs across systems, but is in all
676
+  cases less than the maximum representable value of a size_t.
677
+*/
678
+void* dlmalloc(size_t);
679
+
680
+/*
681
+  free(void* p)
682
+  Releases the chunk of memory pointed to by p, that had been previously
683
+  allocated using malloc or a related routine such as realloc.
684
+  It has no effect if p is null. If p was not malloced or already
685
+  freed, free(p) will by default cause the current program to abort.
686
+*/
687
+void  dlfree(void*);
688
+
689
+/*
690
+  calloc(size_t n_elements, size_t element_size);
691
+  Returns a pointer to n_elements * element_size bytes, with all locations
692
+  set to zero.
693
+*/
694
+void* dlcalloc(size_t, size_t);
695
+
696
+/*
697
+  realloc(void* p, size_t n)
698
+  Returns a pointer to a chunk of size n that contains the same data
699
+  as does chunk p up to the minimum of (n, p's size) bytes, or null
700
+  if no space is available.
701
+
702
+  The returned pointer may or may not be the same as p. The algorithm
703
+  prefers extending p in most cases when possible, otherwise it
704
+  employs the equivalent of a malloc-copy-free sequence.
705
+
706
+  If p is null, realloc is equivalent to malloc.
707
+
708
+  If space is not available, realloc returns null, errno is set (if on
709
+  ANSI) and p is NOT freed.
710
+
711
+  if n is for fewer bytes than already held by p, the newly unused
712
+  space is lopped off and freed if possible.  realloc with a size
713
+  argument of zero (re)allocates a minimum-sized chunk.
714
+
715
+  The old unix realloc convention of allowing the last-free'd chunk
716
+  to be used as an argument to realloc is not supported.
717
+*/
718
+
719
+void* dlrealloc(void*, size_t);
720
+
721
+/*
722
+  memalign(size_t alignment, size_t n);
723
+  Returns a pointer to a newly allocated chunk of n bytes, aligned
724
+  in accord with the alignment argument.
725
+
726
+  The alignment argument should be a power of two. If the argument is
727
+  not a power of two, the nearest greater power is used.
728
+  8-byte alignment is guaranteed by normal malloc calls, so don't
729
+  bother calling memalign with an argument of 8 or less.
730
+
731
+  Overreliance on memalign is a sure way to fragment space.
732
+*/
733
+void* dlmemalign(size_t, size_t);
734
+
735
+/*
736
+  valloc(size_t n);
737
+  Equivalent to memalign(pagesize, n), where pagesize is the page
738
+  size of the system. If the pagesize is unknown, 4096 is used.
739
+*/
740
+void* dlvalloc(size_t);
741
+
742
+/*
743
+  mallopt(int parameter_number, int parameter_value)
744
+  Sets tunable parameters The format is to provide a
745
+  (parameter-number, parameter-value) pair.  mallopt then sets the
746
+  corresponding parameter to the argument value if it can (i.e., so
747
+  long as the value is meaningful), and returns 1 if successful else
748
+  0.  SVID/XPG/ANSI defines four standard param numbers for mallopt,
749
+  normally defined in malloc.h.  None of these are use in this malloc,
750
+  so setting them has no effect. But this malloc also supports other
751
+  options in mallopt. See below for details.  Briefly, supported
752
+  parameters are as follows (listed defaults are for "typical"
753
+  configurations).
754
+
755
+  Symbol            param #  default    allowed param values
756
+  M_TRIM_THRESHOLD     -1   2*1024*1024   any   (MAX_SIZE_T disables)
757
+  M_GRANULARITY        -2     page size   any power of 2 >= page size
758
+  M_MMAP_THRESHOLD     -3      256*1024   any   (or 0 if no MMAP support)
759
+*/
760
+int dlmallopt(int, int);
761
+
762
+/*
763
+  malloc_footprint();
764
+  Returns the number of bytes obtained from the system.  The total
765
+  number of bytes allocated by malloc, realloc etc., is less than this
766
+  value. Unlike mallinfo, this function returns only a precomputed
767
+  result, so can be called frequently to monitor memory consumption.
768
+  Even if locks are otherwise defined, this function does not use them,
769
+  so results might not be up to date.
770
+*/
771
+size_t dlmalloc_footprint(void);
772
+
773
+/*
774
+  malloc_max_footprint();
775
+  Returns the maximum number of bytes obtained from the system. This
776
+  value will be greater than current footprint if deallocated space
777
+  has been reclaimed by the system. The peak number of bytes allocated
778
+  by malloc, realloc etc., is less than this value. Unlike mallinfo,
779
+  this function returns only a precomputed result, so can be called
780
+  frequently to monitor memory consumption.  Even if locks are
781
+  otherwise defined, this function does not use them, so results might
782
+  not be up to date.
783
+*/
784
+size_t dlmalloc_max_footprint(void);
785
+
786
+#if !NO_MALLINFO
787
+/*
788
+  mallinfo()
789
+  Returns (by copy) a struct containing various summary statistics:
790
+
791
+  arena:     current total non-mmapped bytes allocated from system
792
+  ordblks:   the number of free chunks
793
+  smblks:    always zero.
794
+  hblks:     current number of mmapped regions
795
+  hblkhd:    total bytes held in mmapped regions
796
+  usmblks:   the maximum total allocated space. This will be greater
797
+                than current total if trimming has occurred.
798
+  fsmblks:   always zero
799
+  uordblks:  current total allocated space (normal or mmapped)
800
+  fordblks:  total free space
801
+  keepcost:  the maximum number of bytes that could ideally be released
802
+               back to system via malloc_trim. ("ideally" means that
803
+               it ignores page restrictions etc.)
804
+
805
+  Because these fields are ints, but internal bookkeeping may
806
+  be kept as longs, the reported values may wrap around zero and
807
+  thus be inaccurate.
808
+*/
809
+struct mallinfo dlmallinfo(void);
810
+#endif /* NO_MALLINFO */
811
+
812
+/*
813
+  independent_calloc(size_t n_elements, size_t element_size, void* chunks[]);
814
+
815
+  independent_calloc is similar to calloc, but instead of returning a
816
+  single cleared space, it returns an array of pointers to n_elements
817
+  independent elements that can hold contents of size elem_size, each
818
+  of which starts out cleared, and can be independently freed,
819
+  realloc'ed etc. The elements are guaranteed to be adjacently
820
+  allocated (this is not guaranteed to occur with multiple callocs or
821
+  mallocs), which may also improve cache locality in some
822
+  applications.
823
+
824
+  The "chunks" argument is optional (i.e., may be null, which is
825
+  probably the most typical usage). If it is null, the returned array
826
+  is itself dynamically allocated and should also be freed when it is
827
+  no longer needed. Otherwise, the chunks array must be of at least
828
+  n_elements in length. It is filled in with the pointers to the
829
+  chunks.
830
+
831
+  In either case, independent_calloc returns this pointer array, or
832
+  null if the allocation failed.  If n_elements is zero and "chunks"
833
+  is null, it returns a chunk representing an array with zero elements
834
+  (which should be freed if not wanted).
835
+
836
+  Each element must be individually freed when it is no longer
837
+  needed. If you'd like to instead be able to free all at once, you
838
+  should instead use regular calloc and assign pointers into this
839
+  space to represent elements.  (In this case though, you cannot
840
+  independently free elements.)
841
+
842
+  independent_calloc simplifies and speeds up implementations of many
843
+  kinds of pools.  It may also be useful when constructing large data
844
+  structures that initially have a fixed number of fixed-sized nodes,
845
+  but the number is not known at compile time, and some of the nodes
846
+  may later need to be freed. For example:
847
+
848
+  struct Node { int item; struct Node* next; };
849
+
850
+  struct Node* build_list() {
851
+    struct Node** pool;
852
+    int n = read_number_of_nodes_needed();
853
+    if (n <= 0) return 0;
854
+    pool = (struct Node**)independent_calloc(n, sizeof(struct Node), 0);
855
+    if (pool == 0) die();
856
+    // organize into a linked list...
857
+    struct Node* first = pool[0];
858
+    for (i = 0; i < n-1; ++i)
859
+      pool[i]->next = pool[i+1];
860
+    free(pool);     // Can now free the array (or not, if it is needed later)
861
+    return first;
862
+  }
863
+*/
864
+void** dlindependent_calloc(size_t, size_t, void**);
865
+
866
+/*
867
+  independent_comalloc(size_t n_elements, size_t sizes[], void* chunks[]);
868
+
869
+  independent_comalloc allocates, all at once, a set of n_elements
870
+  chunks with sizes indicated in the "sizes" array.    It returns
871
+  an array of pointers to these elements, each of which can be
872
+  independently freed, realloc'ed etc. The elements are guaranteed to
873
+  be adjacently allocated (this is not guaranteed to occur with
874
+  multiple callocs or mallocs), which may also improve cache locality
875
+  in some applications.
876
+
877
+  The "chunks" argument is optional (i.e., may be null). If it is null
878
+  the returned array is itself dynamically allocated and should also
879
+  be freed when it is no longer needed. Otherwise, the chunks array
880
+  must be of at least n_elements in length. It is filled in with the
881
+  pointers to the chunks.
882
+
883
+  In either case, independent_comalloc returns this pointer array, or
884
+  null if the allocation failed.  If n_elements is zero and chunks is
885
+  null, it returns a chunk representing an array with zero elements
886
+  (which should be freed if not wanted).
887
+
888
+  Each element must be individually freed when it is no longer
889
+  needed. If you'd like to instead be able to free all at once, you
890
+  should instead use a single regular malloc, and assign pointers at
891
+  particular offsets in the aggregate space. (In this case though, you
892
+  cannot independently free elements.)
893
+
894
+  independent_comalloc differs from independent_calloc in that each
895
+  element may have a different size, and also that it does not
896
+  automatically clear elements.
897
+
898
+  independent_comalloc can be used to speed up allocation in cases
899
+  where several structs or objects must always be allocated at the
900
+  same time.  For example:
901
+
902
+  struct Head { ... }
903
+  struct Foot { ... }
904
+
905
+  void send_message(char* msg) {
906
+    int msglen = strlen(msg);
907
+    size_t sizes[3] = { sizeof(struct Head), msglen, sizeof(struct Foot) };
908
+    void* chunks[3];
909
+    if (independent_comalloc(3, sizes, chunks) == 0)
910
+      die();
911
+    struct Head* head = (struct Head*)(chunks[0]);
912
+    char*        body = (char*)(chunks[1]);
913
+    struct Foot* foot = (struct Foot*)(chunks[2]);
914
+    // ...
915
+  }
916
+
917
+  In general though, independent_comalloc is worth using only for
918
+  larger values of n_elements. For small values, you probably won't
919
+  detect enough difference from series of malloc calls to bother.
920
+
921
+  Overuse of independent_comalloc can increase overall memory usage,
922
+  since it cannot reuse existing noncontiguous small chunks that
923
+  might be available for some of the elements.
924
+*/
925
+void** dlindependent_comalloc(size_t, size_t*, void**);
926
+
927
+
928
+/*
929
+  pvalloc(size_t n);
930
+  Equivalent to valloc(minimum-page-that-holds(n)), that is,
931
+  round up n to nearest pagesize.
932
+ */
933
+void*  dlpvalloc(size_t);
934
+
935
+/*
936
+  malloc_trim(size_t pad);
937
+
938
+  If possible, gives memory back to the system (via negative arguments
939
+  to sbrk) if there is unused memory at the `high' end of the malloc
940
+  pool or in unused MMAP segments. You can call this after freeing
941
+  large blocks of memory to potentially reduce the system-level memory
942
+  requirements of a program. However, it cannot guarantee to reduce
943
+  memory. Under some allocation patterns, some large free blocks of
944
+  memory will be locked between two used chunks, so they cannot be
945
+  given back to the system.
946
+
947
+  The `pad' argument to malloc_trim represents the amount of free
948
+  trailing space to leave untrimmed. If this argument is zero, only
949
+  the minimum amount of memory to maintain internal data structures
950
+  will be left. Non-zero arguments can be supplied to maintain enough
951
+  trailing space to service future expected allocations without having
952
+  to re-obtain memory from the system.
953
+
954
+  Malloc_trim returns 1 if it actually released any memory, else 0.
955
+*/
956
+int  dlmalloc_trim(size_t);
957
+
958
+/*
959
+  malloc_usable_size(void* p);
960
+
961
+  Returns the number of bytes you can actually use in
962
+  an allocated chunk, which may be more than you requested (although
963
+  often not) due to alignment and minimum size constraints.
964
+  You can use this many bytes without worrying about
965
+  overwriting other allocated objects. This is not a particularly great
966
+  programming practice. malloc_usable_size can be more useful in
967
+  debugging and assertions, for example:
968
+
969
+  p = malloc(n);
970
+  assert(malloc_usable_size(p) >= 256);
971
+*/
972
+size_t dlmalloc_usable_size(void*);
973
+
974
+/*
975
+  malloc_stats();
976
+  Prints on stderr the amount of space obtained from the system (both
977
+  via sbrk and mmap), the maximum amount (which may be more than
978
+  current if malloc_trim and/or munmap got called), and the current
979
+  number of bytes allocated via malloc (or realloc, etc) but not yet
980
+  freed. Note that this is the number of bytes allocated, not the
981
+  number requested. It will be larger than the number requested
982
+  because of alignment and bookkeeping overhead. Because it includes
983
+  alignment wastage as being in use, this figure may be greater than
984
+  zero even when no user-level chunks are allocated.
985
+
986
+  The reported current and maximum system memory can be inaccurate if
987
+  a program makes other calls to system memory allocation functions
988
+  (normally sbrk) outside of malloc.
989
+
990
+  malloc_stats prints only the most commonly interesting statistics.
991
+  More information can be obtained by calling mallinfo.
992
+*/
993
+void  dlmalloc_stats(void);
994
+
995
+#endif /* ONLY_MSPACES */
996
+
997
+#if MSPACES
998
+
999
+/*
1000
+  mspace is an opaque type representing an independent
1001
+  region of space that supports mspace_malloc, etc.
1002
+*/
1003
+typedef void* mspace;
1004
+
1005
+/*
1006
+  create_mspace creates and returns a new independent space with the
1007
+  given initial capacity, or, if 0, the default granularity size.  It
1008
+  returns null if there is no system memory available to create the
1009
+  space.  If argument locked is non-zero, the space uses a separate
1010
+  lock to control access. The capacity of the space will grow
1011
+  dynamically as needed to service mspace_malloc requests.  You can
1012
+  control the sizes of incremental increases of this space by
1013
+  compiling with a different DEFAULT_GRANULARITY or dynamically
1014
+  setting with mallopt(M_GRANULARITY, value).
1015
+*/
1016
+mspace create_mspace(size_t capacity, int locked);
1017
+
1018
+/*
1019
+  destroy_mspace destroys the given space, and attempts to return all
1020
+  of its memory back to the system, returning the total number of
1021
+  bytes freed. After destruction, the results of access to all memory
1022
+  used by the space become undefined.
1023
+*/
1024
+size_t destroy_mspace(mspace msp);
1025
+
1026
+/*
1027
+  create_mspace_with_base uses the memory supplied as the initial base
1028
+  of a new mspace. Part (less than 128*sizeof(size_t) bytes) of this
1029
+  space is used for bookkeeping, so the capacity must be at least this
1030
+  large. (Otherwise 0 is returned.) When this initial space is
1031
+  exhausted, additional memory will be obtained from the system.
1032
+  Destroying this space will deallocate all additionally allocated
1033
+  space (if possible) but not the initial base.
1034
+*/
1035
+mspace create_mspace_with_base(void* base, size_t capacity, int locked);
1036
+
1037
+/*
1038
+  mspace_malloc behaves as malloc, but operates within
1039
+  the given space.
1040
+*/
1041
+void* mspace_malloc(mspace msp, size_t bytes);
1042
+
1043
+/*
1044
+  mspace_free behaves as free, but operates within
1045
+  the given space.
1046
+
1047
+  If compiled with FOOTERS==1, mspace_free is not actually needed.
1048
+  free may be called instead of mspace_free because freed chunks from
1049
+  any space are handled by their originating spaces.
1050
+*/
1051
+void mspace_free(mspace msp, void* mem);
1052
+
1053
+/*
1054
+  mspace_realloc behaves as realloc, but operates within
1055
+  the given space.
1056
+
1057
+  If compiled with FOOTERS==1, mspace_realloc is not actually
1058
+  needed.  realloc may be called instead of mspace_realloc because
1059
+  realloced chunks from any space are handled by their originating
1060
+  spaces.
1061
+*/
1062
+void* mspace_realloc(mspace msp, void* mem, size_t newsize);
1063
+
1064
+/*
1065
+  mspace_calloc behaves as calloc, but operates within
1066
+  the given space.
1067
+*/
1068
+void* mspace_calloc(mspace msp, size_t n_elements, size_t elem_size);
1069
+
1070
+/*
1071
+  mspace_memalign behaves as memalign, but operates within
1072
+  the given space.
1073
+*/
1074
+void* mspace_memalign(mspace msp, size_t alignment, size_t bytes);
1075
+
1076
+/*
1077
+  mspace_independent_calloc behaves as independent_calloc, but
1078
+  operates within the given space.
1079
+*/
1080
+void** mspace_independent_calloc(mspace msp, size_t n_elements,
1081
+                                 size_t elem_size, void* chunks[]);
1082
+
1083
+/*
1084
+  mspace_independent_comalloc behaves as independent_comalloc, but
1085
+  operates within the given space.
1086
+*/
1087
+void** mspace_independent_comalloc(mspace msp, size_t n_elements,
1088
+                                   size_t sizes[], void* chunks[]);
1089
+
1090
+/*
1091
+  mspace_footprint() returns the number of bytes obtained from the
1092
+  system for this space.
1093
+*/
1094
+size_t mspace_footprint(mspace msp);
1095
+
1096
+/*
1097
+  mspace_max_footprint() returns the peak number of bytes obtained from the
1098
+  system for this space.
1099
+*/
1100
+size_t mspace_max_footprint(mspace msp);
1101
+
1102
+
1103
+#if !NO_MALLINFO
1104
+/*
1105
+  mspace_mallinfo behaves as mallinfo, but reports properties of
1106
+  the given space.
1107
+*/
1108
+struct mallinfo mspace_mallinfo(mspace msp);
1109
+#endif /* NO_MALLINFO */
1110
+
1111
+/*
1112
+  mspace_malloc_stats behaves as malloc_stats, but reports
1113
+  properties of the given space.
1114
+*/
1115
+void mspace_malloc_stats(mspace msp);
1116
+
1117
+/*
1118
+  mspace_trim behaves as malloc_trim, but
1119
+  operates within the given space.
1120
+*/
1121
+int mspace_trim(mspace msp, size_t pad);
1122
+
1123
+/*
1124
+  An alias for mallopt.
1125
+*/
1126
+int mspace_mallopt(int, int);
1127
+
1128
+#endif /* MSPACES */
1129
+
1130
+#ifdef __cplusplus
1131
+};  /* end of extern "C" */
1132
+#endif /* __cplusplus */
1133
+
1134
+/*
1135
+  ========================================================================
1136
+  To make a fully customizable malloc.h header file, cut everything
1137
+  above this line, put into file malloc.h, edit to suit, and #include it
1138
+  on the next line, as well as in programs that use this malloc.
1139
+  ========================================================================
1140
+*/
1141
+
1142
+/* #include "malloc.h" */
1143
+
1144
+/*------------------------------ internal #includes ---------------------- */
1145
+
1146
+#ifdef WIN32
1147
+#pragma warning( disable : 4146 ) /* no "unsigned" warnings */
1148
+#endif /* WIN32 */
1149
+
1150
+#include <stdio.h>       /* for printing in malloc_stats */
1151
+
1152
+#ifndef LACKS_ERRNO_H
1153
+#include <errno.h>       /* for MALLOC_FAILURE_ACTION */
1154
+#endif /* LACKS_ERRNO_H */
1155
+#if FOOTERS
1156
+#include <time.h>        /* for magic initialization */
1157
+#endif /* FOOTERS */
1158
+#ifndef LACKS_STDLIB_H
1159
+#include <stdlib.h>      /* for abort() */
1160
+#endif /* LACKS_STDLIB_H */
1161
+#ifdef DEBUG
1162
+#if ABORT_ON_ASSERT_FAILURE
1163
+#define assert(x) if(!(x)) ABORT
1164
+#else /* ABORT_ON_ASSERT_FAILURE */
1165
+#include <assert.h>
1166
+#endif /* ABORT_ON_ASSERT_FAILURE */
1167
+#else  /* DEBUG */
1168
+#define assert(x)
1169
+#endif /* DEBUG */
1170
+#ifndef LACKS_STRING_H
1171
+#include <string.h>      /* for memset etc */
1172
+#endif  /* LACKS_STRING_H */
1173
+#if USE_BUILTIN_FFS
1174
+#ifndef LACKS_STRINGS_H
1175
+#include <strings.h>     /* for ffs */
1176
+#endif /* LACKS_STRINGS_H */
1177
+#endif /* USE_BUILTIN_FFS */
1178
+#if HAVE_MMAP
1179
+#ifndef LACKS_SYS_MMAN_H
1180
+#include <sys/mman.h>    /* for mmap */
1181
+#endif /* LACKS_SYS_MMAN_H */
1182
+#ifndef LACKS_FCNTL_H
1183
+#include <fcntl.h>
1184
+#endif /* LACKS_FCNTL_H */
1185
+#endif /* HAVE_MMAP */
1186
+#if HAVE_MORECORE
1187
+#ifndef LACKS_UNISTD_H
1188
+#include <unistd.h>     /* for sbrk */
1189
+#else /* LACKS_UNISTD_H */
1190
+#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__)
1191
+extern void*     sbrk(ptrdiff_t);
1192
+#endif /* FreeBSD etc */
1193
+#endif /* LACKS_UNISTD_H */
1194
+#endif /* HAVE_MORECORE */
1195
+
1196
+#ifndef WIN32
1197
+#ifndef malloc_getpagesize
1198
+#  ifdef _SC_PAGESIZE         /* some SVR4 systems omit an underscore */
1199
+#    ifndef _SC_PAGE_SIZE
1200
+#      define _SC_PAGE_SIZE _SC_PAGESIZE
1201
+#    endif
1202
+#  endif
1203
+#  ifdef _SC_PAGE_SIZE
1204
+#    define malloc_getpagesize sysconf(_SC_PAGE_SIZE)
1205
+#  else
1206
+#    if defined(BSD) || defined(DGUX) || defined(HAVE_GETPAGESIZE)
1207
+       extern size_t getpagesize();
1208
+#      define malloc_getpagesize getpagesize()
1209
+#    else
1210
+#      ifdef WIN32 /* use supplied emulation of getpagesize */
1211
+#        define malloc_getpagesize getpagesize()
1212
+#      else
1213
+#        ifndef LACKS_SYS_PARAM_H
1214
+#          include <sys/param.h>
1215
+#        endif
1216
+#        ifdef EXEC_PAGESIZE
1217
+#          define malloc_getpagesize EXEC_PAGESIZE
1218
+#        else
1219
+#          ifdef NBPG
1220
+#            ifndef CLSIZE
1221
+#              define malloc_getpagesize NBPG
1222
+#            else
1223
+#              define malloc_getpagesize (NBPG * CLSIZE)
1224
+#            endif
1225
+#          else
1226
+#            ifdef NBPC
1227
+#              define malloc_getpagesize NBPC
1228
+#            else
1229
+#              ifdef PAGESIZE
1230
+#                define malloc_getpagesize PAGESIZE
1231
+#              else /* just guess */
1232
+#                define malloc_getpagesize ((size_t)4096U)
1233
+#              endif
1234
+#            endif
1235
+#          endif
1236
+#        endif
1237
+#      endif
1238
+#    endif
1239
+#  endif
1240
+#endif
1241
+#endif
1242
+
1243
+/* ------------------- size_t and alignment properties -------------------- */
1244
+
1245
+/* The byte and bit size of a size_t */
1246
+#define SIZE_T_SIZE         (sizeof(size_t))
1247
+#define SIZE_T_BITSIZE      (sizeof(size_t) << 3)
1248
+
1249
+/* Some constants coerced to size_t */
1250
+/* Annoying but necessary to avoid errors on some platforms */
1251
+#define SIZE_T_ZERO         ((size_t)0)
1252
+#define SIZE_T_ONE          ((size_t)1)
1253
+#define SIZE_T_TWO          ((size_t)2)
1254
+#define TWO_SIZE_T_SIZES    (SIZE_T_SIZE<<1)
1255
+#define FOUR_SIZE_T_SIZES   (SIZE_T_SIZE<<2)
1256
+#define SIX_SIZE_T_SIZES    (FOUR_SIZE_T_SIZES+TWO_SIZE_T_SIZES)
1257
+#define HALF_MAX_SIZE_T     (MAX_SIZE_T / 2U)
1258
+
1259
+/* The bit mask value corresponding to MALLOC_ALIGNMENT */
1260
+#define CHUNK_ALIGN_MASK    (MALLOC_ALIGNMENT - SIZE_T_ONE)
1261
+
1262
+/* True if address a has acceptable alignment */
1263
+#define is_aligned(A)       (((size_t)((A)) & (CHUNK_ALIGN_MASK)) == 0)
1264
+
1265
+/* the number of bytes to offset an address to align it */
1266
+#define align_offset(A)\
1267
+ ((((size_t)(A) & CHUNK_ALIGN_MASK) == 0)? 0 :\
1268
+  ((MALLOC_ALIGNMENT - ((size_t)(A) & CHUNK_ALIGN_MASK)) & CHUNK_ALIGN_MASK))
1269
+
1270
+/* -------------------------- MMAP preliminaries ------------------------- */
1271
+
1272
+/*
1273
+   If HAVE_MORECORE or HAVE_MMAP are false, we just define calls and
1274
+   checks to fail so compiler optimizer can delete code rather than
1275
+   using so many "#if"s.
1276
+*/
1277
+
1278
+
1279
+/* MORECORE and MMAP must return MFAIL on failure */
1280
+#define MFAIL                ((void*)(MAX_SIZE_T))
1281
+#define CMFAIL               ((char*)(MFAIL)) /* defined for convenience */
1282
+
1283
+#if !HAVE_MMAP
1284
+#define IS_MMAPPED_BIT       (SIZE_T_ZERO)
1285
+#define USE_MMAP_BIT         (SIZE_T_ZERO)
1286
+#define CALL_MMAP(s)         MFAIL
1287
+#define CALL_MUNMAP(a, s)    (-1)
1288
+#define DIRECT_MMAP(s)       MFAIL
1289
+
1290
+#else /* HAVE_MMAP */
1291
+#define IS_MMAPPED_BIT       (SIZE_T_ONE)
1292
+#define USE_MMAP_BIT         (SIZE_T_ONE)
1293
+
1294
+#ifndef WIN32
1295
+#define CALL_MUNMAP(a, s)    munmap((a), (s))
1296
+#define MMAP_PROT            (PROT_READ|PROT_WRITE)
1297
+#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
1298
+#define MAP_ANONYMOUS        MAP_ANON
1299
+#endif /* MAP_ANON */
1300
+#ifdef MAP_ANONYMOUS
1301
+#define MMAP_FLAGS           (MAP_PRIVATE|MAP_ANONYMOUS)
1302
+#define CALL_MMAP(s)         mmap(0, (s), MMAP_PROT, MMAP_FLAGS, -1, 0)
1303
+#else /* MAP_ANONYMOUS */
1304
+/*
1305
+   Nearly all versions of mmap support MAP_ANONYMOUS, so the following
1306
+   is unlikely to be needed, but is supplied just in case.
1307
+*/
1308
+#define MMAP_FLAGS           (MAP_PRIVATE)
1309
+static int dev_zero_fd = -1; /* Cached file descriptor for /dev/zero. */
1310
+#define CALL_MMAP(s) ((dev_zero_fd < 0) ? \
1311
+           (dev_zero_fd = open("/dev/zero", O_RDWR), \
1312
+            mmap(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0)) : \
1313
+            mmap(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0))
1314
+#endif /* MAP_ANONYMOUS */
1315
+
1316
+#define DIRECT_MMAP(s)       CALL_MMAP(s)
1317
+#else /* WIN32 */
1318
+
1319
+/* Win32 MMAP via VirtualAlloc */
1320
+static void* win32mmap(size_t size) {
1321
+  void* ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
1322
+  return (ptr != 0)? ptr: MFAIL;
1323
+}
1324
+
1325
+/* For direct MMAP, use MEM_TOP_DOWN to minimize interference */
1326
+static void* win32direct_mmap(size_t size) {
1327
+  void* ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN,
1328
+                           PAGE_READWRITE);
1329
+  return (ptr != 0)? ptr: MFAIL;
1330
+}
1331
+
1332
+/* This function supports releasing coalesced segments */
1333
/* Release the address range [ptr, ptr+size).  Returns 0 on success,
   -1 on any failure (in which case part of the range may remain mapped).
   The range may have been built from several adjacent VirtualAlloc
   allocations, so they are walked and released one region at a time. */
static int win32munmap(void* ptr, size_t size) {
  MEMORY_BASIC_INFORMATION minfo;
  char* cptr = ptr;
  while (size) {
    if (VirtualQuery(cptr, &minfo, sizeof(minfo)) == 0)
      return -1;
    /* Sanity checks: the region must start exactly at cptr, be its own
       allocation base, be committed, and lie entirely inside the
       remaining range — otherwise refuse rather than free foreign memory. */
    if (minfo.BaseAddress != cptr || minfo.AllocationBase != cptr ||
        minfo.State != MEM_COMMIT || minfo.RegionSize > size)
      return -1;
    /* size 0 with MEM_RELEASE frees the whole allocation based at cptr */
    if (VirtualFree(cptr, 0, MEM_RELEASE) == 0)
      return -1;
    cptr += minfo.RegionSize;
    size -= minfo.RegionSize;
  }
  return 0;
}
1349
+
1350
+#define CALL_MMAP(s)         win32mmap(s)
1351
+#define CALL_MUNMAP(a, s)    win32munmap((a), (s))
1352
+#define DIRECT_MMAP(s)       win32direct_mmap(s)
1353
+#endif /* WIN32 */
1354
+#endif /* HAVE_MMAP */
1355
+
1356
+#if HAVE_MMAP && HAVE_MREMAP