Browse code

Add a new memory allocator (-DDL_MALLOC) which is based on Doug Lea's dl_malloc implementation (http://g.oswego.edu/dl/html/malloc.html). Patch has been provided by Jan Andres <jan.andres@freenet-ag.de>

Notes:
- very fast, esp. on shm_malloc() in comparison to other algorithms
- has been in production use at freenet(.de) for 6+ months
- is picky about buffer overruns (aka detects and crashes on them)
- disabled by default (set -DDL_MALLOC in Makefile.defs)
- closes SER-278

Hendrik Scholz authored on 01/06/2007 09:20:34
Showing 9 changed files
... ...
@@ -316,6 +316,8 @@ endif
316 316
 #		(not true anymore, q_malloc performs approx. the same)
317 317
 # -DF_MALLOC
318 318
 #		an even faster malloc, not recommended for debugging
319
+# -DDL_MALLOC
320
+#		a malloc implementation based on Doug Lea's dl_malloc
319 321
 # -DDBG_MALLOC
320 322
 #		issues additional debugging information if lock/unlock is called
321 323
 # -DFAST_LOCK
... ...
@@ -411,7 +413,8 @@ DEFS+= $(extra_defs) \
411 411
 	 -DUSE_DNS_FAILOVER \
412 412
 	 -DUSE_DST_BLACKLIST \
413 413
 	 -DDBG_QM_MALLOC \
414
-	 #-DF_MALLOC
414
+	 #-DDL_MALLOC \
415
+	 #-DF_MALLOC \
415 416
 	 #-DDBG_F_MALLOC \
416 417
 	 #-DNO_DEBUG \
417 418
 	 #-DEXTRA_DEBUG \
418 419
new file mode 100644
... ...
@@ -0,0 +1,10 @@
0
+#ifndef _DL_CONFIG_H
1
+#define _DL_CONFIG_H
2
+
3
+#define MSPACES 1
4
+#define USE_DL_PREFIX 1
5
+#define MALLOC_ALIGNMENT 16
6
+/* enable FOOTERS for extra consistency checks */
7
+/* #define FOOTERS 1 */
8
+
9
+#endif /* _DL_CONFIG_H */
0 10
new file mode 100644
... ...
@@ -0,0 +1,5080 @@
0
+/*
1
+  This is a version (aka dlmalloc) of malloc/free/realloc written by
2
+  Doug Lea and released to the public domain, as explained at
3
+  http://creativecommons.org/licenses/publicdomain.  Send questions,
4
+  comments, complaints, performance data, etc to dl@cs.oswego.edu
5
+
6
+* Version 2.8.3 Thu Sep 22 11:16:15 2005  Doug Lea  (dl at gee)
7
+
8
+   Note: There may be an updated version of this malloc obtainable at
9
+           ftp://gee.cs.oswego.edu/pub/misc/malloc.c
10
+         Check before installing!
11
+
12
+* Quickstart
13
+
14
+  This library is all in one file to simplify the most common usage:
15
+  ftp it, compile it (-O3), and link it into another program. All of
16
+  the compile-time options default to reasonable values for use on
17
+  most platforms.  You might later want to step through various
18
+  compile-time and dynamic tuning options.
19
+
20
+  For convenience, an include file for code using this malloc is at:
21
+     ftp://gee.cs.oswego.edu/pub/misc/malloc-2.8.3.h
22
+  You don't really need this .h file unless you call functions not
23
+  defined in your system include files.  The .h file contains only the
24
+  excerpts from this file needed for using this malloc on ANSI C/C++
25
+  systems, so long as you haven't changed compile-time options about
26
+  naming and tuning parameters.  If you do, then you can create your
27
+  own malloc.h that does include all settings by cutting at the point
28
+  indicated below. Note that you may already by default be using a C
29
+  library containing a malloc that is based on some version of this
30
+  malloc (for example in linux). You might still want to use the one
31
+  in this file to customize settings or to avoid overheads associated
32
+  with library versions.
33
+
34
+* Vital statistics:
35
+
36
+  Supported pointer/size_t representation:       4 or 8 bytes
37
+       size_t MUST be an unsigned type of the same width as
38
+       pointers. (If you are using an ancient system that declares
39
+       size_t as a signed type, or need it to be a different width
40
+       than pointers, you can use a previous release of this malloc
41
+       (e.g. 2.7.2) supporting these.)
42
+
43
+  Alignment:                                     8 bytes (default)
44
+       This suffices for nearly all current machines and C compilers.
45
+       However, you can define MALLOC_ALIGNMENT to be wider than this
46
+       if necessary (up to 128bytes), at the expense of using more space.
47
+
48
+  Minimum overhead per allocated chunk:   4 or  8 bytes (if 4byte sizes)
49
+                                          8 or 16 bytes (if 8byte sizes)
50
+       Each malloced chunk has a hidden word of overhead holding size
51
+       and status information, and additional cross-check word
52
+       if FOOTERS is defined.
53
+
54
+  Minimum allocated size: 4-byte ptrs:  16 bytes    (including overhead)
55
+                          8-byte ptrs:  32 bytes    (including overhead)
56
+
57
+       Even a request for zero bytes (i.e., malloc(0)) returns a
58
+       pointer to something of the minimum allocatable size.
59
+       The maximum overhead wastage (i.e., number of extra bytes
60
+       allocated than were requested in malloc) is less than or equal
61
+       to the minimum size, except for requests >= mmap_threshold that
62
+       are serviced via mmap(), where the worst case wastage is about
63
+       32 bytes plus the remainder from a system page (the minimal
64
+       mmap unit); typically 4096 or 8192 bytes.
65
+
66
+  Security: static-safe; optionally more or less
67
+       The "security" of malloc refers to the ability of malicious
68
+       code to accentuate the effects of errors (for example, freeing
69
+       space that is not currently malloc'ed or overwriting past the
70
+       ends of chunks) in code that calls malloc.  This malloc
71
+       guarantees not to modify any memory locations below the base of
72
+       heap, i.e., static variables, even in the presence of usage
73
+       errors.  The routines additionally detect most improper frees
74
+       and reallocs.  All this holds as long as the static bookkeeping
75
+       for malloc itself is not corrupted by some other means.  This
76
+       is only one aspect of security -- these checks do not, and
77
+       cannot, detect all possible programming errors.
78
+
79
+       If FOOTERS is defined nonzero, then each allocated chunk
80
+       carries an additional check word to verify that it was malloced
81
+       from its space.  These check words are the same within each
82
+       execution of a program using malloc, but differ across
83
+       executions, so externally crafted fake chunks cannot be
84
+       freed. This improves security by rejecting frees/reallocs that
85
+       could corrupt heap memory, in addition to the checks preventing
86
+       writes to statics that are always on.  This may further improve
87
+       security at the expense of time and space overhead.  (Note that
88
+       FOOTERS may also be worth using with MSPACES.)
89
+
90
+       By default detected errors cause the program to abort (calling
91
+       "abort()"). You can override this to instead proceed past
92
+       errors by defining PROCEED_ON_ERROR.  In this case, a bad free
93
+       has no effect, and a malloc that encounters a bad address
94
+       caused by user overwrites will ignore the bad address by
95
+       dropping pointers and indices to all known memory. This may
96
+       be appropriate for programs that should continue if at all
97
+       possible in the face of programming errors, although they may
98
+       run out of memory because dropped memory is never reclaimed.
99
+
100
+       If you don't like either of these options, you can define
101
+       CORRUPTION_ERROR_ACTION and USAGE_ERROR_ACTION to do anything
102
+       else. And if you are sure that your program using malloc has
103
+       no errors or vulnerabilities, you can define INSECURE to 1,
104
+       which might (or might not) provide a small performance improvement.
105
+
106
+  Thread-safety: NOT thread-safe unless USE_LOCKS defined
107
+       When USE_LOCKS is defined, each public call to malloc, free,
108
+       etc is surrounded with either a pthread mutex or a win32
109
+       spinlock (depending on WIN32). This is not especially fast, and
110
+       can be a major bottleneck.  It is designed only to provide
111
+       minimal protection in concurrent environments, and to provide a
112
+       basis for extensions.  If you are using malloc in a concurrent
113
+       program, consider instead using ptmalloc, which is derived from
114
+       a version of this malloc. (See http://www.malloc.de).
115
+
116
+  System requirements: Any combination of MORECORE and/or MMAP/MUNMAP
117
+       This malloc can use unix sbrk or any emulation (invoked using
118
+       the CALL_MORECORE macro) and/or mmap/munmap or any emulation
119
+       (invoked using CALL_MMAP/CALL_MUNMAP) to get and release system
120
+       memory.  On most unix systems, it tends to work best if both
121
+       MORECORE and MMAP are enabled.  On Win32, it uses emulations
122
+       based on VirtualAlloc. It also uses common C library functions
123
+       like memset.
124
+
125
+  Compliance: I believe it is compliant with the Single Unix Specification
126
+       (See http://www.unix.org). Also SVID/XPG, ANSI C, and probably
127
+       others as well.
128
+
129
+* Overview of algorithms
130
+
131
+  This is not the fastest, most space-conserving, most portable, or
132
+  most tunable malloc ever written. However it is among the fastest
133
+  while also being among the most space-conserving, portable and
134
+  tunable.  Consistent balance across these factors results in a good
135
+  general-purpose allocator for malloc-intensive programs.
136
+
137
+  In most ways, this malloc is a best-fit allocator. Generally, it
138
+  chooses the best-fitting existing chunk for a request, with ties
139
+  broken in approximately least-recently-used order. (This strategy
140
+  normally maintains low fragmentation.) However, for requests less
141
+  than 256bytes, it deviates from best-fit when there is not an
142
+  exactly fitting available chunk by preferring to use space adjacent
143
+  to that used for the previous small request, as well as by breaking
144
+  ties in approximately most-recently-used order. (These enhance
145
+  locality of series of small allocations.)  And for very large requests
146
+  (>= 256Kb by default), it relies on system memory mapping
147
+  facilities, if supported.  (This helps avoid carrying around and
148
+  possibly fragmenting memory used only for large chunks.)
149
+
150
+  All operations (except malloc_stats and mallinfo) have execution
151
+  times that are bounded by a constant factor of the number of bits in
152
+  a size_t, not counting any clearing in calloc or copying in realloc,
153
+  or actions surrounding MORECORE and MMAP that have times
154
+  proportional to the number of non-contiguous regions returned by
155
+  system allocation routines, which is often just 1.
156
+
157
+  The implementation is not very modular and seriously overuses
158
+  macros. Perhaps someday all C compilers will do as good a job
159
+  inlining modular code as can now be done by brute-force expansion,
160
+  but now, enough of them seem not to.
161
+
162
+  Some compilers issue a lot of warnings about code that is
163
+  dead/unreachable only on some platforms, and also about intentional
164
+  uses of negation on unsigned types. All known cases of each can be
165
+  ignored.
166
+
167
+  For a longer but out of date high-level description, see
168
+     http://gee.cs.oswego.edu/dl/html/malloc.html
169
+
170
+* MSPACES
171
+  If MSPACES is defined, then in addition to malloc, free, etc.,
172
+  this file also defines mspace_malloc, mspace_free, etc. These
173
+  are versions of malloc routines that take an "mspace" argument
174
+  obtained using create_mspace, to control all internal bookkeeping.
175
+  If ONLY_MSPACES is defined, only these versions are compiled.
176
+  So if you would like to use this allocator for only some allocations,
177
+  and your system malloc for others, you can compile with
178
+  ONLY_MSPACES and then do something like...
179
+    static mspace mymspace = create_mspace(0,0); // for example
180
+    #define mymalloc(bytes)  mspace_malloc(mymspace, bytes)
181
+
182
+  (Note: If you only need one instance of an mspace, you can instead
183
+  use "USE_DL_PREFIX" to relabel the global malloc.)
184
+
185
+  You can similarly create thread-local allocators by storing
186
+  mspaces as thread-locals. For example:
187
+    static __thread mspace tlms = 0;
188
+    void*  tlmalloc(size_t bytes) {
189
+      if (tlms == 0) tlms = create_mspace(0, 0);
190
+      return mspace_malloc(tlms, bytes);
191
+    }
192
+    void  tlfree(void* mem) { mspace_free(tlms, mem); }
193
+
194
+  Unless FOOTERS is defined, each mspace is completely independent.
195
+  You cannot allocate from one and free to another (although
196
+  conformance is only weakly checked, so usage errors are not always
197
+  caught). If FOOTERS is defined, then each chunk carries around a tag
198
+  indicating its originating mspace, and frees are directed to their
199
+  originating spaces.
200
+
201
+ -------------------------  Compile-time options ---------------------------
202
+
203
+Be careful in setting #define values for numerical constants of type
204
+size_t. On some systems, literal values are not automatically extended
205
+to size_t precision unless they are explicitly casted.
206
+
207
+WIN32                    default: defined if _WIN32 defined
208
+  Defining WIN32 sets up defaults for MS environment and compilers.
209
+  Otherwise defaults are for unix.
210
+
211
+MALLOC_ALIGNMENT         default: (size_t)8
212
+  Controls the minimum alignment for malloc'ed chunks.  It must be a
213
+  power of two and at least 8, even on machines for which smaller
214
+  alignments would suffice. It may be defined as larger than this
215
+  though. Note however that code and data structures are optimized for
216
+  the case of 8-byte alignment.
217
+
218
+MSPACES                  default: 0 (false)
219
+  If true, compile in support for independent allocation spaces.
220
+  This is only supported if HAVE_MMAP is true.
221
+
222
+ONLY_MSPACES             default: 0 (false)
223
+  If true, only compile in mspace versions, not regular versions.
224
+
225
+USE_LOCKS                default: 0 (false)
226
+  Causes each call to each public routine to be surrounded with
227
+  pthread or WIN32 mutex lock/unlock. (If set true, this can be
228
+  overridden on a per-mspace basis for mspace versions.)
229
+
230
+FOOTERS                  default: 0
231
+  If true, provide extra checking and dispatching by placing
232
+  information in the footers of allocated chunks. This adds
233
+  space and time overhead.
234
+
235
+INSECURE                 default: 0
236
+  If true, omit checks for usage errors and heap space overwrites.
237
+
238
+USE_DL_PREFIX            default: NOT defined
239
+  Causes compiler to prefix all public routines with the string 'dl'.
240
+  This can be useful when you only want to use this malloc in one part
241
+  of a program, using your regular system malloc elsewhere.
242
+
243
+ABORT                    default: defined as abort()
244
+  Defines how to abort on failed checks.  On most systems, a failed
245
+  check cannot die with an "assert" or even print an informative
246
+  message, because the underlying print routines in turn call malloc,
247
+  which will fail again.  Generally, the best policy is to simply call
248
+  abort(). It's not very useful to do more than this because many
249
+  errors due to overwriting will show up as address faults (null, odd
250
+  addresses etc) rather than malloc-triggered checks, so will also
251
+  abort.  Also, most compilers know that abort() does not return, so
252
+  can better optimize code conditionally calling it.
253
+
254
+PROCEED_ON_ERROR           default: defined as 0 (false)
255
+  Controls whether detected bad addresses cause them to be bypassed
256
+  rather than aborting. If set, detected bad arguments to free and
257
+  realloc are ignored. And all bookkeeping information is zeroed out
258
+  upon a detected overwrite of freed heap space, thus losing the
259
+  ability to ever return it from malloc again, but enabling the
260
+  application to proceed. If PROCEED_ON_ERROR is defined, the
261
+  static variable malloc_corruption_error_count is compiled in
262
+  and can be examined to see if errors have occurred. This option
263
+  generates slower code than the default abort policy.
264
+
265
+DEBUG                    default: NOT defined
266
+  The DEBUG setting is mainly intended for people trying to modify
267
+  this code or diagnose problems when porting to new platforms.
268
+  However, it may also be able to better isolate user errors than just
269
+  using runtime checks.  The assertions in the check routines spell
270
+  out in more detail the assumptions and invariants underlying the
271
+  algorithms.  The checking is fairly extensive, and will slow down
272
+  execution noticeably. Calling malloc_stats or mallinfo with DEBUG
273
+  set will attempt to check every non-mmapped allocated and free chunk
274
+  in the course of computing the summaries.
275
+
276
+ABORT_ON_ASSERT_FAILURE   default: defined as 1 (true)
277
+  Debugging assertion failures can be nearly impossible if your
278
+  version of the assert macro causes malloc to be called, which will
279
+  lead to a cascade of further failures, blowing the runtime stack.
280
+  ABORT_ON_ASSERT_FAILURE causes assertion failures to call abort(),
281
+  which will usually make debugging easier.
282
+
283
+MALLOC_FAILURE_ACTION     default: sets errno to ENOMEM, or no-op on win32
284
+  The action to take before "return 0" when malloc fails to be able to
285
+  return memory because there is none available.
286
+
287
+HAVE_MORECORE             default: 1 (true) unless win32 or ONLY_MSPACES
288
+  True if this system supports sbrk or an emulation of it.
289
+
290
+MORECORE                  default: sbrk
291
+  The name of the sbrk-style system routine to call to obtain more
292
+  memory.  See below for guidance on writing custom MORECORE
293
+  functions. The type of the argument to sbrk/MORECORE varies across
294
+  systems.  It cannot be size_t, because it supports negative
295
+  arguments, so it is normally the signed type of the same width as
296
+  size_t (sometimes declared as "intptr_t").  It doesn't much matter
297
+  though. Internally, we only call it with arguments less than half
298
+  the max value of a size_t, which should work across all reasonable
299
+  possibilities, although sometimes generating compiler warnings.  See
300
+  near the end of this file for guidelines for creating a custom
301
+  version of MORECORE.
302
+
303
+MORECORE_CONTIGUOUS       default: 1 (true)
304
+  If true, take advantage of fact that consecutive calls to MORECORE
305
+  with positive arguments always return contiguous increasing
306
+  addresses.  This is true of unix sbrk. It does not hurt too much to
307
+  set it true anyway, since malloc copes with non-contiguities.
308
+  Setting it false when definitely non-contiguous saves time
309
+  and possibly wasted space it would take to discover this though.
310
+
311
+MORECORE_CANNOT_TRIM      default: NOT defined
312
+  True if MORECORE cannot release space back to the system when given
313
+  negative arguments. This is generally necessary only if you are
314
+  using a hand-crafted MORECORE function that cannot handle negative
315
+  arguments.
316
+
317
+HAVE_MMAP                 default: 1 (true)
318
+  True if this system supports mmap or an emulation of it.  If so, and
319
+  HAVE_MORECORE is not true, MMAP is used for all system
320
+  allocation. If set and HAVE_MORECORE is true as well, MMAP is
321
+  primarily used to directly allocate very large blocks. It is also
322
+  used as a backup strategy in cases where MORECORE fails to provide
323
+  space from system. Note: A single call to MUNMAP is assumed to be
324
+  able to unmap memory that may have been allocated using multiple calls
325
+  to MMAP, so long as they are adjacent.
326
+
327
+HAVE_MREMAP               default: 1 on linux, else 0
328
+  If true realloc() uses mremap() to re-allocate large blocks and
329
+  extend or shrink allocation spaces.
330
+
331
+MMAP_CLEARS               default: 1 on unix
332
+  True if mmap clears memory so calloc doesn't need to. This is true
333
+  for standard unix mmap using /dev/zero.
334
+
335
+USE_BUILTIN_FFS            default: 0 (i.e., not used)
336
+  Causes malloc to use the builtin ffs() function to compute indices.
337
+  Some compilers may recognize and intrinsify ffs to be faster than the
338
+  supplied C version. Also, the case of x86 using gcc is special-cased
339
+  to an asm instruction, so is already as fast as it can be, and so
340
+  this setting has no effect. (On most x86s, the asm version is only
341
+  slightly faster than the C version.)
342
+
343
+malloc_getpagesize         default: derive from system includes, or 4096.
344
+  The system page size. To the extent possible, this malloc manages
345
+  memory from the system in page-size units.  This may be (and
346
+  usually is) a function rather than a constant. This is ignored
347
+  if WIN32, where page size is determined using getSystemInfo during
348
+  initialization.
349
+
350
+USE_DEV_RANDOM             default: 0 (i.e., not used)
351
+  Causes malloc to use /dev/random to initialize secure magic seed for
352
+  stamping footers. Otherwise, the current time is used.
353
+
354
+NO_MALLINFO                default: 0
355
+  If defined, don't compile "mallinfo". This can be a simple way
356
+  of dealing with mismatches between system declarations and
357
+  those in this file.
358
+
359
+MALLINFO_FIELD_TYPE        default: size_t
360
+  The type of the fields in the mallinfo struct. This was originally
361
+  defined as "int" in SVID etc, but is more usefully defined as
362
+  size_t. The value is used only if  HAVE_USR_INCLUDE_MALLOC_H is not set
363
+
364
+REALLOC_ZERO_BYTES_FREES    default: not defined
365
+  This should be set if a call to realloc with zero bytes should 
366
+  be the same as a call to free. Some people think it should. Otherwise, 
367
+  since this malloc returns a unique pointer for malloc(0), so does 
368
+  realloc(p, 0).
369
+
370
+LACKS_UNISTD_H, LACKS_FCNTL_H, LACKS_SYS_PARAM_H, LACKS_SYS_MMAN_H
371
+LACKS_STRINGS_H, LACKS_STRING_H, LACKS_SYS_TYPES_H,  LACKS_ERRNO_H
372
+LACKS_STDLIB_H                default: NOT defined unless on WIN32
373
+  Define these if your system does not have these header files.
374
+  You might need to manually insert some of the declarations they provide.
375
+
376
+DEFAULT_GRANULARITY        default: page size if MORECORE_CONTIGUOUS,
377
+                                system_info.dwAllocationGranularity in WIN32,
378
+                                otherwise 64K.
379
+      Also settable using mallopt(M_GRANULARITY, x)
380
+  The unit for allocating and deallocating memory from the system.  On
381
+  most systems with contiguous MORECORE, there is no reason to
382
+  make this more than a page. However, systems with MMAP tend to
383
+  either require or encourage larger granularities.  You can increase
384
+  this value to prevent system allocation functions to be called so
385
+  often, especially if they are slow.  The value must be at least one
386
+  page and must be a power of two.  Setting to 0 causes initialization
387
+  to either page size or win32 region size.  (Note: In previous
388
+  versions of malloc, the equivalent of this option was called
389
+  "TOP_PAD")
390
+
391
+DEFAULT_TRIM_THRESHOLD    default: 2MB
392
+      Also settable using mallopt(M_TRIM_THRESHOLD, x)
393
+  The maximum amount of unused top-most memory to keep before
394
+  releasing via malloc_trim in free().  Automatic trimming is mainly
395
+  useful in long-lived programs using contiguous MORECORE.  Because
396
+  trimming via sbrk can be slow on some systems, and can sometimes be
397
+  wasteful (in cases where programs immediately afterward allocate
398
+  more large chunks) the value should be high enough so that your
399
+  overall system performance would improve by releasing this much
400
+  memory.  As a rough guide, you might set to a value close to the
401
+  average size of a process (program) running on your system.
402
+  Releasing this much memory would allow such a process to run in
403
+  memory.  Generally, it is worth tuning trim thresholds when a
404
+  program undergoes phases where several large chunks are allocated
405
+  and released in ways that can reuse each other's storage, perhaps
406
+  mixed with phases where there are no such chunks at all. The trim
407
+  value must be greater than page size to have any useful effect.  To
408
+  disable trimming completely, you can set to MAX_SIZE_T. Note that the trick
409
+  some people use of mallocing a huge space and then freeing it at
410
+  program startup, in an attempt to reserve system memory, doesn't
411
+  have the intended effect under automatic trimming, since that memory
412
+  will immediately be returned to the system.
413
+
414
+DEFAULT_MMAP_THRESHOLD       default: 256K
415
+      Also settable using mallopt(M_MMAP_THRESHOLD, x)
416
+  The request size threshold for using MMAP to directly service a
417
+  request. Requests of at least this size that cannot be allocated
418
+  using already-existing space will be serviced via mmap.  (If enough
419
+  normal freed space already exists it is used instead.)  Using mmap
420
+  segregates relatively large chunks of memory so that they can be
421
+  individually obtained and released from the host system. A request
422
+  serviced through mmap is never reused by any other request (at least
423
+  not directly; the system may just so happen to remap successive
424
+  requests to the same locations).  Segregating space in this way has
425
+  the benefits that: Mmapped space can always be individually released
426
+  back to the system, which helps keep the system level memory demands
427
+  of a long-lived program low.  Also, mapped memory doesn't become
428
+  `locked' between other chunks, as can happen with normally allocated
429
+  chunks, which means that even trimming via malloc_trim would not
430
+  release them.  However, it has the disadvantage that the space
431
+  cannot be reclaimed, consolidated, and then used to service later
432
+  requests, as happens with normal chunks.  The advantages of mmap
433
+  nearly always outweigh disadvantages for "large" chunks, but the
434
+  value of "large" may vary across systems.  The default is an
435
+  empirically derived value that works well in most systems. You can
436
+  disable mmap by setting to MAX_SIZE_T.
437
+
438
+*/
439
+
440
+#include "dl_config.h"
441
+#include "meminfo.h"
442
+
443
+#ifndef WIN32
444
+#ifdef _WIN32
445
+#define WIN32 1
446
+#endif  /* _WIN32 */
447
+#endif  /* WIN32 */
448
+#ifdef WIN32
449
+#define WIN32_LEAN_AND_MEAN
450
+#include <windows.h>
451
+#define HAVE_MMAP 1
452
+#define HAVE_MORECORE 0
453
+#define LACKS_UNISTD_H
454
+#define LACKS_SYS_PARAM_H
455
+#define LACKS_SYS_MMAN_H
456
+#define LACKS_STRING_H
457
+#define LACKS_STRINGS_H
458
+#define LACKS_SYS_TYPES_H
459
+#define LACKS_ERRNO_H
460
+#define MALLOC_FAILURE_ACTION
461
+#define MMAP_CLEARS 0 /* WINCE and some others apparently don't clear */
462
+#endif  /* WIN32 */
463
+
464
+#if defined(DARWIN) || defined(_DARWIN)
465
+/* Mac OSX docs advise not to use sbrk; it seems better to use mmap */
466
+#ifndef HAVE_MORECORE
467
+#define HAVE_MORECORE 0
468
+#define HAVE_MMAP 1
469
+#endif  /* HAVE_MORECORE */
470
+#endif  /* DARWIN */
471
+
472
+#ifndef LACKS_SYS_TYPES_H
473
+#include <sys/types.h>  /* For size_t */
474
+#endif  /* LACKS_SYS_TYPES_H */
475
+
476
+/* The maximum possible size_t value has all bits set */
477
+#define MAX_SIZE_T           (~(size_t)0)
478
+
479
+#ifndef ONLY_MSPACES
480
+#define ONLY_MSPACES 0
481
+#endif  /* ONLY_MSPACES */
482
+#ifndef MSPACES
483
+#if ONLY_MSPACES
484
+#define MSPACES 1
485
+#else   /* ONLY_MSPACES */
486
+#define MSPACES 0
487
+#endif  /* ONLY_MSPACES */
488
+#endif  /* MSPACES */
489
+#ifndef MALLOC_ALIGNMENT
490
+#define MALLOC_ALIGNMENT ((size_t)8U)
491
+#endif  /* MALLOC_ALIGNMENT */
492
+#ifndef FOOTERS
493
+#define FOOTERS 0
494
+#endif  /* FOOTERS */
495
+#ifndef ABORT
496
+#define ABORT  abort()
497
+#endif  /* ABORT */
498
+#ifndef ABORT_ON_ASSERT_FAILURE
499
+#define ABORT_ON_ASSERT_FAILURE 1
500
+#endif  /* ABORT_ON_ASSERT_FAILURE */
501
+#ifndef PROCEED_ON_ERROR
502
+#define PROCEED_ON_ERROR 0
503
+#endif  /* PROCEED_ON_ERROR */
504
+#ifndef USE_LOCKS
505
+#define USE_LOCKS 0
506
+#endif  /* USE_LOCKS */
507
+#ifndef INSECURE
508
+#define INSECURE 0
509
+#endif  /* INSECURE */
510
+#ifndef HAVE_MMAP
511
+#define HAVE_MMAP 1
512
+#endif  /* HAVE_MMAP */
513
+#ifndef MMAP_CLEARS
514
+#define MMAP_CLEARS 1
515
+#endif  /* MMAP_CLEARS */
516
+#ifndef HAVE_MREMAP
517
+#ifdef linux
518
+#define HAVE_MREMAP 1
519
+#else   /* linux */
520
+#define HAVE_MREMAP 0
521
+#endif  /* linux */
522
+#endif  /* HAVE_MREMAP */
523
+#ifndef MALLOC_FAILURE_ACTION
524
+#define MALLOC_FAILURE_ACTION  errno = ENOMEM;
525
+#endif  /* MALLOC_FAILURE_ACTION */
526
+#ifndef HAVE_MORECORE
527
+#if ONLY_MSPACES
528
+#define HAVE_MORECORE 0
529
+#else   /* ONLY_MSPACES */
530
+#define HAVE_MORECORE 1
531
+#endif  /* ONLY_MSPACES */
532
+#endif  /* HAVE_MORECORE */
533
+#if !HAVE_MORECORE
534
+#define MORECORE_CONTIGUOUS 0
535
+#else   /* !HAVE_MORECORE */
536
+#ifndef MORECORE
537
+#define MORECORE sbrk
538
+#endif  /* MORECORE */
539
+#ifndef MORECORE_CONTIGUOUS
540
+#define MORECORE_CONTIGUOUS 1
541
+#endif  /* MORECORE_CONTIGUOUS */
542
+#endif  /* HAVE_MORECORE */
543
+#ifndef DEFAULT_GRANULARITY
544
+#if MORECORE_CONTIGUOUS
545
+#define DEFAULT_GRANULARITY (0)  /* 0 means to compute in init_mparams */
546
+#else   /* MORECORE_CONTIGUOUS */
547
+#define DEFAULT_GRANULARITY ((size_t)64U * (size_t)1024U)
548
+#endif  /* MORECORE_CONTIGUOUS */
549
+#endif  /* DEFAULT_GRANULARITY */
550
+#ifndef DEFAULT_TRIM_THRESHOLD
551
+#ifndef MORECORE_CANNOT_TRIM
552
+#define DEFAULT_TRIM_THRESHOLD ((size_t)2U * (size_t)1024U * (size_t)1024U)
553
+#else   /* MORECORE_CANNOT_TRIM */
554
+#define DEFAULT_TRIM_THRESHOLD MAX_SIZE_T
555
+#endif  /* MORECORE_CANNOT_TRIM */
556
+#endif  /* DEFAULT_TRIM_THRESHOLD */
557
+#ifndef DEFAULT_MMAP_THRESHOLD
558
+#if HAVE_MMAP
559
+#define DEFAULT_MMAP_THRESHOLD ((size_t)256U * (size_t)1024U)
560
+#else   /* HAVE_MMAP */
561
+#define DEFAULT_MMAP_THRESHOLD MAX_SIZE_T
562
+#endif  /* HAVE_MMAP */
563
+#endif  /* DEFAULT_MMAP_THRESHOLD */
564
+#ifndef USE_BUILTIN_FFS
565
+#define USE_BUILTIN_FFS 0
566
+#endif  /* USE_BUILTIN_FFS */
567
+#ifndef USE_DEV_RANDOM
568
+#define USE_DEV_RANDOM 0
569
+#endif  /* USE_DEV_RANDOM */
570
+#ifndef NO_MALLINFO
571
+#define NO_MALLINFO 0
572
+#endif  /* NO_MALLINFO */
573
+#ifndef MALLINFO_FIELD_TYPE
574
+#define MALLINFO_FIELD_TYPE size_t
575
+#endif  /* MALLINFO_FIELD_TYPE */
576
+
577
+/*
578
+  mallopt tuning options.  SVID/XPG defines four standard parameter
579
+  numbers for mallopt, normally defined in malloc.h.  None of these
580
+  are used in this malloc, so setting them has no effect. But this
581
+  malloc does support the following options.
582
+*/
583
+
584
+#define M_TRIM_THRESHOLD     (-1)
585
+#define M_GRANULARITY        (-2)
586
+#define M_MMAP_THRESHOLD     (-3)
587
+
588
+/* ------------------------ Mallinfo declarations ------------------------ */
589
+
590
+#if !NO_MALLINFO
591
+/*
592
+  This version of malloc supports the standard SVID/XPG mallinfo
593
+  routine that returns a struct containing usage properties and
594
+  statistics. It should work on any system that has a
595
+  /usr/include/malloc.h defining struct mallinfo.  The main
596
+  declaration needed is the mallinfo struct that is returned (by-copy)
597
+  by mallinfo().  The mallinfo struct contains a bunch of fields that
598
+  are not even meaningful in this version of malloc.  These fields are
599
+  instead filled by mallinfo() with other numbers that might be of
600
+  interest.
601
+
602
+  HAVE_USR_INCLUDE_MALLOC_H should be set if you have a
603
+  /usr/include/malloc.h file that includes a declaration of struct
604
+  mallinfo.  If so, it is included; else a compliant version is
605
+  declared below.  These must be precisely the same for mallinfo() to
606
+  work.  The original SVID version of this struct, defined on most
607
+  systems with mallinfo, declares all fields as ints. But some others
608
+  define as unsigned long. If your system defines the fields using a
609
+  type of different width than listed here, you MUST #include your
610
+  system version and #define HAVE_USR_INCLUDE_MALLOC_H.
611
+*/
612
+
613
+/* #define HAVE_USR_INCLUDE_MALLOC_H */
614
+
615
+#ifdef HAVE_USR_INCLUDE_MALLOC_H
616
+#include "/usr/include/malloc.h"
617
+#else /* HAVE_USR_INCLUDE_MALLOC_H */
618
+
619
+/* SVID/XPG-compatible mallinfo result struct; field widths come from
+   MALLINFO_FIELD_TYPE (size_t unless overridden), so it must match the
+   system's own struct mallinfo exactly if HAVE_USR_INCLUDE_MALLOC_H is set. */
+struct mallinfo {
620
+  MALLINFO_FIELD_TYPE arena;    /* non-mmapped space allocated from system */
621
+  MALLINFO_FIELD_TYPE ordblks;  /* number of free chunks */
622
+  MALLINFO_FIELD_TYPE smblks;   /* always 0 */
623
+  MALLINFO_FIELD_TYPE hblks;    /* always 0 */
624
+  MALLINFO_FIELD_TYPE hblkhd;   /* space in mmapped regions */
625
+  MALLINFO_FIELD_TYPE usmblks;  /* maximum total allocated space */
626
+  MALLINFO_FIELD_TYPE fsmblks;  /* always 0 */
627
+  MALLINFO_FIELD_TYPE uordblks; /* total allocated space */
628
+  MALLINFO_FIELD_TYPE fordblks; /* total free space */
629
+  MALLINFO_FIELD_TYPE keepcost; /* releasable (via malloc_trim) space */
630
+};
631
+
632
+#endif /* HAVE_USR_INCLUDE_MALLOC_H */
633
+#endif /* NO_MALLINFO */
634
+
635
+#ifdef __cplusplus
636
+extern "C" {
637
+#endif /* __cplusplus */
638
+
639
+#if !ONLY_MSPACES
640
+
641
+/* ------------------- Declarations of public routines ------------------- */
642
+
643
+#ifndef USE_DL_PREFIX
644
+#define dlcalloc               calloc
645
+#define dlfree                 free
646
+#define dlmalloc               malloc
647
+#define dlmemalign             memalign
648
+#define dlrealloc              realloc
649
+#define dlvalloc               valloc
650
+#define dlpvalloc              pvalloc
651
+#define dlmallinfo             mallinfo
652
+#define dlmallopt              mallopt
653
+#define dlmalloc_trim          malloc_trim
654
+#define dlmalloc_stats         malloc_stats
655
+#define dlmalloc_usable_size   malloc_usable_size
656
+#define dlmalloc_footprint     malloc_footprint
657
+#define dlmalloc_max_footprint malloc_max_footprint
658
+#define dlindependent_calloc   independent_calloc
659
+#define dlindependent_comalloc independent_comalloc
660
+#endif /* USE_DL_PREFIX */
661
+
662
+
663
+/*
664
+  malloc(size_t n)
665
+  Returns a pointer to a newly allocated chunk of at least n bytes, or
666
+  null if no space is available, in which case errno is set to ENOMEM
667
+  on ANSI C systems.
668
+
669
+  If n is zero, malloc returns a minimum-sized chunk. (The minimum
670
+  size is 16 bytes on most 32bit systems, and 32 bytes on 64bit
671
+  systems.)  Note that size_t is an unsigned type, so calls with
672
+  arguments that would be negative if signed are interpreted as
673
+  requests for huge amounts of space, which will often fail. The
674
+  maximum supported value of n differs across systems, but is in all
675
+  cases less than the maximum representable value of a size_t.
676
+*/
677
+void* dlmalloc(size_t);
678
+
679
+/*
680
+  free(void* p)
681
+  Releases the chunk of memory pointed to by p, that had been previously
682
+  allocated using malloc or a related routine such as realloc.
683
+  It has no effect if p is null. If p was not malloced or already
684
+  freed, free(p) will by default cause the current program to abort.
685
+*/
686
+void  dlfree(void*);
687
+
688
+/*
689
+  calloc(size_t n_elements, size_t element_size);
690
+  Returns a pointer to n_elements * element_size bytes, with all locations
691
+  set to zero.
692
+*/
693
+void* dlcalloc(size_t, size_t);
694
+
695
+/*
696
+  realloc(void* p, size_t n)
697
+  Returns a pointer to a chunk of size n that contains the same data
698
+  as does chunk p up to the minimum of (n, p's size) bytes, or null
699
+  if no space is available.
700
+
701
+  The returned pointer may or may not be the same as p. The algorithm
702
+  prefers extending p in most cases when possible, otherwise it
703
+  employs the equivalent of a malloc-copy-free sequence.
704
+
705
+  If p is null, realloc is equivalent to malloc.
706
+
707
+  If space is not available, realloc returns null, errno is set (if on
708
+  ANSI) and p is NOT freed.
709
+
710
+  if n is for fewer bytes than already held by p, the newly unused
711
+  space is lopped off and freed if possible.  realloc with a size
712
+  argument of zero (re)allocates a minimum-sized chunk.
713
+
714
+  The old unix realloc convention of allowing the last-free'd chunk
715
+  to be used as an argument to realloc is not supported.
716
+*/
717
+
718
+void* dlrealloc(void*, size_t);
719
+
720
+/*
721
+  memalign(size_t alignment, size_t n);
722
+  Returns a pointer to a newly allocated chunk of n bytes, aligned
723
+  in accord with the alignment argument.
724
+
725
+  The alignment argument should be a power of two. If the argument is
726
+  not a power of two, the nearest greater power is used.
727
+  8-byte alignment is guaranteed by normal malloc calls, so don't
728
+  bother calling memalign with an argument of 8 or less.
729
+
730
+  Overreliance on memalign is a sure way to fragment space.
731
+*/
732
+void* dlmemalign(size_t, size_t);
733
+
734
+/*
735
+  valloc(size_t n);
736
+  Equivalent to memalign(pagesize, n), where pagesize is the page
737
+  size of the system. If the pagesize is unknown, 4096 is used.
738
+*/
739
+void* dlvalloc(size_t);
740
+
741
+/*
742
+  mallopt(int parameter_number, int parameter_value)
743
+  Sets tunable parameters The format is to provide a
744
+  (parameter-number, parameter-value) pair.  mallopt then sets the
745
+  corresponding parameter to the argument value if it can (i.e., so
746
+  long as the value is meaningful), and returns 1 if successful else
747
+  0.  SVID/XPG/ANSI defines four standard param numbers for mallopt,
748
+  normally defined in malloc.h.  None of these are use in this malloc,
749
+  so setting them has no effect. But this malloc also supports other
750
+  options in mallopt. See below for details.  Briefly, supported
751
+  parameters are as follows (listed defaults are for "typical"
752
+  configurations).
753
+
754
+  Symbol            param #  default    allowed param values
755
+  M_TRIM_THRESHOLD     -1   2*1024*1024   any   (MAX_SIZE_T disables)
756
+  M_GRANULARITY        -2     page size   any power of 2 >= page size
757
+  M_MMAP_THRESHOLD     -3      256*1024   any   (or 0 if no MMAP support)
758
+*/
759
+int dlmallopt(int, int);
760
+
761
+/*
762
+  malloc_footprint();
763
+  Returns the number of bytes obtained from the system.  The total
764
+  number of bytes allocated by malloc, realloc etc., is less than this
765
+  value. Unlike mallinfo, this function returns only a precomputed
766
+  result, so can be called frequently to monitor memory consumption.
767
+  Even if locks are otherwise defined, this function does not use them,
768
+  so results might not be up to date.
769
+*/
770
+size_t dlmalloc_footprint(void);
771
+
772
+/*
773
+  malloc_max_footprint();
774
+  Returns the maximum number of bytes obtained from the system. This
775
+  value will be greater than current footprint if deallocated space
776
+  has been reclaimed by the system. The peak number of bytes allocated
777
+  by malloc, realloc etc., is less than this value. Unlike mallinfo,
778
+  this function returns only a precomputed result, so can be called
779
+  frequently to monitor memory consumption.  Even if locks are
780
+  otherwise defined, this function does not use them, so results might
781
+  not be up to date.
782
+*/
783
+size_t dlmalloc_max_footprint(void);
784
+
785
+#if !NO_MALLINFO
786
+/*
787
+  mallinfo()
788
+  Returns (by copy) a struct containing various summary statistics:
789
+
790
+  arena:     current total non-mmapped bytes allocated from system
791
+  ordblks:   the number of free chunks
792
+  smblks:    always zero.
793
+  hblks:     current number of mmapped regions
794
+  hblkhd:    total bytes held in mmapped regions
795
+  usmblks:   the maximum total allocated space. This will be greater
796
+                than current total if trimming has occurred.
797
+  fsmblks:   always zero
798
+  uordblks:  current total allocated space (normal or mmapped)
799
+  fordblks:  total free space
800
+  keepcost:  the maximum number of bytes that could ideally be released
801
+               back to system via malloc_trim. ("ideally" means that
802
+               it ignores page restrictions etc.)
803
+
804
+  Because these fields are ints, but internal bookkeeping may
805
+  be kept as longs, the reported values may wrap around zero and
806
+  thus be inaccurate.
807
+*/
808
+struct mallinfo dlmallinfo(void);
809
+#endif /* NO_MALLINFO */
810
+
811
+/*
812
+  independent_calloc(size_t n_elements, size_t element_size, void* chunks[]);
813
+
814
+  independent_calloc is similar to calloc, but instead of returning a
815
+  single cleared space, it returns an array of pointers to n_elements
816
+  independent elements that can hold contents of size elem_size, each
817
+  of which starts out cleared, and can be independently freed,
818
+  realloc'ed etc. The elements are guaranteed to be adjacently
819
+  allocated (this is not guaranteed to occur with multiple callocs or
820
+  mallocs), which may also improve cache locality in some
821
+  applications.
822
+
823
+  The "chunks" argument is optional (i.e., may be null, which is
824
+  probably the most typical usage). If it is null, the returned array
825
+  is itself dynamically allocated and should also be freed when it is
826
+  no longer needed. Otherwise, the chunks array must be of at least
827
+  n_elements in length. It is filled in with the pointers to the
828
+  chunks.
829
+
830
+  In either case, independent_calloc returns this pointer array, or
831
+  null if the allocation failed.  If n_elements is zero and "chunks"
832
+  is null, it returns a chunk representing an array with zero elements
833
+  (which should be freed if not wanted).
834
+
835
+  Each element must be individually freed when it is no longer
836
+  needed. If you'd like to instead be able to free all at once, you
837
+  should instead use regular calloc and assign pointers into this
838
+  space to represent elements.  (In this case though, you cannot
839
+  independently free elements.)
840
+
841
+  independent_calloc simplifies and speeds up implementations of many
842
+  kinds of pools.  It may also be useful when constructing large data
843
+  structures that initially have a fixed number of fixed-sized nodes,
844
+  but the number is not known at compile time, and some of the nodes
845
+  may later need to be freed. For example:
846
+
847
+  struct Node { int item; struct Node* next; };
848
+
849
+  struct Node* build_list() {
850
+    struct Node** pool;
851
+    int n = read_number_of_nodes_needed();
852
+    if (n <= 0) return 0;
853
+    pool = (struct Node**)independent_calloc(n, sizeof(struct Node), 0);
854
+    if (pool == 0) die();
855
+    // organize into a linked list...
856
+    struct Node* first = pool[0];
857
+    for (i = 0; i < n-1; ++i)
858
+      pool[i]->next = pool[i+1];
859
+    free(pool);     // Can now free the array (or not, if it is needed later)
860
+    return first;
861
+  }
862
+*/
863
+void** dlindependent_calloc(size_t, size_t, void**);
864
+
865
+/*
866
+  independent_comalloc(size_t n_elements, size_t sizes[], void* chunks[]);
867
+
868
+  independent_comalloc allocates, all at once, a set of n_elements
869
+  chunks with sizes indicated in the "sizes" array.    It returns
870
+  an array of pointers to these elements, each of which can be
871
+  independently freed, realloc'ed etc. The elements are guaranteed to
872
+  be adjacently allocated (this is not guaranteed to occur with
873
+  multiple callocs or mallocs), which may also improve cache locality
874
+  in some applications.
875
+
876
+  The "chunks" argument is optional (i.e., may be null). If it is null
877
+  the returned array is itself dynamically allocated and should also
878
+  be freed when it is no longer needed. Otherwise, the chunks array
879
+  must be of at least n_elements in length. It is filled in with the
880
+  pointers to the chunks.
881
+
882
+  In either case, independent_comalloc returns this pointer array, or
883
+  null if the allocation failed.  If n_elements is zero and chunks is
884
+  null, it returns a chunk representing an array with zero elements
885
+  (which should be freed if not wanted).
886
+
887
+  Each element must be individually freed when it is no longer
888
+  needed. If you'd like to instead be able to free all at once, you
889
+  should instead use a single regular malloc, and assign pointers at
890
+  particular offsets in the aggregate space. (In this case though, you
891
+  cannot independently free elements.)
892
+
893
+  independent_comalloc differs from independent_calloc in that each
894
+  element may have a different size, and also that it does not
895
+  automatically clear elements.
896
+
897
+  independent_comalloc can be used to speed up allocation in cases
898
+  where several structs or objects must always be allocated at the
899
+  same time.  For example:
900
+
901
+  struct Head { ... }
902
+  struct Foot { ... }
903
+
904
+  void send_message(char* msg) {
905
+    int msglen = strlen(msg);
906
+    size_t sizes[3] = { sizeof(struct Head), msglen, sizeof(struct Foot) };
907
+    void* chunks[3];
908
+    if (independent_comalloc(3, sizes, chunks) == 0)
909
+      die();
910
+    struct Head* head = (struct Head*)(chunks[0]);
911
+    char*        body = (char*)(chunks[1]);
912
+    struct Foot* foot = (struct Foot*)(chunks[2]);
913
+    // ...
914
+  }
915
+
916
+  In general though, independent_comalloc is worth using only for
917
+  larger values of n_elements. For small values, you probably won't
918
+  detect enough difference from series of malloc calls to bother.
919
+
920
+  Overuse of independent_comalloc can increase overall memory usage,
921
+  since it cannot reuse existing noncontiguous small chunks that
922
+  might be available for some of the elements.
923
+*/
924
+void** dlindependent_comalloc(size_t, size_t*, void**);
925
+
926
+
927
+/*
928
+  pvalloc(size_t n);
929
+  Equivalent to valloc(minimum-page-that-holds(n)), that is,
930
+  round up n to nearest pagesize.
931
+ */
932
+void*  dlpvalloc(size_t);
933
+
934
+/*
935
+  malloc_trim(size_t pad);
936
+
937
+  If possible, gives memory back to the system (via negative arguments
938
+  to sbrk) if there is unused memory at the `high' end of the malloc
939
+  pool or in unused MMAP segments. You can call this after freeing
940
+  large blocks of memory to potentially reduce the system-level memory
941
+  requirements of a program. However, it cannot guarantee to reduce
942
+  memory. Under some allocation patterns, some large free blocks of
943
+  memory will be locked between two used chunks, so they cannot be
944
+  given back to the system.
945
+
946
+  The `pad' argument to malloc_trim represents the amount of free
947
+  trailing space to leave untrimmed. If this argument is zero, only
948
+  the minimum amount of memory to maintain internal data structures
949
+  will be left. Non-zero arguments can be supplied to maintain enough
950
+  trailing space to service future expected allocations without having
951
+  to re-obtain memory from the system.
952
+
953
+  Malloc_trim returns 1 if it actually released any memory, else 0.
954
+*/
955
+int  dlmalloc_trim(size_t);
956
+
957
+/*
958
+  malloc_usable_size(void* p);
959
+
960
+  Returns the number of bytes you can actually use in
961
+  an allocated chunk, which may be more than you requested (although
962
+  often not) due to alignment and minimum size constraints.
963
+  You can use this many bytes without worrying about
964
+  overwriting other allocated objects. This is not a particularly great
965
+  programming practice. malloc_usable_size can be more useful in
966
+  debugging and assertions, for example:
967
+
968
+  p = malloc(n);
969
+  assert(malloc_usable_size(p) >= 256);
970
+*/
971
+size_t dlmalloc_usable_size(void*);
972
+
973
+/*
974
+  malloc_stats();
975
+  Prints on stderr the amount of space obtained from the system (both
976
+  via sbrk and mmap), the maximum amount (which may be more than
977
+  current if malloc_trim and/or munmap got called), and the current
978
+  number of bytes allocated via malloc (or realloc, etc) but not yet
979
+  freed. Note that this is the number of bytes allocated, not the
980
+  number requested. It will be larger than the number requested
981
+  because of alignment and bookkeeping overhead. Because it includes
982
+  alignment wastage as being in use, this figure may be greater than
983
+  zero even when no user-level chunks are allocated.
984
+
985
+  The reported current and maximum system memory can be inaccurate if
986
+  a program makes other calls to system memory allocation functions
987
+  (normally sbrk) outside of malloc.
988
+
989
+  malloc_stats prints only the most commonly interesting statistics.
990
+  More information can be obtained by calling mallinfo.
991
+*/
992
+void  dlmalloc_stats(void);
993
+
994
+#endif /* ONLY_MSPACES */
995
+
996
+#if MSPACES
997
+
998
+/*
999
+  mspace is an opaque type representing an independent
1000
+  region of space that supports mspace_malloc, etc.
1001
+*/
1002
+typedef void* mspace;
1003
+
1004
+/*
1005
+  create_mspace creates and returns a new independent space with the
1006
+  given initial capacity, or, if 0, the default granularity size.  It
1007
+  returns null if there is no system memory available to create the
1008
+  space.  If argument locked is non-zero, the space uses a separate
1009
+  lock to control access. The capacity of the space will grow
1010
+  dynamically as needed to service mspace_malloc requests.  You can
1011
+  control the sizes of incremental increases of this space by
1012
+  compiling with a different DEFAULT_GRANULARITY or dynamically
1013
+  setting with mallopt(M_GRANULARITY, value).
1014
+*/
1015
+mspace create_mspace(size_t capacity, int locked);
1016
+
1017
+/*
1018
+  destroy_mspace destroys the given space, and attempts to return all
1019
+  of its memory back to the system, returning the total number of
1020
+  bytes freed. After destruction, the results of access to all memory
1021
+  used by the space become undefined.
1022
+*/
1023
+size_t destroy_mspace(mspace msp);
1024
+
1025
+/*
1026
+  create_mspace_with_base uses the memory supplied as the initial base
1027
+  of a new mspace. Part (less than 128*sizeof(size_t) bytes) of this
1028
+  space is used for bookkeeping, so the capacity must be at least this
1029
+  large. (Otherwise 0 is returned.) When this initial space is
1030
+  exhausted, additional memory will be obtained from the system.
1031
+  Destroying this space will deallocate all additionally allocated
1032
+  space (if possible) but not the initial base.
1033
+*/
1034
+mspace create_mspace_with_base(void* base, size_t capacity, int locked);
1035
+
1036
+/*
1037
+  mspace_malloc behaves as malloc, but operates within
1038
+  the given space.
1039
+*/
1040
+void* mspace_malloc(mspace msp, size_t bytes);
1041
+
1042
+/*
1043
+  mspace_free behaves as free, but operates within
1044
+  the given space.
1045
+
1046
+  If compiled with FOOTERS==1, mspace_free is not actually needed.
1047
+  free may be called instead of mspace_free because freed chunks from
1048
+  any space are handled by their originating spaces.
1049
+*/
1050
+void mspace_free(mspace msp, void* mem);
1051
+
1052
+/*
1053
+  mspace_realloc behaves as realloc, but operates within
1054
+  the given space.
1055
+
1056
+  If compiled with FOOTERS==1, mspace_realloc is not actually
1057
+  needed.  realloc may be called instead of mspace_realloc because
1058
+  realloced chunks from any space are handled by their originating
1059
+  spaces.
1060
+*/
1061
+void* mspace_realloc(mspace msp, void* mem, size_t newsize);
1062
+
1063
+/*
1064
+  mspace_calloc behaves as calloc, but operates within
1065
+  the given space.
1066
+*/
1067
+void* mspace_calloc(mspace msp, size_t n_elements, size_t elem_size);
1068
+
1069
+/*
1070
+  mspace_memalign behaves as memalign, but operates within
1071
+  the given space.
1072
+*/
1073
+void* mspace_memalign(mspace msp, size_t alignment, size_t bytes);
1074
+
1075
+/*
1076
+  mspace_independent_calloc behaves as independent_calloc, but
1077
+  operates within the given space.
1078
+*/
1079
+void** mspace_independent_calloc(mspace msp, size_t n_elements,
1080
+                                 size_t elem_size, void* chunks[]);
1081
+
1082
+/*
1083
+  mspace_independent_comalloc behaves as independent_comalloc, but
1084
+  operates within the given space.
1085
+*/
1086
+void** mspace_independent_comalloc(mspace msp, size_t n_elements,
1087
+                                   size_t sizes[], void* chunks[]);
1088
+
1089
+/*
1090
+  mspace_footprint() returns the number of bytes obtained from the
1091
+  system for this space.
1092
+*/
1093
+size_t mspace_footprint(mspace msp);
1094
+
1095
+/*
1096
+  mspace_max_footprint() returns the peak number of bytes obtained from the
1097
+  system for this space.
1098
+*/
1099
+size_t mspace_max_footprint(mspace msp);
1100
+
1101
+
1102
+#if !NO_MALLINFO
1103
+/*
1104
+  mspace_mallinfo behaves as mallinfo, but reports properties of
1105
+  the given space.
1106
+*/
1107
+struct mallinfo mspace_mallinfo(mspace msp);
1108
+#endif /* NO_MALLINFO */
1109
+
1110
+/*
1111
+  mspace_malloc_stats behaves as malloc_stats, but reports
1112
+  properties of the given space.
1113
+*/
1114
+void mspace_malloc_stats(mspace msp);
1115
+
1116
+/*
1117
+  mspace_trim behaves as malloc_trim, but
1118
+  operates within the given space.
1119
+*/
1120
+int mspace_trim(mspace msp, size_t pad);
1121
+
1122
+/*
1123
+  An alias for mallopt.
1124
+*/
1125
+int mspace_mallopt(int, int);
1126
+
1127
+#endif /* MSPACES */
1128
+
1129
+#ifdef __cplusplus
1130
+};  /* end of extern "C" */
1131
+#endif /* __cplusplus */
1132
+
1133
+/*
1134
+  ========================================================================
1135
+  To make a fully customizable malloc.h header file, cut everything
1136
+  above this line, put into file malloc.h, edit to suit, and #include it
1137
+  on the next line, as well as in programs that use this malloc.
1138
+  ========================================================================
1139
+*/
1140
+
1141
+/* #include "malloc.h" */
1142
+
1143
+/*------------------------------ internal #includes ---------------------- */
1144
+
1145
+#ifdef WIN32
1146
+#pragma warning( disable : 4146 ) /* no "unsigned" warnings */
1147
+#endif /* WIN32 */
1148
+
1149
+#include <stdio.h>       /* for printing in malloc_stats */
1150
+
1151
+#ifndef LACKS_ERRNO_H
1152
+#include <errno.h>       /* for MALLOC_FAILURE_ACTION */
1153
+#endif /* LACKS_ERRNO_H */
1154
+#if FOOTERS
1155
+#include <time.h>        /* for magic initialization */
1156
+#endif /* FOOTERS */
1157
+#ifndef LACKS_STDLIB_H
1158
+#include <stdlib.h>      /* for abort() */
1159
+#endif /* LACKS_STDLIB_H */
1160
+#ifdef DEBUG
1161
+#if ABORT_ON_ASSERT_FAILURE
1162
+#define assert(x) if(!(x)) ABORT
1163
+#else /* ABORT_ON_ASSERT_FAILURE */
1164
+#include <assert.h>
1165
+#endif /* ABORT_ON_ASSERT_FAILURE */
1166
+#else  /* DEBUG */
1167
+#define assert(x)
1168
+#endif /* DEBUG */
1169
+#ifndef LACKS_STRING_H
1170
+#include <string.h>      /* for memset etc */
1171
+#endif  /* LACKS_STRING_H */
1172
+#if USE_BUILTIN_FFS
1173
+#ifndef LACKS_STRINGS_H
1174
+#include <strings.h>     /* for ffs */
1175
+#endif /* LACKS_STRINGS_H */
1176
+#endif /* USE_BUILTIN_FFS */
1177
+#if HAVE_MMAP
1178
+#ifndef LACKS_SYS_MMAN_H
1179
+#include <sys/mman.h>    /* for mmap */
1180
+#endif /* LACKS_SYS_MMAN_H */
1181
+#ifndef LACKS_FCNTL_H
1182
+#include <fcntl.h>
1183
+#endif /* LACKS_FCNTL_H */
1184
+#endif /* HAVE_MMAP */
1185
+#if HAVE_MORECORE
1186
+#ifndef LACKS_UNISTD_H
1187
+#include <unistd.h>     /* for sbrk */
1188
+#else /* LACKS_UNISTD_H */
1189
+#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__)
1190
+extern void*     sbrk(ptrdiff_t);
1191
+#endif /* FreeBSD etc */
1192
+#endif /* LACKS_UNISTD_H */
1193
+#endif /* HAVE_MORECORE */
1194
+
1195
+#ifndef WIN32
1196
+#ifndef malloc_getpagesize
1197
+#  ifdef _SC_PAGESIZE         /* some SVR4 systems omit an underscore */
1198
+#    ifndef _SC_PAGE_SIZE
1199
+#      define _SC_PAGE_SIZE _SC_PAGESIZE
1200
+#    endif
1201
+#  endif
1202
+#  ifdef _SC_PAGE_SIZE
1203
+#    define malloc_getpagesize sysconf(_SC_PAGE_SIZE)
1204
+#  else
1205
+#    if defined(BSD) || defined(DGUX) || defined(HAVE_GETPAGESIZE)
1206
+       extern size_t getpagesize();
1207
+#      define malloc_getpagesize getpagesize()
1208
+#    else
1209
+#      ifdef WIN32 /* use supplied emulation of getpagesize */
1210
+#        define malloc_getpagesize getpagesize()
1211
+#      else
1212
+#        ifndef LACKS_SYS_PARAM_H
1213
+#          include <sys/param.h>
1214
+#        endif
1215
+#        ifdef EXEC_PAGESIZE
1216
+#          define malloc_getpagesize EXEC_PAGESIZE
1217
+#        else
1218
+#          ifdef NBPG
1219
+#            ifndef CLSIZE
1220
+#              define malloc_getpagesize NBPG
1221
+#            else
1222
+#              define malloc_getpagesize (NBPG * CLSIZE)
1223
+#            endif
1224
+#          else
1225
+#            ifdef NBPC
1226
+#              define malloc_getpagesize NBPC
1227
+#            else
1228
+#              ifdef PAGESIZE
1229
+#                define malloc_getpagesize PAGESIZE
1230
+#              else /* just guess */
1231
+#                define malloc_getpagesize ((size_t)4096U)
1232
+#              endif
1233
+#            endif
1234
+#          endif
1235
+#        endif
1236
+#      endif
1237
+#    endif
1238
+#  endif
1239
+#endif
1240
+#endif
1241
+
1242
+/* ------------------- size_t and alignment properties -------------------- */
1243
+
1244
+/* The byte and bit size of a size_t */
1245
+#define SIZE_T_SIZE         (sizeof(size_t))
1246
+#define SIZE_T_BITSIZE      (sizeof(size_t) << 3)
1247
+
1248
+/* Some constants coerced to size_t */
1249
+/* Annoying but necessary to avoid errors on some platforms */
1250
+#define SIZE_T_ZERO         ((size_t)0)
1251
+#define SIZE_T_ONE          ((size_t)1)
1252
+#define SIZE_T_TWO          ((size_t)2)
1253
+#define TWO_SIZE_T_SIZES    (SIZE_T_SIZE<<1)
1254
+#define FOUR_SIZE_T_SIZES   (SIZE_T_SIZE<<2)
1255
+#define SIX_SIZE_T_SIZES    (FOUR_SIZE_T_SIZES+TWO_SIZE_T_SIZES)
1256
+#define HALF_MAX_SIZE_T     (MAX_SIZE_T / 2U)
1257
+
1258
+/* The bit mask value corresponding to MALLOC_ALIGNMENT */
1259
+#define CHUNK_ALIGN_MASK    (MALLOC_ALIGNMENT - SIZE_T_ONE)
1260
+
1261
+/* True if address a has acceptable alignment */
1262
+#define is_aligned(A)       (((size_t)((A)) & (CHUNK_ALIGN_MASK)) == 0)
1263
+
1264
+/* the number of bytes to offset an address to align it */
1265
+#define align_offset(A)\
1266
+ ((((size_t)(A) & CHUNK_ALIGN_MASK) == 0)? 0 :\
1267
+  ((MALLOC_ALIGNMENT - ((size_t)(A) & CHUNK_ALIGN_MASK)) & CHUNK_ALIGN_MASK))
1268
+
1269
+/* -------------------------- MMAP preliminaries ------------------------- */
1270
+
1271
+/*
1272
+   If HAVE_MORECORE or HAVE_MMAP are false, we just define calls and
1273
+   checks to fail so compiler optimizer can delete code rather than
1274
+   using so many "#if"s.
1275
+*/
1276
+
1277
+
1278
+/* MORECORE and MMAP must return MFAIL on failure */
1279
+#define MFAIL                ((void*)(MAX_SIZE_T))
1280
+#define CMFAIL               ((char*)(MFAIL)) /* defined for convenience */
1281
+
1282
+#if !HAVE_MMAP
1283
+#define IS_MMAPPED_BIT       (SIZE_T_ZERO)
1284
+#define USE_MMAP_BIT         (SIZE_T_ZERO)
1285
+#define CALL_MMAP(s)         MFAIL
1286
+#define CALL_MUNMAP(a, s)    (-1)
1287
+#define DIRECT_MMAP(s)       MFAIL
1288
+
1289
+#else /* HAVE_MMAP */
1290
+#define IS_MMAPPED_BIT       (SIZE_T_ONE)
1291
+#define USE_MMAP_BIT         (SIZE_T_ONE)
1292
+
1293
+#ifndef WIN32
1294
+#define CALL_MUNMAP(a, s)    munmap((a), (s))
1295
+#define MMAP_PROT            (PROT_READ|PROT_WRITE)
1296
+#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
1297
+#define MAP_ANONYMOUS        MAP_ANON
1298
+#endif /* MAP_ANON */
1299
+#ifdef MAP_ANONYMOUS
1300
+#define MMAP_FLAGS           (MAP_PRIVATE|MAP_ANONYMOUS)
1301
+#define CALL_MMAP(s)         mmap(0, (s), MMAP_PROT, MMAP_FLAGS, -1, 0)
1302
+#else /* MAP_ANONYMOUS */
1303
+/*
1304
+   Nearly all versions of mmap support MAP_ANONYMOUS, so the following
1305
+   is unlikely to be needed, but is supplied just in case.
1306
+*/
1307
+#define MMAP_FLAGS           (MAP_PRIVATE)
1308
+static int dev_zero_fd = -1; /* Cached file descriptor for /dev/zero. */
1309
+#define CALL_MMAP(s) ((dev_zero_fd < 0) ? \
1310
+           (dev_zero_fd = open("/dev/zero", O_RDWR), \
1311
+            mmap(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0)) : \
1312
+            mmap(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0))
1313
+#endif /* MAP_ANONYMOUS */
1314
+
1315
+#define DIRECT_MMAP(s)       CALL_MMAP(s)
1316
+#else /* WIN32 */
1317
+
1318
+/* Win32 MMAP via VirtualAlloc */
1319
+/* mmap emulation via VirtualAlloc: reserve+commit a fresh read/write
+   region of the given size, or MFAIL on failure. */
+static void* win32mmap(size_t size) {
+  void* region;
+  region = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
+  if (region == 0)
+    return MFAIL;
+  return region;
+}
1323
+
1324
+/* For direct MMAP, use MEM_TOP_DOWN to minimize interference */
1325
+/* Direct-mmap variant: identical to win32mmap but allocates top-down
+   (MEM_TOP_DOWN) to keep large direct mappings away from sbrk-style
+   growth and minimize address-space interference. */
+static void* win32direct_mmap(size_t size) {
+  void* region;
+  region = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN,
+                        PAGE_READWRITE);
+  if (region == 0)
+    return MFAIL;
+  return region;
+}
1330
+
1331
+/* This function supports releasing coalesced segments */
1332
+static int win32munmap(void* ptr, size_t size) {
+  MEMORY_BASIC_INFORMATION info;
+  char* base;
+  size_t remaining;
+  /* [ptr, ptr+size) may span several VirtualAlloc allocations that were
+     coalesced by the allocator; release them one region at a time. */
+  for (base = (char*)ptr, remaining = size; remaining != 0;
+       base += info.RegionSize, remaining -= info.RegionSize) {
+    if (VirtualQuery(base, &info, sizeof(info)) == 0)
+      return -1;
+    /* The region must start exactly here, be a committed allocation of
+       its own, and fit inside what we were asked to unmap. */
+    if (info.BaseAddress != base || info.AllocationBase != base ||
+        info.State != MEM_COMMIT || info.RegionSize > remaining)
+      return -1;
+    if (VirtualFree(base, 0, MEM_RELEASE) == 0)
+      return -1;
+  }
+  return 0;
+}
1348
+
1349
+#define CALL_MMAP(s)         win32mmap(s)
1350
+#define CALL_MUNMAP(a, s)    win32munmap((a), (s))
1351
+#define DIRECT_MMAP(s)       win32direct_mmap(s)
1352
+#endif /* WIN32 */
1353
+#endif /* HAVE_MMAP */
1354
+
1355
+#if HAVE_MMAP && HAVE_MREMAP